001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.ml.clustering.evaluation; 019 020import java.util.List; 021 022import org.apache.commons.math3.ml.clustering.Cluster; 023import org.apache.commons.math3.ml.clustering.Clusterable; 024import org.apache.commons.math3.ml.distance.DistanceMeasure; 025import org.apache.commons.math3.stat.descriptive.moment.Variance; 026 027/** 028 * Computes the sum of intra-cluster distance variances according to the formula: 029 * <pre> 030 * \( score = \sum\limits_{i=1}^n \sigma_i^2 \) 031 * </pre> 032 * where n is the number of clusters and \( \sigma_i^2 \) is the variance of 033 * intra-cluster distances of cluster \( c_i \). 034 * 035 * @param <T> the type of the clustered points 036 * @since 3.3 037 */ 038public class SumOfClusterVariances<T extends Clusterable> extends ClusterEvaluator<T> { 039 040 /** 041 * 042 * @param measure the distance measure to use 043 */ 044 public SumOfClusterVariances(final DistanceMeasure measure) { 045 super(measure); 046 } 047 048 /** {@inheritDoc} */ 049 @Override 050 public double score(final List<? extends Cluster<T>> clusters) { 051 double varianceSum = 0.0; 052 for (final Cluster<T> cluster : clusters) { 053 if (!cluster.getPoints().isEmpty()) { 054 055 final Clusterable center = centroidOf(cluster); 056 057 // compute the distance variance of the current cluster 058 final Variance stat = new Variance(); 059 for (final T point : cluster.getPoints()) { 060 stat.increment(distance(point, center)); 061 } 062 varianceSum += stat.getResult(); 063 064 } 065 } 066 return varianceSum; 067 } 068 069}