001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.ml.clustering.evaluation;
019
020import java.util.List;
021
022import org.apache.commons.math3.ml.clustering.Cluster;
023import org.apache.commons.math3.ml.clustering.Clusterable;
024import org.apache.commons.math3.ml.distance.DistanceMeasure;
025import org.apache.commons.math3.stat.descriptive.moment.Variance;
026
027/**
028 * Computes the sum of intra-cluster distance variances according to the formula:
029 * <pre>
030 * \( score = \sum\limits_{i=1}^n \sigma_i^2 \)
031 * </pre>
032 * where n is the number of clusters and \( \sigma_i^2 \) is the variance of
033 * intra-cluster distances of cluster \( c_i \).
034 *
035 * @param <T> the type of the clustered points
036 * @since 3.3
037 */
038public class SumOfClusterVariances<T extends Clusterable> extends ClusterEvaluator<T> {
039
040    /**
041     *
042     * @param measure the distance measure to use
043     */
044    public SumOfClusterVariances(final DistanceMeasure measure) {
045        super(measure);
046    }
047
048    @Override
049    public double score(final List<? extends Cluster<T>> clusters) {
050        double varianceSum = 0.0;
051        for (final Cluster<T> cluster : clusters) {
052            if (!cluster.getPoints().isEmpty()) {
053
054                final Clusterable center = centroidOf(cluster);
055
056                // compute the distance variance of the current cluster
057                final Variance stat = new Variance();
058                for (final T point : cluster.getPoints()) {
059                    stat.increment(distance(point, center));
060                }
061                varianceSum += stat.getResult();
062
063            }
064        }
065        return varianceSum;
066    }
067
068}