001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.ml.clustering.evaluation;
019
020import java.util.List;
021
022import org.apache.commons.math4.legacy.ml.clustering.Cluster;
023import org.apache.commons.math4.legacy.ml.clustering.Clusterable;
024import org.apache.commons.math4.legacy.ml.clustering.ClusterEvaluator;
025import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
026import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
027
028/**
029 * Computes the sum of intra-cluster distance variances according to the formula:
030 * <pre>
031 * \( score = \sum\limits_{i=1}^n \sigma_i^2 \)
032 * </pre>
033 * where n is the number of clusters and \( \sigma_i^2 \) is the variance of
034 * intra-cluster distances of cluster \( c_i \).
035 *
036 * @since 3.3
037 */
038public class SumOfClusterVariances implements ClusterEvaluator {
039    /** The distance measure to use when evaluating the cluster. */
040    private final DistanceMeasure measure;
041
042    /**
043     * @param measure Distance measure.
044     */
045    public SumOfClusterVariances(final DistanceMeasure measure) {
046        this.measure = measure;
047    }
048
049    /** {@inheritDoc} */
050    @Override
051    public double score(List<? extends Cluster<? extends Clusterable>> clusters) {
052        double varianceSum = 0.0;
053        for (final Cluster<? extends Clusterable> cluster : clusters) {
054            if (!cluster.getPoints().isEmpty()) {
055
056                final Clusterable center = cluster.centroid();
057
058                // compute the distance variance of the current cluster
059                final Variance stat = new Variance();
060                for (final Clusterable point : cluster.getPoints()) {
061                    stat.increment(distance(point, center));
062                }
063
064                varianceSum += stat.getResult();
065            }
066        }
067        return varianceSum;
068    }
069
070    /** {@inheritDoc} */
071    @Override
072    public boolean isBetterScore(double a,
073                                 double b) {
074        return a < b;
075    }
076
077    /**
078     * Calculates the distance between two {@link Clusterable} instances
079     * with the configured {@link DistanceMeasure}.
080     *
081     * @param p1 the first clusterable
082     * @param p2 the second clusterable
083     * @return the distance between the two clusterables
084     */
085    private double distance(final Clusterable p1, final Clusterable p2) {
086        return measure.compute(p1.getPoint(), p2.getPoint());
087    }
088}