SumOfClusterVariances.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math4.legacy.ml.clustering.evaluation;

  18. import java.util.List;

  19. import org.apache.commons.math4.legacy.ml.clustering.Cluster;
  20. import org.apache.commons.math4.legacy.ml.clustering.Clusterable;
  21. import org.apache.commons.math4.legacy.ml.clustering.ClusterEvaluator;
  22. import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
  23. import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;

  24. /**
  25.  * Computes the sum of intra-cluster distance variances according to the formula:
  26.  * <pre>
  27.  * \( score = \sum\limits_{i=1}^n \sigma_i^2 \)
  28.  * </pre>
  29.  * where n is the number of clusters and \( \sigma_i^2 \) is the variance of
  30.  * intra-cluster distances of cluster \( c_i \).
  31.  *
  32.  * @since 3.3
  33.  */
  34. public class SumOfClusterVariances implements ClusterEvaluator {
  35.     /** The distance measure to use when evaluating the cluster. */
  36.     private final DistanceMeasure measure;

  37.     /**
  38.      * @param measure Distance measure.
  39.      */
  40.     public SumOfClusterVariances(final DistanceMeasure measure) {
  41.         this.measure = measure;
  42.     }

  43.     /** {@inheritDoc} */
  44.     @Override
  45.     public double score(List<? extends Cluster<? extends Clusterable>> clusters) {
  46.         double varianceSum = 0.0;
  47.         for (final Cluster<? extends Clusterable> cluster : clusters) {
  48.             if (!cluster.getPoints().isEmpty()) {

  49.                 final Clusterable center = cluster.centroid();

  50.                 // compute the distance variance of the current cluster
  51.                 final Variance stat = new Variance();
  52.                 for (final Clusterable point : cluster.getPoints()) {
  53.                     stat.increment(distance(point, center));
  54.                 }

  55.                 varianceSum += stat.getResult();
  56.             }
  57.         }
  58.         return varianceSum;
  59.     }

  60.     /** {@inheritDoc} */
  61.     @Override
  62.     public boolean isBetterScore(double a,
  63.                                  double b) {
  64.         return a < b;
  65.     }

  66.     /**
  67.      * Calculates the distance between two {@link Clusterable} instances
  68.      * with the configured {@link DistanceMeasure}.
  69.      *
  70.      * @param p1 the first clusterable
  71.      * @param p2 the second clusterable
  72.      * @return the distance between the two clusterables
  73.      */
  74.     private double distance(final Clusterable p1, final Clusterable p2) {
  75.         return measure.compute(p1.getPoint(), p2.getPoint());
  76.     }
  77. }