1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.math4.legacy.ml.clustering.evaluation;
19
20 import java.util.List;
21
22 import org.apache.commons.math4.legacy.ml.clustering.Cluster;
23 import org.apache.commons.math4.legacy.ml.clustering.Clusterable;
24 import org.apache.commons.math4.legacy.ml.clustering.ClusterEvaluator;
25 import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
26 import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
27
28 /**
29 * Computes the sum of intra-cluster distance variances according to the formula:
30 * <pre>
31 * \( score = \sum\limits_{i=1}^n \sigma_i^2 \)
32 * </pre>
33 * where n is the number of clusters and \( \sigma_i^2 \) is the variance of
34 * intra-cluster distances of cluster \( c_i \).
35 *
36 * @since 3.3
37 */
38 public class SumOfClusterVariances implements ClusterEvaluator {
39 /** The distance measure to use when evaluating the cluster. */
40 private final DistanceMeasure measure;
41
42 /**
43 * @param measure Distance measure.
44 */
45 public SumOfClusterVariances(final DistanceMeasure measure) {
46 this.measure = measure;
47 }
48
49 /** {@inheritDoc} */
50 @Override
51 public double score(List<? extends Cluster<? extends Clusterable>> clusters) {
52 double varianceSum = 0.0;
53 for (final Cluster<? extends Clusterable> cluster : clusters) {
54 if (!cluster.getPoints().isEmpty()) {
55
56 final Clusterable center = cluster.centroid();
57
58 // compute the distance variance of the current cluster
59 final Variance stat = new Variance();
60 for (final Clusterable point : cluster.getPoints()) {
61 stat.increment(distance(point, center));
62 }
63
64 varianceSum += stat.getResult();
65 }
66 }
67 return varianceSum;
68 }
69
70 /** {@inheritDoc} */
71 @Override
72 public boolean isBetterScore(double a,
73 double b) {
74 return a < b;
75 }
76
77 /**
78 * Calculates the distance between two {@link Clusterable} instances
79 * with the configured {@link DistanceMeasure}.
80 *
81 * @param p1 the first clusterable
82 * @param p2 the second clusterable
83 * @return the distance between the two clusterables
84 */
85 private double distance(final Clusterable p1, final Clusterable p2) {
86 return measure.compute(p1.getPoint(), p2.getPoint());
87 }
88 }