001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math4.legacy.ml.clustering.evaluation; 019 020import java.util.List; 021 022import org.apache.commons.math4.legacy.ml.clustering.Cluster; 023import org.apache.commons.math4.legacy.ml.clustering.Clusterable; 024import org.apache.commons.math4.legacy.ml.clustering.ClusterEvaluator; 025import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure; 026import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance; 027 028/** 029 * Computes the sum of intra-cluster distance variances according to the formula: 030 * <pre> 031 * \( score = \sum\limits_{i=1}^n \sigma_i^2 \) 032 * </pre> 033 * where n is the number of clusters and \( \sigma_i^2 \) is the variance of 034 * intra-cluster distances of cluster \( c_i \). 035 * 036 * @since 3.3 037 */ 038public class SumOfClusterVariances implements ClusterEvaluator { 039 /** The distance measure to use when evaluating the cluster. */ 040 private final DistanceMeasure measure; 041 042 /** 043 * @param measure Distance measure. 044 */ 045 public SumOfClusterVariances(final DistanceMeasure measure) { 046 this.measure = measure; 047 } 048 049 /** {@inheritDoc} */ 050 @Override 051 public double score(List<? extends Cluster<? extends Clusterable>> clusters) { 052 double varianceSum = 0.0; 053 for (final Cluster<? extends Clusterable> cluster : clusters) { 054 if (!cluster.getPoints().isEmpty()) { 055 056 final Clusterable center = cluster.centroid(); 057 058 // compute the distance variance of the current cluster 059 final Variance stat = new Variance(); 060 for (final Clusterable point : cluster.getPoints()) { 061 stat.increment(distance(point, center)); 062 } 063 064 varianceSum += stat.getResult(); 065 } 066 } 067 return varianceSum; 068 } 069 070 /** {@inheritDoc} */ 071 @Override 072 public boolean isBetterScore(double a, 073 double b) { 074 return a < b; 075 } 076 077 /** 078 * Calculates the distance between two {@link Clusterable} instances 079 * with the configured {@link DistanceMeasure}. 080 * 081 * @param p1 the first clusterable 082 * @param p2 the second clusterable 083 * @return the distance between the two clusterables 084 */ 085 private double distance(final Clusterable p1, final Clusterable p2) { 086 return measure.compute(p1.getPoint(), p2.getPoint()); 087 } 088}