/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.ml.clustering.evaluation;

import java.util.List;

import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.distance.DistanceMeasure;
import org.apache.commons.math3.ml.distance.EuclideanDistance;

/**
 * Base class for cluster evaluation methods.
 * <p>
 * Subclasses implement {@link #score(List)} and may reuse the protected
 * helpers {@link #distance(Clusterable, Clusterable)} and
 * {@link #centroidOf(Cluster)} provided here.
 *
 * @param <T> type of the clustered points
 * @since 3.3
 */
public abstract class ClusterEvaluator<T extends Clusterable> {

    /** The distance measure to use when evaluating the cluster. */
    private final DistanceMeasure measure;

    /**
     * Creates a new cluster evaluator with an {@link EuclideanDistance}
     * as distance measure.
     */
    public ClusterEvaluator() {
        this(new EuclideanDistance());
    }

    /**
     * Creates a new cluster evaluator with the given distance measure.
     * <p>
     * The measure is stored as-is; it is not checked for {@code null} here.
     *
     * @param measure the distance measure to use
     */
    public ClusterEvaluator(final DistanceMeasure measure) {
        this.measure = measure;
    }

    /**
     * Computes the evaluation score for the given list of clusters.
     *
     * @param clusters the clusters to evaluate
     * @return the computed score
     */
    public abstract double score(List<? extends Cluster<T>> clusters);

    /**
     * Returns whether the first evaluation score is considered to be better
     * than the second one by this evaluator.
     * <p>
     * The default implementation treats a smaller score as the better one,
     * i.e. it returns {@code score1 < score2}. Specific implementations shall
     * override this method if their scores do not follow that ordering.
     *
     * @param score1 the first score
     * @param score2 the second score
     * @return {@code true} if the first score is considered to be better, {@code false} otherwise
     */
    public boolean isBetterScore(double score1, double score2) {
        return score1 < score2;
    }

    /**
     * Calculates the distance between two {@link Clusterable} instances
     * with the configured {@link DistanceMeasure}.
     *
     * @param p1 the first clusterable
     * @param p2 the second clusterable
     * @return the distance between the two clusterables
     */
    protected double distance(final Clusterable p1, final Clusterable p2) {
        return measure.compute(p1.getPoint(), p2.getPoint());
    }

    /**
     * Computes the centroid for a cluster.
     * <p>
     * The emptiness check is performed first, so a cluster without points
     * yields {@code null} regardless of its concrete type. For a non-empty
     * {@link CentroidCluster} the center it already holds is returned.
     * Otherwise the centroid is the component-wise arithmetic mean of all
     * points, with the dimension taken from the first point.
     *
     * @param cluster the cluster
     * @return the computed centroid for the cluster,
     * or {@code null} if the cluster does not contain any points
     */
    protected Clusterable centroidOf(final Cluster<T> cluster) {
        final List<T> points = cluster.getPoints();
        if (points.isEmpty()) {
            return null;
        }

        // In case the cluster is of type CentroidCluster, no need to compute the centroid.
        if (cluster instanceof CentroidCluster) {
            return ((CentroidCluster<T>) cluster).getCenter();
        }

        // The first point fixes the dimension; every other point is indexed up to
        // that length, so a shorter point would raise ArrayIndexOutOfBoundsException.
        final int dimension = points.get(0).getPoint().length;
        final double[] centroid = new double[dimension];

        // Accumulate the per-coordinate sums over all points ...
        for (final T p : points) {
            final double[] point = p.getPoint();
            for (int i = 0; i < centroid.length; i++) {
                centroid[i] += point[i];
            }
        }

        // ... then turn the sums into means.
        final int count = points.size();
        for (int i = 0; i < centroid.length; i++) {
            centroid[i] /= count;
        }
        return new DoublePoint(centroid);
    }

}