001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.ml.clustering.evaluation;
019
020import java.util.List;
021
022import org.apache.commons.math3.ml.clustering.CentroidCluster;
023import org.apache.commons.math3.ml.clustering.Cluster;
024import org.apache.commons.math3.ml.clustering.Clusterable;
025import org.apache.commons.math3.ml.clustering.DoublePoint;
026import org.apache.commons.math3.ml.distance.DistanceMeasure;
027import org.apache.commons.math3.ml.distance.EuclideanDistance;
028
029/**
030 * Base class for cluster evaluation methods.
031 *
032 * @param <T> type of the clustered points
033 * @since 3.3
034 */
035public abstract class ClusterEvaluator<T extends Clusterable> {
036
037    /** The distance measure to use when evaluating the cluster. */
038    private final DistanceMeasure measure;
039
040    /**
041     * Creates a new cluster evaluator with an {@link EuclideanDistance}
042     * as distance measure.
043     */
044    public ClusterEvaluator() {
045        this(new EuclideanDistance());
046    }
047
048    /**
049     * Creates a new cluster evaluator with the given distance measure.
050     * @param measure the distance measure to use
051     */
052    public ClusterEvaluator(final DistanceMeasure measure) {
053        this.measure = measure;
054    }
055
056    /**
057     * Computes the evaluation score for the given list of clusters.
058     * @param clusters the clusters to evaluate
059     * @return the computed score
060     */
061    public abstract double score(List<? extends Cluster<T>> clusters);
062
063    /**
064     * Returns whether the first evaluation score is considered to be better
065     * than the second one by this evaluator.
066     * <p>
067     * Specific implementations shall override this method if the returned scores
068     * do not follow the same ordering, i.e. smaller score is better.
069     *
070     * @param score1 the first score
071     * @param score2 the second score
072     * @return {@code true} if the first score is considered to be better, {@code false} otherwise
073     */
074    public boolean isBetterScore(double score1, double score2) {
075        return score1 < score2;
076    }
077
078    /**
079     * Calculates the distance between two {@link Clusterable} instances
080     * with the configured {@link DistanceMeasure}.
081     *
082     * @param p1 the first clusterable
083     * @param p2 the second clusterable
084     * @return the distance between the two clusterables
085     */
086    protected double distance(final Clusterable p1, final Clusterable p2) {
087        return measure.compute(p1.getPoint(), p2.getPoint());
088    }
089
090    /**
091     * Computes the centroid for a cluster.
092     *
093     * @param cluster the cluster
094     * @return the computed centroid for the cluster,
095     * or {@code null} if the cluster does not contain any points
096     */
097    protected Clusterable centroidOf(final Cluster<T> cluster) {
098        final List<T> points = cluster.getPoints();
099        if (points.isEmpty()) {
100            return null;
101        }
102
103        // in case the cluster is of type CentroidCluster, no need to compute the centroid
104        if (cluster instanceof CentroidCluster) {
105            return ((CentroidCluster<T>) cluster).getCenter();
106        }
107
108        final int dimension = points.get(0).getPoint().length;
109        final double[] centroid = new double[dimension];
110        for (final T p : points) {
111            final double[] point = p.getPoint();
112            for (int i = 0; i < centroid.length; i++) {
113                centroid[i] += point[i];
114            }
115        }
116        for (int i = 0; i < centroid.length; i++) {
117            centroid[i] /= points.size();
118        }
119        return new DoublePoint(centroid);
120    }
121
122}