001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.ml.neuralnet;
019
020import java.util.ArrayList;
021import java.util.Collection;
022import java.util.Collections;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Comparator;
026
027import org.apache.commons.math3.exception.NoDataException;
028import org.apache.commons.math3.ml.distance.DistanceMeasure;
029import org.apache.commons.math3.ml.neuralnet.twod.NeuronSquareMesh2D;
030import org.apache.commons.math3.util.Pair;
031
032/**
033 * Utilities for network maps.
034 *
035 * @since 3.3
036 */
037public class MapUtils {
038    /**
039     * Class contains only static methods.
040     */
041    private MapUtils() {}
042
043    /**
044     * Finds the neuron that best matches the given features.
045     *
046     * @param features Data.
047     * @param neurons List of neurons to scan. If the list is empty
048     * {@code null} will be returned.
049     * @param distance Distance function. The neuron's features are
050     * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}.
051     * @return the neuron whose features are closest to the given data.
052     * @throws org.apache.commons.math3.exception.DimensionMismatchException
053     * if the size of the input is not compatible with the neurons features
054     * size.
055     */
056    public static Neuron findBest(double[] features,
057                                  Iterable<Neuron> neurons,
058                                  DistanceMeasure distance) {
059        Neuron best = null;
060        double min = Double.POSITIVE_INFINITY;
061        for (final Neuron n : neurons) {
062            final double d = distance.compute(n.getFeatures(), features);
063            if (d < min) {
064                min = d;
065                best = n;
066            }
067        }
068
069        return best;
070    }
071
072    /**
073     * Finds the two neurons that best match the given features.
074     *
075     * @param features Data.
076     * @param neurons List of neurons to scan. If the list is empty
077     * {@code null} will be returned.
078     * @param distance Distance function. The neuron's features are
079     * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}.
080     * @return the two neurons whose features are closest to the given data.
081     * @throws org.apache.commons.math3.exception.DimensionMismatchException
082     * if the size of the input is not compatible with the neurons features
083     * size.
084     */
085    public static Pair<Neuron, Neuron> findBestAndSecondBest(double[] features,
086                                                             Iterable<Neuron> neurons,
087                                                             DistanceMeasure distance) {
088        Neuron[] best = { null, null };
089        double[] min = { Double.POSITIVE_INFINITY,
090                         Double.POSITIVE_INFINITY };
091        for (final Neuron n : neurons) {
092            final double d = distance.compute(n.getFeatures(), features);
093            if (d < min[0]) {
094                // Replace second best with old best.
095                min[1] = min[0];
096                best[1] = best[0];
097
098                // Store current as new best.
099                min[0] = d;
100                best[0] = n;
101            } else if (d < min[1]) {
102                // Replace old second best with current.
103                min[1] = d;
104                best[1] = n;
105            }
106        }
107
108        return new Pair<Neuron, Neuron>(best[0], best[1]);
109    }
110
111    /**
112     * Creates a list of neurons sorted in increased order of the distance
113     * to the given {@code features}.
114     *
115     * @param features Data.
116     * @param neurons List of neurons to scan. If it is empty, an empty array
117     * will be returned.
118     * @param distance Distance function.
119     * @return the neurons, sorted in increasing order of distance in data
120     * space.
121     * @throws org.apache.commons.math3.exception.DimensionMismatchException
122     * if the size of the input is not compatible with the neurons features
123     * size.
124     *
125     * @see #findBest(double[],Iterable,DistanceMeasure)
126     * @see #findBestAndSecondBest(double[],Iterable,DistanceMeasure)
127     *
128     * @since 3.6
129     */
130    public static Neuron[] sort(double[] features,
131                                Iterable<Neuron> neurons,
132                                DistanceMeasure distance) {
133        final List<PairNeuronDouble> list = new ArrayList<PairNeuronDouble>();
134
135        for (final Neuron n : neurons) {
136            final double d = distance.compute(n.getFeatures(), features);
137            list.add(new PairNeuronDouble(n, d));
138        }
139
140        Collections.sort(list, PairNeuronDouble.COMPARATOR);
141
142        final int len = list.size();
143        final Neuron[] sorted = new Neuron[len];
144
145        for (int i = 0; i < len; i++) {
146            sorted[i] = list.get(i).getNeuron();
147        }
148        return sorted;
149    }
150
151    /**
152     * Computes the <a href="http://en.wikipedia.org/wiki/U-Matrix">
153     *  U-matrix</a> of a two-dimensional map.
154     *
155     * @param map Network.
156     * @param distance Function to use for computing the average
157     * distance from a neuron to its neighbours.
158     * @return the matrix of average distances.
159     */
160    public static double[][] computeU(NeuronSquareMesh2D map,
161                                      DistanceMeasure distance) {
162        final int numRows = map.getNumberOfRows();
163        final int numCols = map.getNumberOfColumns();
164        final double[][] uMatrix = new double[numRows][numCols];
165
166        final Network net = map.getNetwork();
167
168        for (int i = 0; i < numRows; i++) {
169            for (int j = 0; j < numCols; j++) {
170                final Neuron neuron = map.getNeuron(i, j);
171                final Collection<Neuron> neighbours = net.getNeighbours(neuron);
172                final double[] features = neuron.getFeatures();
173
174                double d = 0;
175                int count = 0;
176                for (Neuron n : neighbours) {
177                    ++count;
178                    d += distance.compute(features, n.getFeatures());
179                }
180
181                uMatrix[i][j] = d / count;
182            }
183        }
184
185        return uMatrix;
186    }
187
188    /**
189     * Computes the "hit" histogram of a two-dimensional map.
190     *
191     * @param data Feature vectors.
192     * @param map Network.
193     * @param distance Function to use for determining the best matching unit.
194     * @return the number of hits for each neuron in the map.
195     */
196    public static int[][] computeHitHistogram(Iterable<double[]> data,
197                                              NeuronSquareMesh2D map,
198                                              DistanceMeasure distance) {
199        final HashMap<Neuron, Integer> hit = new HashMap<Neuron, Integer>();
200        final Network net = map.getNetwork();
201
202        for (double[] f : data) {
203            final Neuron best = findBest(f, net, distance);
204            final Integer count = hit.get(best);
205            if (count == null) {
206                hit.put(best, 1);
207            } else {
208                hit.put(best, count + 1);
209            }
210        }
211
212        // Copy the histogram data into a 2D map.
213        final int numRows = map.getNumberOfRows();
214        final int numCols = map.getNumberOfColumns();
215        final int[][] histo = new int[numRows][numCols];
216
217        for (int i = 0; i < numRows; i++) {
218            for (int j = 0; j < numCols; j++) {
219                final Neuron neuron = map.getNeuron(i, j);
220                final Integer count = hit.get(neuron);
221                if (count == null) {
222                    histo[i][j] = 0;
223                } else {
224                    histo[i][j] = count;
225                }
226            }
227        }
228
229        return histo;
230    }
231
232    /**
233     * Computes the quantization error.
234     * The quantization error is the average distance between a feature vector
235     * and its "best matching unit" (closest neuron).
236     *
237     * @param data Feature vectors.
238     * @param neurons List of neurons to scan.
239     * @param distance Distance function.
240     * @return the error.
241     * @throws NoDataException if {@code data} is empty.
242     */
243    public static double computeQuantizationError(Iterable<double[]> data,
244                                                  Iterable<Neuron> neurons,
245                                                  DistanceMeasure distance) {
246        double d = 0;
247        int count = 0;
248        for (double[] f : data) {
249            ++count;
250            d += distance.compute(f, findBest(f, neurons, distance).getFeatures());
251        }
252
253        if (count == 0) {
254            throw new NoDataException();
255        }
256
257        return d / count;
258    }
259
260    /**
261     * Computes the topographic error.
262     * The topographic error is the proportion of data for which first and
263     * second best matching units are not adjacent in the map.
264     *
265     * @param data Feature vectors.
266     * @param net Network.
267     * @param distance Distance function.
268     * @return the error.
269     * @throws NoDataException if {@code data} is empty.
270     */
271    public static double computeTopographicError(Iterable<double[]> data,
272                                                 Network net,
273                                                 DistanceMeasure distance) {
274        int notAdjacentCount = 0;
275        int count = 0;
276        for (double[] f : data) {
277            ++count;
278            final Pair<Neuron, Neuron> p = findBestAndSecondBest(f, net, distance);
279            if (!net.getNeighbours(p.getFirst()).contains(p.getSecond())) {
280                // Increment count if first and second best matching units
281                // are not neighbours.
282                ++notAdjacentCount;
283            }
284        }
285
286        if (count == 0) {
287            throw new NoDataException();
288        }
289
290        return ((double) notAdjacentCount) / count;
291    }
292
293    /**
294     * Helper data structure holding a (Neuron, double) pair.
295     */
296    private static class PairNeuronDouble {
297        /** Comparator. */
298        static final Comparator<PairNeuronDouble> COMPARATOR
299            = new Comparator<PairNeuronDouble>() {
300            /** {@inheritDoc} */
301            public int compare(PairNeuronDouble o1,
302                               PairNeuronDouble o2) {
303                return Double.compare(o1.value, o2.value);
304            }
305        };
306        /** Key. */
307        private final Neuron neuron;
308        /** Value. */
309        private final double value;
310
311        /**
312         * @param neuron Neuron.
313         * @param value Value.
314         */
315        PairNeuronDouble(Neuron neuron, double value) {
316            this.neuron = neuron;
317            this.value = value;
318        }
319
320        /** @return the neuron. */
321        public Neuron getNeuron() {
322            return neuron;
323        }
324
325    }
326}