001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.ml.neuralnet; 019 020import java.util.ArrayList; 021import java.util.Collection; 022import java.util.Collections; 023import java.util.HashMap; 024import java.util.List; 025import java.util.Comparator; 026 027import org.apache.commons.math3.exception.NoDataException; 028import org.apache.commons.math3.ml.distance.DistanceMeasure; 029import org.apache.commons.math3.ml.neuralnet.twod.NeuronSquareMesh2D; 030import org.apache.commons.math3.util.Pair; 031 032/** 033 * Utilities for network maps. 034 * 035 * @since 3.3 036 */ 037public class MapUtils { 038 /** 039 * Class contains only static methods. 040 */ 041 private MapUtils() {} 042 043 /** 044 * Finds the neuron that best matches the given features. 045 * 046 * @param features Data. 047 * @param neurons List of neurons to scan. If the list is empty 048 * {@code null} will be returned. 049 * @param distance Distance function. The neuron's features are 050 * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. 051 * @return the neuron whose features are closest to the given data. 052 * @throws org.apache.commons.math3.exception.DimensionMismatchException 053 * if the size of the input is not compatible with the neurons features 054 * size. 055 */ 056 public static Neuron findBest(double[] features, 057 Iterable<Neuron> neurons, 058 DistanceMeasure distance) { 059 Neuron best = null; 060 double min = Double.POSITIVE_INFINITY; 061 for (final Neuron n : neurons) { 062 final double d = distance.compute(n.getFeatures(), features); 063 if (d < min) { 064 min = d; 065 best = n; 066 } 067 } 068 069 return best; 070 } 071 072 /** 073 * Finds the two neurons that best match the given features. 074 * 075 * @param features Data. 076 * @param neurons List of neurons to scan. If the list is empty 077 * {@code null} will be returned. 078 * @param distance Distance function. The neuron's features are 079 * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. 080 * @return the two neurons whose features are closest to the given data. 081 * @throws org.apache.commons.math3.exception.DimensionMismatchException 082 * if the size of the input is not compatible with the neurons features 083 * size. 084 */ 085 public static Pair<Neuron, Neuron> findBestAndSecondBest(double[] features, 086 Iterable<Neuron> neurons, 087 DistanceMeasure distance) { 088 Neuron[] best = { null, null }; 089 double[] min = { Double.POSITIVE_INFINITY, 090 Double.POSITIVE_INFINITY }; 091 for (final Neuron n : neurons) { 092 final double d = distance.compute(n.getFeatures(), features); 093 if (d < min[0]) { 094 // Replace second best with old best. 095 min[1] = min[0]; 096 best[1] = best[0]; 097 098 // Store current as new best. 099 min[0] = d; 100 best[0] = n; 101 } else if (d < min[1]) { 102 // Replace old second best with current. 103 min[1] = d; 104 best[1] = n; 105 } 106 } 107 108 return new Pair<Neuron, Neuron>(best[0], best[1]); 109 } 110 111 /** 112 * Creates a list of neurons sorted in increased order of the distance 113 * to the given {@code features}. 114 * 115 * @param features Data. 116 * @param neurons List of neurons to scan. If it is empty, an empty array 117 * will be returned. 118 * @param distance Distance function. 119 * @return the neurons, sorted in increasing order of distance in data 120 * space. 121 * @throws org.apache.commons.math3.exception.DimensionMismatchException 122 * if the size of the input is not compatible with the neurons features 123 * size. 124 * 125 * @see #findBest(double[],Iterable,DistanceMeasure) 126 * @see #findBestAndSecondBest(double[],Iterable,DistanceMeasure) 127 * 128 * @since 3.6 129 */ 130 public static Neuron[] sort(double[] features, 131 Iterable<Neuron> neurons, 132 DistanceMeasure distance) { 133 final List<PairNeuronDouble> list = new ArrayList<PairNeuronDouble>(); 134 135 for (final Neuron n : neurons) { 136 final double d = distance.compute(n.getFeatures(), features); 137 list.add(new PairNeuronDouble(n, d)); 138 } 139 140 Collections.sort(list, PairNeuronDouble.COMPARATOR); 141 142 final int len = list.size(); 143 final Neuron[] sorted = new Neuron[len]; 144 145 for (int i = 0; i < len; i++) { 146 sorted[i] = list.get(i).getNeuron(); 147 } 148 return sorted; 149 } 150 151 /** 152 * Computes the <a href="http://en.wikipedia.org/wiki/U-Matrix"> 153 * U-matrix</a> of a two-dimensional map. 154 * 155 * @param map Network. 156 * @param distance Function to use for computing the average 157 * distance from a neuron to its neighbours. 158 * @return the matrix of average distances. 159 */ 160 public static double[][] computeU(NeuronSquareMesh2D map, 161 DistanceMeasure distance) { 162 final int numRows = map.getNumberOfRows(); 163 final int numCols = map.getNumberOfColumns(); 164 final double[][] uMatrix = new double[numRows][numCols]; 165 166 final Network net = map.getNetwork(); 167 168 for (int i = 0; i < numRows; i++) { 169 for (int j = 0; j < numCols; j++) { 170 final Neuron neuron = map.getNeuron(i, j); 171 final Collection<Neuron> neighbours = net.getNeighbours(neuron); 172 final double[] features = neuron.getFeatures(); 173 174 double d = 0; 175 int count = 0; 176 for (Neuron n : neighbours) { 177 ++count; 178 d += distance.compute(features, n.getFeatures()); 179 } 180 181 uMatrix[i][j] = d / count; 182 } 183 } 184 185 return uMatrix; 186 } 187 188 /** 189 * Computes the "hit" histogram of a two-dimensional map. 190 * 191 * @param data Feature vectors. 192 * @param map Network. 193 * @param distance Function to use for determining the best matching unit. 194 * @return the number of hits for each neuron in the map. 195 */ 196 public static int[][] computeHitHistogram(Iterable<double[]> data, 197 NeuronSquareMesh2D map, 198 DistanceMeasure distance) { 199 final HashMap<Neuron, Integer> hit = new HashMap<Neuron, Integer>(); 200 final Network net = map.getNetwork(); 201 202 for (double[] f : data) { 203 final Neuron best = findBest(f, net, distance); 204 final Integer count = hit.get(best); 205 if (count == null) { 206 hit.put(best, 1); 207 } else { 208 hit.put(best, count + 1); 209 } 210 } 211 212 // Copy the histogram data into a 2D map. 213 final int numRows = map.getNumberOfRows(); 214 final int numCols = map.getNumberOfColumns(); 215 final int[][] histo = new int[numRows][numCols]; 216 217 for (int i = 0; i < numRows; i++) { 218 for (int j = 0; j < numCols; j++) { 219 final Neuron neuron = map.getNeuron(i, j); 220 final Integer count = hit.get(neuron); 221 if (count == null) { 222 histo[i][j] = 0; 223 } else { 224 histo[i][j] = count; 225 } 226 } 227 } 228 229 return histo; 230 } 231 232 /** 233 * Computes the quantization error. 234 * The quantization error is the average distance between a feature vector 235 * and its "best matching unit" (closest neuron). 236 * 237 * @param data Feature vectors. 238 * @param neurons List of neurons to scan. 239 * @param distance Distance function. 240 * @return the error. 241 * @throws NoDataException if {@code data} is empty. 242 */ 243 public static double computeQuantizationError(Iterable<double[]> data, 244 Iterable<Neuron> neurons, 245 DistanceMeasure distance) { 246 double d = 0; 247 int count = 0; 248 for (double[] f : data) { 249 ++count; 250 d += distance.compute(f, findBest(f, neurons, distance).getFeatures()); 251 } 252 253 if (count == 0) { 254 throw new NoDataException(); 255 } 256 257 return d / count; 258 } 259 260 /** 261 * Computes the topographic error. 262 * The topographic error is the proportion of data for which first and 263 * second best matching units are not adjacent in the map. 264 * 265 * @param data Feature vectors. 266 * @param net Network. 267 * @param distance Distance function. 268 * @return the error. 269 * @throws NoDataException if {@code data} is empty. 270 */ 271 public static double computeTopographicError(Iterable<double[]> data, 272 Network net, 273 DistanceMeasure distance) { 274 int notAdjacentCount = 0; 275 int count = 0; 276 for (double[] f : data) { 277 ++count; 278 final Pair<Neuron, Neuron> p = findBestAndSecondBest(f, net, distance); 279 if (!net.getNeighbours(p.getFirst()).contains(p.getSecond())) { 280 // Increment count if first and second best matching units 281 // are not neighbours. 282 ++notAdjacentCount; 283 } 284 } 285 286 if (count == 0) { 287 throw new NoDataException(); 288 } 289 290 return ((double) notAdjacentCount) / count; 291 } 292 293 /** 294 * Helper data structure holding a (Neuron, double) pair. 295 */ 296 private static class PairNeuronDouble { 297 /** Comparator. */ 298 static final Comparator<PairNeuronDouble> COMPARATOR 299 = new Comparator<PairNeuronDouble>() { 300 /** {@inheritDoc} */ 301 public int compare(PairNeuronDouble o1, 302 PairNeuronDouble o2) { 303 return Double.compare(o1.value, o2.value); 304 } 305 }; 306 /** Key. */ 307 private final Neuron neuron; 308 /** Value. */ 309 private final double value; 310 311 /** 312 * @param neuron Neuron. 313 * @param value Value. 314 */ 315 PairNeuronDouble(Neuron neuron, double value) { 316 this.neuron = neuron; 317 this.value = value; 318 } 319 320 /** @return the neuron. */ 321 public Neuron getNeuron() { 322 return neuron; 323 } 324 325 } 326}