001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.ml.neuralnet.twod.util; 019 020import org.apache.commons.math3.ml.neuralnet.MapUtils; 021import org.apache.commons.math3.ml.neuralnet.Neuron; 022import org.apache.commons.math3.ml.neuralnet.twod.NeuronSquareMesh2D; 023import org.apache.commons.math3.ml.distance.DistanceMeasure; 024 025/** 026 * Computes the hit histogram. 027 * Each bin will contain the number of data for which the corresponding 028 * neuron is the best matching unit. 029 * @since 3.6 030 */ 031public class HitHistogram implements MapDataVisualization { 032 /** Distance. */ 033 private final DistanceMeasure distance; 034 /** Whether to compute relative bin counts. */ 035 private final boolean normalizeCount; 036 037 /** 038 * @param normalizeCount Whether to compute relative bin counts. 039 * If {@code true}, the data count in each bin will be divided by the total 040 * number of samples. 041 * @param distance Distance. 042 */ 043 public HitHistogram(boolean normalizeCount, 044 DistanceMeasure distance) { 045 this.normalizeCount = normalizeCount; 046 this.distance = distance; 047 } 048 049 /** {@inheritDoc} */ 050 public double[][] computeImage(NeuronSquareMesh2D map, 051 Iterable<double[]> data) { 052 final int nR = map.getNumberOfRows(); 053 final int nC = map.getNumberOfColumns(); 054 055 final LocationFinder finder = new LocationFinder(map); 056 057 // Total number of samples. 058 int numSamples = 0; 059 // Hit bins. 060 final double[][] hit = new double[nR][nC]; 061 062 for (double[] sample : data) { 063 final Neuron best = MapUtils.findBest(sample, map, distance); 064 065 final LocationFinder.Location loc = finder.getLocation(best); 066 final int row = loc.getRow(); 067 final int col = loc.getColumn(); 068 hit[row][col] += 1; 069 070 ++numSamples; 071 } 072 073 if (normalizeCount) { 074 for (int r = 0; r < nR; r++) { 075 for (int c = 0; c < nC; c++) { 076 hit[r][c] /= numSamples; 077 } 078 } 079 } 080 081 return hit; 082 } 083}