001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math4.legacy.stat; 018 019import java.text.NumberFormat; 020import java.util.ArrayList; 021import java.util.Collection; 022import java.util.Comparator; 023import java.util.Iterator; 024import java.util.List; 025import java.util.Map; 026import java.util.Map.Entry; 027import java.util.SortedMap; 028import java.util.TreeMap; 029 030import org.apache.commons.math4.legacy.exception.NullArgumentException; 031import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 032 033/** 034 * Maintains a frequency distribution. 035 * 036 * <p>The values are ordered using the default (natural order), unless a 037 * <code>Comparator</code> is supplied in the constructor.</p> 038 * 039 * @param <T> a comparable type used in the frequency distribution 040 */ 041public class Frequency<T extends Comparable<T>> { 042 /** underlying collection. */ 043 private final SortedMap<T, Long> freqTable; 044 045 /** 046 * Default constructor. 047 */ 048 public Frequency() { 049 freqTable = new TreeMap<>(); 050 } 051 052 /** 053 * Constructor allowing values Comparator to be specified. 054 * 055 * @param comparator Comparator used to order values 056 */ 057 public Frequency(Comparator<T> comparator) { 058 freqTable = new TreeMap<>(comparator); 059 } 060 061 /** 062 * Return a string representation of this frequency distribution. 063 * 064 * @return a string representation. 065 */ 066 @Override 067 public String toString() { 068 NumberFormat nf = NumberFormat.getPercentInstance(); 069 StringBuilder outBuffer = new StringBuilder(); 070 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n"); 071 Iterator<T> iter = freqTable.keySet().iterator(); 072 while (iter.hasNext()) { 073 T value = iter.next(); 074 outBuffer.append(value); 075 outBuffer.append('\t'); 076 outBuffer.append(getCount(value)); 077 outBuffer.append('\t'); 078 outBuffer.append(nf.format(getPct(value))); 079 outBuffer.append('\t'); 080 outBuffer.append(nf.format(getCumPct(value))); 081 outBuffer.append('\n'); 082 } 083 return outBuffer.toString(); 084 } 085 086 /** 087 * Adds 1 to the frequency count for v. 088 * 089 * @param v the value to add. 090 */ 091 public void addValue(T v) { 092 incrementValue(v, 1); 093 } 094 095 /** 096 * Increments the frequency count for v. 097 * 098 * @param v the value to add. 099 * @param increment the amount by which the value should be incremented 100 * @since 3.1 101 */ 102 public void incrementValue(T v, long increment) { 103 Long count = freqTable.get(v); 104 if (count == null) { 105 freqTable.put(v, Long.valueOf(increment)); 106 } else { 107 freqTable.put(v, Long.valueOf(count.longValue() + increment)); 108 } 109 } 110 111 /** Clears the frequency table. */ 112 public void clear() { 113 freqTable.clear(); 114 } 115 116 /** 117 * Returns an Iterator over the set of values that have been added. 118 * 119 * @return values Iterator 120 */ 121 public Iterator<T> valuesIterator() { 122 return freqTable.keySet().iterator(); 123 } 124 125 /** 126 * Return an Iterator over the set of keys and values that have been added. 127 * Using the entry set to iterate is more efficient in the case where you 128 * need to access respective counts as well as values, since it doesn't 129 * require a "get" for every key...the value is provided in the Map.Entry. 130 * 131 * @return entry set Iterator 132 * @since 3.1 133 */ 134 public Iterator<Map.Entry<T, Long>> entrySetIterator() { 135 return freqTable.entrySet().iterator(); 136 } 137 138 //------------------------------------------------------------------------- 139 140 /** 141 * Returns the sum of all frequencies. 142 * 143 * @return the total frequency count. 144 */ 145 public long getSumFreq() { 146 long result = 0; 147 Iterator<Long> iterator = freqTable.values().iterator(); 148 while (iterator.hasNext()) { 149 result += iterator.next().longValue(); 150 } 151 return result; 152 } 153 154 /** 155 * Returns the number of values equal to v. 156 * 157 * @param v the value to lookup. 158 * @return the frequency of v. 159 */ 160 public long getCount(T v) { 161 long result = 0; 162 Long count = freqTable.get(v); 163 if (count != null) { 164 result = count.longValue(); 165 } 166 return result; 167 } 168 169 /** 170 * Returns the number of values in the frequency table. 171 * 172 * @return the number of unique values that have been added to the frequency table. 173 * @see #valuesIterator() 174 */ 175 public int getUniqueCount(){ 176 return freqTable.keySet().size(); 177 } 178 179 /** 180 * Returns the percentage of values that are equal to v 181 * (as a proportion between 0 and 1). 182 * <p> 183 * Returns <code>Double.NaN</code> if no values have been added. 184 * </p> 185 * 186 * @param v the value to lookup 187 * @return the proportion of values equal to v 188 */ 189 public double getPct(T v) { 190 final long sumFreq = getSumFreq(); 191 if (sumFreq == 0) { 192 return Double.NaN; 193 } 194 return (double) getCount(v) / (double) sumFreq; 195 } 196 197 //----------------------------------------------------------------------------------------- 198 199 /** 200 * Returns the cumulative frequency of values less than or equal to v. 201 * 202 * @param v the value to lookup. 203 * @return the proportion of values equal to v 204 */ 205 public long getCumFreq(T v) { 206 if (getSumFreq() == 0) { 207 return 0; 208 } 209 Comparator<? super T> c = freqTable.comparator(); 210 if (c == null) { 211 c = new NaturalComparator<>(); 212 } 213 long result = 0; 214 215 Long value = freqTable.get(v); 216 if (value != null) { 217 result = value.longValue(); 218 } 219 220 if (c.compare(v, freqTable.firstKey()) < 0) { 221 return 0; // v is comparable, but less than first value 222 } 223 224 if (c.compare(v, freqTable.lastKey()) >= 0) { 225 return getSumFreq(); // v is comparable, but greater than the last value 226 } 227 228 Iterator<T> values = valuesIterator(); 229 while (values.hasNext()) { 230 T nextValue = values.next(); 231 if (c.compare(v, nextValue) > 0) { 232 result += getCount(nextValue); 233 } else { 234 return result; 235 } 236 } 237 return result; 238 } 239 240 //---------------------------------------------------------------------------------------------- 241 242 /** 243 * Returns the cumulative percentage of values less than or equal to v 244 * (as a proportion between 0 and 1). 245 * <p> 246 * Returns <code>Double.NaN</code> if no values have been added. 247 * </p> 248 * 249 * @param v the value to lookup 250 * @return the proportion of values less than or equal to v 251 */ 252 public double getCumPct(T v) { 253 final long sumFreq = getSumFreq(); 254 if (sumFreq == 0) { 255 return Double.NaN; 256 } 257 return (double) getCumFreq(v) / (double) sumFreq; 258 } 259 260 /** 261 * Returns the mode value(s) in comparator order. 262 * 263 * @return a list containing the value(s) which appear most often. 264 * @since 3.3 265 */ 266 public List<T> getMode() { 267 long mostPopular = 0; // frequencies are always positive 268 269 // Get the max count first, so we avoid having to recreate the List each time 270 for(Long l : freqTable.values()) { 271 long frequency = l.longValue(); 272 if (frequency > mostPopular) { 273 mostPopular = frequency; 274 } 275 } 276 277 List<T> modeList = new ArrayList<>(); 278 for (Entry<T, Long> ent : freqTable.entrySet()) { 279 long frequency = ent.getValue().longValue(); 280 if (frequency == mostPopular) { 281 modeList.add(ent.getKey()); 282 } 283 } 284 return modeList; 285 } 286 287 //---------------------------------------------------------------------------------------------- 288 289 /** 290 * Merge another Frequency object's counts into this instance. 291 * This Frequency's counts will be incremented (or set when not already set) 292 * by the counts represented by other. 293 * 294 * @param other the other {@link Frequency} object to be merged 295 * @throws NullArgumentException if {@code other} is null 296 * @since 3.1 297 */ 298 public void merge(final Frequency<T> other) throws NullArgumentException { 299 NullArgumentException.check(other, LocalizedFormats.NULL_NOT_ALLOWED); 300 301 final Iterator<Map.Entry<T, Long>> iter = other.entrySetIterator(); 302 while (iter.hasNext()) { 303 final Map.Entry<T, Long> entry = iter.next(); 304 incrementValue(entry.getKey(), entry.getValue().longValue()); 305 } 306 } 307 308 /** 309 * Merge a {@link Collection} of {@link Frequency} objects into this instance. 310 * This Frequency's counts will be incremented (or set when not already set) 311 * by the counts represented by each of the others. 312 * 313 * @param others the other {@link Frequency} objects to be merged 314 * @throws NullArgumentException if the collection is null 315 * @since 3.1 316 */ 317 public void merge(final Collection<Frequency<T>> others) throws NullArgumentException { 318 NullArgumentException.check(others, LocalizedFormats.NULL_NOT_ALLOWED); 319 320 for (final Frequency<T> freq : others) { 321 merge(freq); 322 } 323 } 324 325 //---------------------------------------------------------------------------------------------- 326 327 /** 328 * A Comparator that compares comparable objects using the 329 * natural order. Copied from Commons Collections ComparableComparator. 330 * 331 * @param <U> the type of the objects compared 332 */ 333 private static final class NaturalComparator<U extends Comparable<U>> implements Comparator<U> { 334 /** 335 * Compare the two {@link Comparable Comparable} arguments. 336 * This method is equivalent to: 337 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre> 338 * 339 * @param o1 the first object 340 * @param o2 the second object 341 * @return result of comparison 342 * @throws NullPointerException when <i>o1</i> is <code>null</code>, 343 * or when <code>((Comparable)o1).compareTo(o2)</code> does 344 */ 345 @Override 346 public int compare(U o1, U o2) { 347 return o1.compareTo(o2); 348 } 349 } 350 351 /** {@inheritDoc} */ 352 @Override 353 public int hashCode() { 354 final int prime = 31; 355 int result = 1; 356 result = prime * result + 357 ((freqTable == null) ? 0 : freqTable.hashCode()); 358 return result; 359 } 360 361 /** {@inheritDoc} */ 362 @Override 363 public boolean equals(Object obj) { 364 if (this == obj) { 365 return true; 366 } 367 if (!(obj instanceof Frequency<?>)) { 368 return false; 369 } 370 Frequency<?> other = (Frequency<?>) obj; 371 return freqTable.equals(other.freqTable); 372 } 373}