001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.stat; 018 019import java.io.Serializable; 020import java.text.NumberFormat; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Iterator; 024import java.util.Comparator; 025import java.util.List; 026import java.util.Map; 027import java.util.Map.Entry; 028import java.util.SortedMap; 029import java.util.TreeMap; 030 031import org.apache.commons.math3.exception.MathIllegalArgumentException; 032import org.apache.commons.math3.exception.NullArgumentException; 033import org.apache.commons.math3.exception.util.LocalizedFormats; 034import org.apache.commons.math3.util.MathUtils; 035 036/** 037 * Maintains a frequency distribution. 038 * <p> 039 * Accepts int, long, char or Comparable values. New values added must be 040 * comparable to those that have been added, otherwise the add method will 041 * throw an IllegalArgumentException.</p> 042 * <p> 043 * Integer values (int, long, Integer, Long) are not distinguished by type -- 044 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have 045 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p> 046 * <p>NOTE: byte and short values will be implicitly converted to int values 047 * by the compiler, thus there are no explicit overloaded methods for these 048 * primitive types.</p> 049 * <p> 050 * char values are converted by <code>addValue</code> to Character instances. 051 * As such, these values are not comparable to integral values, so attempts 052 * to combine integral types with chars in a frequency distribution will fail. 053 * </p> 054 * <p> 055 * Float is not coerced to Double. 056 * Since they are not Comparable with each other the user must do any necessary coercion. 057 * Float.NaN and Double.NaN are not treated specially; they may occur in input and will 058 * occur in output if appropriate. 059 * </b> 060 * <p> 061 * The values are ordered using the default (natural order), unless a 062 * <code>Comparator</code> is supplied in the constructor.</p> 063 * 064 */ 065public class Frequency implements Serializable { 066 067 /** Serializable version identifier */ 068 private static final long serialVersionUID = -3845586908418844111L; 069 070 /** underlying collection */ 071 private final SortedMap<Comparable<?>, Long> freqTable; 072 073 /** 074 * Default constructor. 075 */ 076 public Frequency() { 077 freqTable = new TreeMap<Comparable<?>, Long>(); 078 } 079 080 /** 081 * Constructor allowing values Comparator to be specified. 082 * 083 * @param comparator Comparator used to order values 084 */ 085 @SuppressWarnings("unchecked") // TODO is the cast OK? 086 public Frequency(Comparator<?> comparator) { 087 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator); 088 } 089 090 /** 091 * Return a string representation of this frequency distribution. 092 * 093 * @return a string representation. 094 */ 095 @Override 096 public String toString() { 097 NumberFormat nf = NumberFormat.getPercentInstance(); 098 StringBuilder outBuffer = new StringBuilder(); 099 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n"); 100 Iterator<Comparable<?>> iter = freqTable.keySet().iterator(); 101 while (iter.hasNext()) { 102 Comparable<?> value = iter.next(); 103 outBuffer.append(value); 104 outBuffer.append('\t'); 105 outBuffer.append(getCount(value)); 106 outBuffer.append('\t'); 107 outBuffer.append(nf.format(getPct(value))); 108 outBuffer.append('\t'); 109 outBuffer.append(nf.format(getCumPct(value))); 110 outBuffer.append('\n'); 111 } 112 return outBuffer.toString(); 113 } 114 115 /** 116 * Adds 1 to the frequency count for v. 117 * <p> 118 * If other objects have already been added to this Frequency, v must 119 * be comparable to those that have already been added. 120 * </p> 121 * 122 * @param v the value to add. 123 * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries 124 */ 125 public void addValue(Comparable<?> v) throws MathIllegalArgumentException { 126 incrementValue(v, 1); 127 } 128 129 /** 130 * Adds 1 to the frequency count for v. 131 * 132 * @param v the value to add. 133 * @throws MathIllegalArgumentException if the table contains entries not 134 * comparable to Long 135 */ 136 public void addValue(int v) throws MathIllegalArgumentException { 137 addValue(Long.valueOf(v)); 138 } 139 140 /** 141 * Adds 1 to the frequency count for v. 142 * 143 * @param v the value to add. 144 * @throws MathIllegalArgumentException if the table contains entries not 145 * comparable to Long 146 */ 147 public void addValue(long v) throws MathIllegalArgumentException { 148 addValue(Long.valueOf(v)); 149 } 150 151 /** 152 * Adds 1 to the frequency count for v. 153 * 154 * @param v the value to add. 155 * @throws MathIllegalArgumentException if the table contains entries not 156 * comparable to Char 157 */ 158 public void addValue(char v) throws MathIllegalArgumentException { 159 addValue(Character.valueOf(v)); 160 } 161 162 /** 163 * Increments the frequency count for v. 164 * <p> 165 * If other objects have already been added to this Frequency, v must 166 * be comparable to those that have already been added. 167 * </p> 168 * 169 * @param v the value to add. 170 * @param increment the amount by which the value should be incremented 171 * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries 172 * @since 3.1 173 */ 174 public void incrementValue(Comparable<?> v, long increment) throws MathIllegalArgumentException { 175 Comparable<?> obj = v; 176 if (v instanceof Integer) { 177 obj = Long.valueOf(((Integer) v).longValue()); 178 } 179 try { 180 Long count = freqTable.get(obj); 181 if (count == null) { 182 freqTable.put(obj, Long.valueOf(increment)); 183 } else { 184 freqTable.put(obj, Long.valueOf(count.longValue() + increment)); 185 } 186 } catch (ClassCastException ex) { 187 //TreeMap will throw ClassCastException if v is not comparable 188 throw new MathIllegalArgumentException( 189 LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES, 190 v.getClass().getName()); 191 } 192 } 193 194 /** 195 * Increments the frequency count for v. 196 * <p> 197 * If other objects have already been added to this Frequency, v must 198 * be comparable to those that have already been added. 199 * </p> 200 * 201 * @param v the value to add. 202 * @param increment the amount by which the value should be incremented 203 * @throws MathIllegalArgumentException if the table contains entries not 204 * comparable to Long 205 * @since 3.3 206 */ 207 public void incrementValue(int v, long increment) throws MathIllegalArgumentException { 208 incrementValue(Long.valueOf(v), increment); 209 } 210 211 /** 212 * Increments the frequency count for v. 213 * <p> 214 * If other objects have already been added to this Frequency, v must 215 * be comparable to those that have already been added. 216 * </p> 217 * 218 * @param v the value to add. 219 * @param increment the amount by which the value should be incremented 220 * @throws MathIllegalArgumentException if the table contains entries not 221 * comparable to Long 222 * @since 3.3 223 */ 224 public void incrementValue(long v, long increment) throws MathIllegalArgumentException { 225 incrementValue(Long.valueOf(v), increment); 226 } 227 228 /** 229 * Increments the frequency count for v. 230 * <p> 231 * If other objects have already been added to this Frequency, v must 232 * be comparable to those that have already been added. 233 * </p> 234 * 235 * @param v the value to add. 236 * @param increment the amount by which the value should be incremented 237 * @throws MathIllegalArgumentException if the table contains entries not 238 * comparable to Char 239 * @since 3.3 240 */ 241 public void incrementValue(char v, long increment) throws MathIllegalArgumentException { 242 incrementValue(Character.valueOf(v), increment); 243 } 244 245 /** Clears the frequency table */ 246 public void clear() { 247 freqTable.clear(); 248 } 249 250 /** 251 * Returns an Iterator over the set of values that have been added. 252 * <p> 253 * If added values are integral (i.e., integers, longs, Integers, or Longs), 254 * they are converted to Longs when they are added, so the objects returned 255 * by the Iterator will in this case be Longs.</p> 256 * 257 * @return values Iterator 258 */ 259 public Iterator<Comparable<?>> valuesIterator() { 260 return freqTable.keySet().iterator(); 261 } 262 263 /** 264 * Return an Iterator over the set of keys and values that have been added. 265 * Using the entry set to iterate is more efficient in the case where you 266 * need to access respective counts as well as values, since it doesn't 267 * require a "get" for every key...the value is provided in the Map.Entry. 268 * <p> 269 * If added values are integral (i.e., integers, longs, Integers, or Longs), 270 * they are converted to Longs when they are added, so the values of the 271 * map entries returned by the Iterator will in this case be Longs.</p> 272 * 273 * @return entry set Iterator 274 * @since 3.1 275 */ 276 public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() { 277 return freqTable.entrySet().iterator(); 278 } 279 280 //------------------------------------------------------------------------- 281 282 /** 283 * Returns the sum of all frequencies. 284 * 285 * @return the total frequency count. 286 */ 287 public long getSumFreq() { 288 long result = 0; 289 Iterator<Long> iterator = freqTable.values().iterator(); 290 while (iterator.hasNext()) { 291 result += iterator.next().longValue(); 292 } 293 return result; 294 } 295 296 /** 297 * Returns the number of values equal to v. 298 * Returns 0 if the value is not comparable. 299 * 300 * @param v the value to lookup. 301 * @return the frequency of v. 302 */ 303 public long getCount(Comparable<?> v) { 304 if (v instanceof Integer) { 305 return getCount(((Integer) v).longValue()); 306 } 307 long result = 0; 308 try { 309 Long count = freqTable.get(v); 310 if (count != null) { 311 result = count.longValue(); 312 } 313 } catch (ClassCastException ex) { // NOPMD 314 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable 315 } 316 return result; 317 } 318 319 /** 320 * Returns the number of values equal to v. 321 * 322 * @param v the value to lookup. 323 * @return the frequency of v. 324 */ 325 public long getCount(int v) { 326 return getCount(Long.valueOf(v)); 327 } 328 329 /** 330 * Returns the number of values equal to v. 331 * 332 * @param v the value to lookup. 333 * @return the frequency of v. 334 */ 335 public long getCount(long v) { 336 return getCount(Long.valueOf(v)); 337 } 338 339 /** 340 * Returns the number of values equal to v. 341 * 342 * @param v the value to lookup. 343 * @return the frequency of v. 344 */ 345 public long getCount(char v) { 346 return getCount(Character.valueOf(v)); 347 } 348 349 /** 350 * Returns the number of values in the frequency table. 351 * 352 * @return the number of unique values that have been added to the frequency table. 353 * @see #valuesIterator() 354 */ 355 public int getUniqueCount(){ 356 return freqTable.keySet().size(); 357 } 358 359 /** 360 * Returns the percentage of values that are equal to v 361 * (as a proportion between 0 and 1). 362 * <p> 363 * Returns <code>Double.NaN</code> if no values have been added. 364 * Returns 0 if at least one value has been added, but v is not comparable 365 * to the values set.</p> 366 * 367 * @param v the value to lookup 368 * @return the proportion of values equal to v 369 */ 370 public double getPct(Comparable<?> v) { 371 final long sumFreq = getSumFreq(); 372 if (sumFreq == 0) { 373 return Double.NaN; 374 } 375 return (double) getCount(v) / (double) sumFreq; 376 } 377 378 /** 379 * Returns the percentage of values that are equal to v 380 * (as a proportion between 0 and 1). 381 * 382 * @param v the value to lookup 383 * @return the proportion of values equal to v 384 */ 385 public double getPct(int v) { 386 return getPct(Long.valueOf(v)); 387 } 388 389 /** 390 * Returns the percentage of values that are equal to v 391 * (as a proportion between 0 and 1). 392 * 393 * @param v the value to lookup 394 * @return the proportion of values equal to v 395 */ 396 public double getPct(long v) { 397 return getPct(Long.valueOf(v)); 398 } 399 400 /** 401 * Returns the percentage of values that are equal to v 402 * (as a proportion between 0 and 1). 403 * 404 * @param v the value to lookup 405 * @return the proportion of values equal to v 406 */ 407 public double getPct(char v) { 408 return getPct(Character.valueOf(v)); 409 } 410 411 //----------------------------------------------------------------------------------------- 412 413 /** 414 * Returns the cumulative frequency of values less than or equal to v. 415 * <p> 416 * Returns 0 if v is not comparable to the values set.</p> 417 * 418 * @param v the value to lookup. 419 * @return the proportion of values equal to v 420 */ 421 @SuppressWarnings({ "rawtypes", "unchecked" }) 422 public long getCumFreq(Comparable<?> v) { 423 if (getSumFreq() == 0) { 424 return 0; 425 } 426 if (v instanceof Integer) { 427 return getCumFreq(((Integer) v).longValue()); 428 } 429 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator(); 430 if (c == null) { 431 c = new NaturalComparator(); 432 } 433 long result = 0; 434 435 try { 436 Long value = freqTable.get(v); 437 if (value != null) { 438 result = value.longValue(); 439 } 440 } catch (ClassCastException ex) { 441 return result; // v is not comparable 442 } 443 444 if (c.compare(v, freqTable.firstKey()) < 0) { 445 return 0; // v is comparable, but less than first value 446 } 447 448 if (c.compare(v, freqTable.lastKey()) >= 0) { 449 return getSumFreq(); // v is comparable, but greater than the last value 450 } 451 452 Iterator<Comparable<?>> values = valuesIterator(); 453 while (values.hasNext()) { 454 Comparable<?> nextValue = values.next(); 455 if (c.compare(v, nextValue) > 0) { 456 result += getCount(nextValue); 457 } else { 458 return result; 459 } 460 } 461 return result; 462 } 463 464 /** 465 * Returns the cumulative frequency of values less than or equal to v. 466 * <p> 467 * Returns 0 if v is not comparable to the values set.</p> 468 * 469 * @param v the value to lookup 470 * @return the proportion of values equal to v 471 */ 472 public long getCumFreq(int v) { 473 return getCumFreq(Long.valueOf(v)); 474 } 475 476 /** 477 * Returns the cumulative frequency of values less than or equal to v. 478 * <p> 479 * Returns 0 if v is not comparable to the values set.</p> 480 * 481 * @param v the value to lookup 482 * @return the proportion of values equal to v 483 */ 484 public long getCumFreq(long v) { 485 return getCumFreq(Long.valueOf(v)); 486 } 487 488 /** 489 * Returns the cumulative frequency of values less than or equal to v. 490 * <p> 491 * Returns 0 if v is not comparable to the values set.</p> 492 * 493 * @param v the value to lookup 494 * @return the proportion of values equal to v 495 */ 496 public long getCumFreq(char v) { 497 return getCumFreq(Character.valueOf(v)); 498 } 499 500 //---------------------------------------------------------------------------------------------- 501 502 /** 503 * Returns the cumulative percentage of values less than or equal to v 504 * (as a proportion between 0 and 1). 505 * <p> 506 * Returns <code>Double.NaN</code> if no values have been added. 507 * Returns 0 if at least one value has been added, but v is not comparable 508 * to the values set.</p> 509 * 510 * @param v the value to lookup 511 * @return the proportion of values less than or equal to v 512 */ 513 public double getCumPct(Comparable<?> v) { 514 final long sumFreq = getSumFreq(); 515 if (sumFreq == 0) { 516 return Double.NaN; 517 } 518 return (double) getCumFreq(v) / (double) sumFreq; 519 } 520 521 /** 522 * Returns the cumulative percentage of values less than or equal to v 523 * (as a proportion between 0 and 1). 524 * <p> 525 * Returns 0 if v is not comparable to the values set.</p> 526 * 527 * @param v the value to lookup 528 * @return the proportion of values less than or equal to v 529 */ 530 public double getCumPct(int v) { 531 return getCumPct(Long.valueOf(v)); 532 } 533 534 /** 535 * Returns the cumulative percentage of values less than or equal to v 536 * (as a proportion between 0 and 1). 537 * <p> 538 * Returns 0 if v is not comparable to the values set.</p> 539 * 540 * @param v the value to lookup 541 * @return the proportion of values less than or equal to v 542 */ 543 public double getCumPct(long v) { 544 return getCumPct(Long.valueOf(v)); 545 } 546 547 /** 548 * Returns the cumulative percentage of values less than or equal to v 549 * (as a proportion between 0 and 1). 550 * <p> 551 * Returns 0 if v is not comparable to the values set.</p> 552 * 553 * @param v the value to lookup 554 * @return the proportion of values less than or equal to v 555 */ 556 public double getCumPct(char v) { 557 return getCumPct(Character.valueOf(v)); 558 } 559 560 /** 561 * Returns the mode value(s) in comparator order. 562 * 563 * @return a list containing the value(s) which appear most often. 564 * @since 3.3 565 */ 566 public List<Comparable<?>> getMode() { 567 long mostPopular = 0; // frequencies are always positive 568 569 // Get the max count first, so we avoid having to recreate the List each time 570 for(Long l : freqTable.values()) { 571 long frequency = l.longValue(); 572 if (frequency > mostPopular) { 573 mostPopular = frequency; 574 } 575 } 576 577 List<Comparable<?>> modeList = new ArrayList<Comparable<?>>(); 578 for (Entry<Comparable<?>, Long> ent : freqTable.entrySet()) { 579 long frequency = ent.getValue().longValue(); 580 if (frequency == mostPopular) { 581 modeList.add(ent.getKey()); 582 } 583 } 584 return modeList; 585 } 586 587 //---------------------------------------------------------------------------------------------- 588 589 /** 590 * Merge another Frequency object's counts into this instance. 591 * This Frequency's counts will be incremented (or set when not already set) 592 * by the counts represented by other. 593 * 594 * @param other the other {@link Frequency} object to be merged 595 * @throws NullArgumentException if {@code other} is null 596 * @since 3.1 597 */ 598 public void merge(final Frequency other) throws NullArgumentException { 599 MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED); 600 601 final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator(); 602 while (iter.hasNext()) { 603 final Map.Entry<Comparable<?>, Long> entry = iter.next(); 604 incrementValue(entry.getKey(), entry.getValue().longValue()); 605 } 606 } 607 608 /** 609 * Merge a {@link Collection} of {@link Frequency} objects into this instance. 610 * This Frequency's counts will be incremented (or set when not already set) 611 * by the counts represented by each of the others. 612 * 613 * @param others the other {@link Frequency} objects to be merged 614 * @throws NullArgumentException if the collection is null 615 * @since 3.1 616 */ 617 public void merge(final Collection<Frequency> others) throws NullArgumentException { 618 MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED); 619 620 for (final Frequency freq : others) { 621 merge(freq); 622 } 623 } 624 625 //---------------------------------------------------------------------------------------------- 626 627 /** 628 * A Comparator that compares comparable objects using the 629 * natural order. Copied from Commons Collections ComparableComparator. 630 * @param <T> the type of the objects compared 631 */ 632 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable { 633 634 /** Serializable version identifier */ 635 private static final long serialVersionUID = -3852193713161395148L; 636 637 /** 638 * Compare the two {@link Comparable Comparable} arguments. 639 * This method is equivalent to: 640 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre> 641 * 642 * @param o1 the first object 643 * @param o2 the second object 644 * @return result of comparison 645 * @throws NullPointerException when <i>o1</i> is <code>null</code>, 646 * or when <code>((Comparable)o1).compareTo(o2)</code> does 647 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable}, 648 * or when <code>((Comparable)o1).compareTo(o2)</code> does 649 */ 650 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc 651 public int compare(Comparable<T> o1, Comparable<T> o2) { 652 return o1.compareTo((T) o2); 653 } 654 } 655 656 /** {@inheritDoc} */ 657 @Override 658 public int hashCode() { 659 final int prime = 31; 660 int result = 1; 661 result = prime * result + 662 ((freqTable == null) ? 0 : freqTable.hashCode()); 663 return result; 664 } 665 666 /** {@inheritDoc} */ 667 @Override 668 public boolean equals(Object obj) { 669 if (this == obj) { 670 return true; 671 } 672 if (!(obj instanceof Frequency)) { 673 return false; 674 } 675 Frequency other = (Frequency) obj; 676 if (freqTable == null) { 677 if (other.freqTable != null) { 678 return false; 679 } 680 } else if (!freqTable.equals(other.freqTable)) { 681 return false; 682 } 683 return true; 684 } 685 686}