View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math3.stat;
18  
19  import java.io.Serializable;
20  import java.text.NumberFormat;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.Iterator;
24  import java.util.Comparator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Map.Entry;
28  import java.util.TreeMap;
29  
30  import org.apache.commons.math3.exception.MathIllegalArgumentException;
31  import org.apache.commons.math3.exception.NullArgumentException;
32  import org.apache.commons.math3.exception.util.LocalizedFormats;
33  import org.apache.commons.math3.util.MathUtils;
34  
35  /**
36   * Maintains a frequency distribution.
37   * <p>
38   * Accepts int, long, char or Comparable values.  New values added must be
39   * comparable to those that have been added, otherwise the add method will
40   * throw an IllegalArgumentException.</p>
41   * <p>
42   * Integer values (int, long, Integer, Long) are not distinguished by type --
43   * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
44   * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
45   * <p>NOTE: byte and short values will be implicitly converted to int values
46   * by the compiler, thus there are no explicit overloaded methods for these
47   * primitive types.</p>
48   * <p>
49   * char values are converted by <code>addValue</code> to Character instances.
50   * As such, these values are not comparable to integral values, so attempts
51   * to combine integral types with chars in a frequency distribution will fail.
52   * </p>
53   * <p>
54   * Float is not coerced to Double.
55   * Since they are not Comparable with each other the user must do any necessary coercion.
56   * Float.NaN and Double.NaN are not treated specially; they may occur in input and will
57   * occur in output if appropriate.
58   * </b>
59   * <p>
60   * The values are ordered using the default (natural order), unless a
61   * <code>Comparator</code> is supplied in the constructor.</p>
62   *
63   * @version $Id: Frequency.java 1519820 2013-09-03 19:58:03Z tn $
64   */
65  public class Frequency implements Serializable {
66  
67      /** Serializable version identifier */
68      private static final long serialVersionUID = -3845586908418844111L;
69  
70      /** underlying collection */
71      private final TreeMap<Comparable<?>, Long> freqTable;
72  
73      /**
74       * Default constructor.
75       */
76      public Frequency() {
77          freqTable = new TreeMap<Comparable<?>, Long>();
78      }
79  
80      /**
81       * Constructor allowing values Comparator to be specified.
82       *
83       * @param comparator Comparator used to order values
84       */
85      @SuppressWarnings("unchecked") // TODO is the cast OK?
86      public Frequency(Comparator<?> comparator) {
87          freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
88      }
89  
90      /**
91       * Return a string representation of this frequency distribution.
92       *
93       * @return a string representation.
94       */
95      @Override
96      public String toString() {
97          NumberFormat nf = NumberFormat.getPercentInstance();
98          StringBuilder outBuffer = new StringBuilder();
99          outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
100         Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
101         while (iter.hasNext()) {
102             Comparable<?> value = iter.next();
103             outBuffer.append(value);
104             outBuffer.append('\t');
105             outBuffer.append(getCount(value));
106             outBuffer.append('\t');
107             outBuffer.append(nf.format(getPct(value)));
108             outBuffer.append('\t');
109             outBuffer.append(nf.format(getCumPct(value)));
110             outBuffer.append('\n');
111         }
112         return outBuffer.toString();
113     }
114 
115     /**
116      * Adds 1 to the frequency count for v.
117      * <p>
118      * If other objects have already been added to this Frequency, v must
119      * be comparable to those that have already been added.
120      * </p>
121      *
122      * @param v the value to add.
123      * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
124      */
125     public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
126         incrementValue(v, 1);
127     }
128 
129     /**
130      * Adds 1 to the frequency count for v.
131      *
132      * @param v the value to add.
133      * @throws MathIllegalArgumentException if the table contains entries not
134      * comparable to Long
135      */
136     public void addValue(int v) throws MathIllegalArgumentException {
137         addValue(Long.valueOf(v));
138     }
139 
140     /**
141      * Adds 1 to the frequency count for v.
142      *
143      * @param v the value to add.
144      * @throws MathIllegalArgumentException if the table contains entries not
145      * comparable to Long
146      */
147     public void addValue(long v) throws MathIllegalArgumentException {
148         addValue(Long.valueOf(v));
149     }
150 
151     /**
152      * Adds 1 to the frequency count for v.
153      *
154      * @param v the value to add.
155      * @throws MathIllegalArgumentException if the table contains entries not
156      * comparable to Char
157      */
158     public void addValue(char v) throws MathIllegalArgumentException {
159         addValue(Character.valueOf(v));
160     }
161 
162     /**
163      * Increments the frequency count for v.
164      * <p>
165      * If other objects have already been added to this Frequency, v must
166      * be comparable to those that have already been added.
167      * </p>
168      *
169      * @param v the value to add.
170      * @param increment the amount by which the value should be incremented
171      * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
172      * @since 3.1
173      */
174     public void incrementValue(Comparable<?> v, long increment) throws MathIllegalArgumentException {
175         Comparable<?> obj = v;
176         if (v instanceof Integer) {
177             obj = Long.valueOf(((Integer) v).longValue());
178         }
179         try {
180             Long count = freqTable.get(obj);
181             if (count == null) {
182                 freqTable.put(obj, Long.valueOf(increment));
183             } else {
184                 freqTable.put(obj, Long.valueOf(count.longValue() + increment));
185             }
186         } catch (ClassCastException ex) {
187             //TreeMap will throw ClassCastException if v is not comparable
188             throw new MathIllegalArgumentException(
189                   LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
190                   v.getClass().getName());
191         }
192     }
193 
194     /**
195      * Increments the frequency count for v.
196      * <p>
197      * If other objects have already been added to this Frequency, v must
198      * be comparable to those that have already been added.
199      * </p>
200      *
201      * @param v the value to add.
202      * @param increment the amount by which the value should be incremented
203      * @throws MathIllegalArgumentException if the table contains entries not
204      * comparable to Long
205      * @since 3.3
206      */
207     public void incrementValue(int v, long increment) throws MathIllegalArgumentException {
208         incrementValue(Long.valueOf(v), increment);
209     }
210 
211     /**
212      * Increments the frequency count for v.
213      * <p>
214      * If other objects have already been added to this Frequency, v must
215      * be comparable to those that have already been added.
216      * </p>
217      *
218      * @param v the value to add.
219      * @param increment the amount by which the value should be incremented
220      * @throws MathIllegalArgumentException if the table contains entries not
221      * comparable to Long
222      * @since 3.3
223      */
224     public void incrementValue(long v, long increment) throws MathIllegalArgumentException {
225         incrementValue(Long.valueOf(v), increment);
226     }
227 
228     /**
229      * Increments the frequency count for v.
230      * <p>
231      * If other objects have already been added to this Frequency, v must
232      * be comparable to those that have already been added.
233      * </p>
234      *
235      * @param v the value to add.
236      * @param increment the amount by which the value should be incremented
237      * @throws MathIllegalArgumentException if the table contains entries not
238      * comparable to Char
239      * @since 3.3
240      */
241     public void incrementValue(char v, long increment) throws MathIllegalArgumentException {
242         incrementValue(Character.valueOf(v), increment);
243     }
244 
245     /** Clears the frequency table */
246     public void clear() {
247         freqTable.clear();
248     }
249 
250     /**
251      * Returns an Iterator over the set of values that have been added.
252      * <p>
253      * If added values are integral (i.e., integers, longs, Integers, or Longs),
254      * they are converted to Longs when they are added, so the objects returned
255      * by the Iterator will in this case be Longs.</p>
256      *
257      * @return values Iterator
258      */
259     public Iterator<Comparable<?>> valuesIterator() {
260         return freqTable.keySet().iterator();
261     }
262 
263     /**
264      * Return an Iterator over the set of keys and values that have been added.
265      * Using the entry set to iterate is more efficient in the case where you
266      * need to access respective counts as well as values, since it doesn't
267      * require a "get" for every key...the value is provided in the Map.Entry.
268      * <p>
269      * If added values are integral (i.e., integers, longs, Integers, or Longs),
270      * they are converted to Longs when they are added, so the values of the
271      * map entries returned by the Iterator will in this case be Longs.</p>
272      *
273      * @return entry set Iterator
274      * @since 3.1
275      */
276     public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
277         return freqTable.entrySet().iterator();
278     }
279 
280     //-------------------------------------------------------------------------
281 
282     /**
283      * Returns the sum of all frequencies.
284      *
285      * @return the total frequency count.
286      */
287     public long getSumFreq() {
288         long result = 0;
289         Iterator<Long> iterator = freqTable.values().iterator();
290         while (iterator.hasNext())  {
291             result += iterator.next().longValue();
292         }
293         return result;
294     }
295 
296     /**
297      * Returns the number of values equal to v.
298      * Returns 0 if the value is not comparable.
299      *
300      * @param v the value to lookup.
301      * @return the frequency of v.
302      */
303     public long getCount(Comparable<?> v) {
304         if (v instanceof Integer) {
305             return getCount(((Integer) v).longValue());
306         }
307         long result = 0;
308         try {
309             Long count =  freqTable.get(v);
310             if (count != null) {
311                 result = count.longValue();
312             }
313         } catch (ClassCastException ex) { // NOPMD
314             // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
315         }
316         return result;
317     }
318 
319     /**
320      * Returns the number of values equal to v.
321      *
322      * @param v the value to lookup.
323      * @return the frequency of v.
324      */
325     public long getCount(int v) {
326         return getCount(Long.valueOf(v));
327     }
328 
329     /**
330      * Returns the number of values equal to v.
331      *
332      * @param v the value to lookup.
333      * @return the frequency of v.
334      */
335     public long getCount(long v) {
336         return getCount(Long.valueOf(v));
337     }
338 
339     /**
340      * Returns the number of values equal to v.
341      *
342      * @param v the value to lookup.
343      * @return the frequency of v.
344      */
345     public long getCount(char v) {
346         return getCount(Character.valueOf(v));
347     }
348 
349     /**
350      * Returns the number of values in the frequency table.
351      *
352      * @return the number of unique values that have been added to the frequency table.
353      * @see #valuesIterator()
354      */
355     public int getUniqueCount(){
356         return freqTable.keySet().size();
357     }
358 
359     /**
360      * Returns the percentage of values that are equal to v
361      * (as a proportion between 0 and 1).
362      * <p>
363      * Returns <code>Double.NaN</code> if no values have been added.
364      * Returns 0 if at least one value has been added, but v is not comparable
365      * to the values set.</p>
366      *
367      * @param v the value to lookup
368      * @return the proportion of values equal to v
369      */
370     public double getPct(Comparable<?> v) {
371         final long sumFreq = getSumFreq();
372         if (sumFreq == 0) {
373             return Double.NaN;
374         }
375         return (double) getCount(v) / (double) sumFreq;
376     }
377 
378     /**
379      * Returns the percentage of values that are equal to v
380      * (as a proportion between 0 and 1).
381      *
382      * @param v the value to lookup
383      * @return the proportion of values equal to v
384      */
385     public double getPct(int v) {
386         return getPct(Long.valueOf(v));
387     }
388 
389     /**
390      * Returns the percentage of values that are equal to v
391      * (as a proportion between 0 and 1).
392      *
393      * @param v the value to lookup
394      * @return the proportion of values equal to v
395      */
396     public double getPct(long v) {
397         return getPct(Long.valueOf(v));
398     }
399 
400     /**
401      * Returns the percentage of values that are equal to v
402      * (as a proportion between 0 and 1).
403      *
404      * @param v the value to lookup
405      * @return the proportion of values equal to v
406      */
407     public double getPct(char v) {
408         return getPct(Character.valueOf(v));
409     }
410 
411     //-----------------------------------------------------------------------------------------
412 
413     /**
414      * Returns the cumulative frequency of values less than or equal to v.
415      * <p>
416      * Returns 0 if v is not comparable to the values set.</p>
417      *
418      * @param v the value to lookup.
419      * @return the proportion of values equal to v
420      */
421     @SuppressWarnings({ "rawtypes", "unchecked" })
422     public long getCumFreq(Comparable<?> v) {
423         if (getSumFreq() == 0) {
424             return 0;
425         }
426         if (v instanceof Integer) {
427             return getCumFreq(((Integer) v).longValue());
428         }
429         Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
430         if (c == null) {
431             c = new NaturalComparator();
432         }
433         long result = 0;
434 
435         try {
436             Long value = freqTable.get(v);
437             if (value != null) {
438                 result = value.longValue();
439             }
440         } catch (ClassCastException ex) {
441             return result;   // v is not comparable
442         }
443 
444         if (c.compare(v, freqTable.firstKey()) < 0) {
445             return 0;  // v is comparable, but less than first value
446         }
447 
448         if (c.compare(v, freqTable.lastKey()) >= 0) {
449             return getSumFreq();    // v is comparable, but greater than the last value
450         }
451 
452         Iterator<Comparable<?>> values = valuesIterator();
453         while (values.hasNext()) {
454             Comparable<?> nextValue = values.next();
455             if (c.compare(v, nextValue) > 0) {
456                 result += getCount(nextValue);
457             } else {
458                 return result;
459             }
460         }
461         return result;
462     }
463 
464      /**
465      * Returns the cumulative frequency of values less than or equal to v.
466      * <p>
467      * Returns 0 if v is not comparable to the values set.</p>
468      *
469      * @param v the value to lookup
470      * @return the proportion of values equal to v
471      */
472     public long getCumFreq(int v) {
473         return getCumFreq(Long.valueOf(v));
474     }
475 
476      /**
477      * Returns the cumulative frequency of values less than or equal to v.
478      * <p>
479      * Returns 0 if v is not comparable to the values set.</p>
480      *
481      * @param v the value to lookup
482      * @return the proportion of values equal to v
483      */
484     public long getCumFreq(long v) {
485         return getCumFreq(Long.valueOf(v));
486     }
487 
488     /**
489      * Returns the cumulative frequency of values less than or equal to v.
490      * <p>
491      * Returns 0 if v is not comparable to the values set.</p>
492      *
493      * @param v the value to lookup
494      * @return the proportion of values equal to v
495      */
496     public long getCumFreq(char v) {
497         return getCumFreq(Character.valueOf(v));
498     }
499 
500     //----------------------------------------------------------------------------------------------
501 
502     /**
503      * Returns the cumulative percentage of values less than or equal to v
504      * (as a proportion between 0 and 1).
505      * <p>
506      * Returns <code>Double.NaN</code> if no values have been added.
507      * Returns 0 if at least one value has been added, but v is not comparable
508      * to the values set.</p>
509      *
510      * @param v the value to lookup
511      * @return the proportion of values less than or equal to v
512      */
513     public double getCumPct(Comparable<?> v) {
514         final long sumFreq = getSumFreq();
515         if (sumFreq == 0) {
516             return Double.NaN;
517         }
518         return (double) getCumFreq(v) / (double) sumFreq;
519     }
520 
521     /**
522      * Returns the cumulative percentage of values less than or equal to v
523      * (as a proportion between 0 and 1).
524      * <p>
525      * Returns 0 if v is not comparable to the values set.</p>
526      *
527      * @param v the value to lookup
528      * @return the proportion of values less than or equal to v
529      */
530     public double getCumPct(int v) {
531         return getCumPct(Long.valueOf(v));
532     }
533 
534     /**
535      * Returns the cumulative percentage of values less than or equal to v
536      * (as a proportion between 0 and 1).
537      * <p>
538      * Returns 0 if v is not comparable to the values set.</p>
539      *
540      * @param v the value to lookup
541      * @return the proportion of values less than or equal to v
542      */
543     public double getCumPct(long v) {
544         return getCumPct(Long.valueOf(v));
545     }
546 
547     /**
548      * Returns the cumulative percentage of values less than or equal to v
549      * (as a proportion between 0 and 1).
550      * <p>
551      * Returns 0 if v is not comparable to the values set.</p>
552      *
553      * @param v the value to lookup
554      * @return the proportion of values less than or equal to v
555      */
556     public double getCumPct(char v) {
557         return getCumPct(Character.valueOf(v));
558     }
559 
560     /**
561      * Returns the mode value(s) in comparator order.
562      *
563      * @return a list containing the value(s) which appear most often.
564      * @since 3.3
565      */
566     public List<Comparable<?>> getMode() {
567         long mostPopular = 0; // frequencies are always positive
568 
569         // Get the max count first, so we avoid having to recreate the List each time
570         for(Long l : freqTable.values()) {
571             long frequency = l.longValue();
572             if (frequency > mostPopular) {
573                 mostPopular = frequency;
574             }
575         }
576 
577         List<Comparable<?>> modeList = new ArrayList<Comparable<?>>();
578         for (Entry<Comparable<?>, Long> ent : freqTable.entrySet()) {
579             long frequency = ent.getValue().longValue();
580             if (frequency == mostPopular) {
581                modeList.add(ent.getKey());
582             }
583         }
584         return modeList;
585     }
586 
587     //----------------------------------------------------------------------------------------------
588 
589     /**
590      * Merge another Frequency object's counts into this instance.
591      * This Frequency's counts will be incremented (or set when not already set)
592      * by the counts represented by other.
593      *
594      * @param other the other {@link Frequency} object to be merged
595      * @throws NullArgumentException if {@code other} is null
596      * @since 3.1
597      */
598     public void merge(final Frequency other) throws NullArgumentException {
599         MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
600 
601         final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
602         while (iter.hasNext()) {
603             final Map.Entry<Comparable<?>, Long> entry = iter.next();
604             incrementValue(entry.getKey(), entry.getValue().longValue());
605         }
606     }
607 
608     /**
609      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
610      * This Frequency's counts will be incremented (or set when not already set)
611      * by the counts represented by each of the others.
612      *
613      * @param others the other {@link Frequency} objects to be merged
614      * @throws NullArgumentException if the collection is null
615      * @since 3.1
616      */
617     public void merge(final Collection<Frequency> others) throws NullArgumentException {
618         MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
619 
620         for (final Frequency freq : others) {
621             merge(freq);
622         }
623     }
624 
625     //----------------------------------------------------------------------------------------------
626 
627     /**
628      * A Comparator that compares comparable objects using the
629      * natural order.  Copied from Commons Collections ComparableComparator.
630      */
631     private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
632 
633         /** Serializable version identifier */
634         private static final long serialVersionUID = -3852193713161395148L;
635 
636         /**
637          * Compare the two {@link Comparable Comparable} arguments.
638          * This method is equivalent to:
639          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
640          *
641          * @param  o1 the first object
642          * @param  o2 the second object
643          * @return  result of comparison
644          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
645          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
646          * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
647          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
648          */
649         @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
650         public int compare(Comparable<T> o1, Comparable<T> o2) {
651             return o1.compareTo((T) o2);
652         }
653     }
654 
655     /** {@inheritDoc} */
656     @Override
657     public int hashCode() {
658         final int prime = 31;
659         int result = 1;
660         result = prime * result +
661                  ((freqTable == null) ? 0 : freqTable.hashCode());
662         return result;
663     }
664 
665     /** {@inheritDoc} */
666     @Override
667     public boolean equals(Object obj) {
668         if (this == obj) {
669             return true;
670         }
671         if (!(obj instanceof Frequency)) {
672             return false;
673         }
674         Frequency other = (Frequency) obj;
675         if (freqTable == null) {
676             if (other.freqTable != null) {
677                 return false;
678             }
679         } else if (!freqTable.equals(other.freqTable)) {
680             return false;
681         }
682         return true;
683     }
684 
685 }