View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math3.stat;
18  
19  import java.io.Serializable;
20  import java.text.NumberFormat;
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.Iterator;
24  import java.util.Comparator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Map.Entry;
28  import java.util.TreeMap;
29  
30  import org.apache.commons.math3.exception.MathIllegalArgumentException;
31  import org.apache.commons.math3.exception.NullArgumentException;
32  import org.apache.commons.math3.exception.util.LocalizedFormats;
33  import org.apache.commons.math3.util.MathUtils;
34  
35  /**
36   * Maintains a frequency distribution.
37   * <p>
38   * Accepts int, long, char or Comparable values.  New values added must be
39   * comparable to those that have been added, otherwise the add method will
40   * throw an IllegalArgumentException.</p>
41   * <p>
42   * Integer values (int, long, Integer, Long) are not distinguished by type --
43   * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
44   * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
45   * <p>NOTE: byte and short values will be implicitly converted to int values
46   * by the compiler, thus there are no explicit overloaded methods for these
47   * primitive types.</p>
48   * <p>
49   * char values are converted by <code>addValue</code> to Character instances.
50   * As such, these values are not comparable to integral values, so attempts
51   * to combine integral types with chars in a frequency distribution will fail.
52   * </p>
53   * <p>
54   * Float is not coerced to Double.
55   * Since they are not Comparable with each other the user must do any necessary coercion.
56   * Float.NaN and Double.NaN are not treated specially; they may occur in input and will
57   * occur in output if appropriate.
58   * </b>
59   * <p>
60   * The values are ordered using the default (natural order), unless a
61   * <code>Comparator</code> is supplied in the constructor.</p>
62   *
63   */
64  public class Frequency implements Serializable {
65  
66      /** Serializable version identifier */
67      private static final long serialVersionUID = -3845586908418844111L;
68  
69      /** underlying collection */
70      private final TreeMap<Comparable<?>, Long> freqTable;
71  
72      /**
73       * Default constructor.
74       */
75      public Frequency() {
76          freqTable = new TreeMap<Comparable<?>, Long>();
77      }
78  
79      /**
80       * Constructor allowing values Comparator to be specified.
81       *
82       * @param comparator Comparator used to order values
83       */
84      @SuppressWarnings("unchecked") // TODO is the cast OK?
85      public Frequency(Comparator<?> comparator) {
86          freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
87      }
88  
89      /**
90       * Return a string representation of this frequency distribution.
91       *
92       * @return a string representation.
93       */
94      @Override
95      public String toString() {
96          NumberFormat nf = NumberFormat.getPercentInstance();
97          StringBuilder outBuffer = new StringBuilder();
98          outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
99          Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
100         while (iter.hasNext()) {
101             Comparable<?> value = iter.next();
102             outBuffer.append(value);
103             outBuffer.append('\t');
104             outBuffer.append(getCount(value));
105             outBuffer.append('\t');
106             outBuffer.append(nf.format(getPct(value)));
107             outBuffer.append('\t');
108             outBuffer.append(nf.format(getCumPct(value)));
109             outBuffer.append('\n');
110         }
111         return outBuffer.toString();
112     }
113 
114     /**
115      * Adds 1 to the frequency count for v.
116      * <p>
117      * If other objects have already been added to this Frequency, v must
118      * be comparable to those that have already been added.
119      * </p>
120      *
121      * @param v the value to add.
122      * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
123      */
124     public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
125         incrementValue(v, 1);
126     }
127 
128     /**
129      * Adds 1 to the frequency count for v.
130      *
131      * @param v the value to add.
132      * @throws MathIllegalArgumentException if the table contains entries not
133      * comparable to Long
134      */
135     public void addValue(int v) throws MathIllegalArgumentException {
136         addValue(Long.valueOf(v));
137     }
138 
139     /**
140      * Adds 1 to the frequency count for v.
141      *
142      * @param v the value to add.
143      * @throws MathIllegalArgumentException if the table contains entries not
144      * comparable to Long
145      */
146     public void addValue(long v) throws MathIllegalArgumentException {
147         addValue(Long.valueOf(v));
148     }
149 
150     /**
151      * Adds 1 to the frequency count for v.
152      *
153      * @param v the value to add.
154      * @throws MathIllegalArgumentException if the table contains entries not
155      * comparable to Char
156      */
157     public void addValue(char v) throws MathIllegalArgumentException {
158         addValue(Character.valueOf(v));
159     }
160 
161     /**
162      * Increments the frequency count for v.
163      * <p>
164      * If other objects have already been added to this Frequency, v must
165      * be comparable to those that have already been added.
166      * </p>
167      *
168      * @param v the value to add.
169      * @param increment the amount by which the value should be incremented
170      * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
171      * @since 3.1
172      */
173     public void incrementValue(Comparable<?> v, long increment) throws MathIllegalArgumentException {
174         Comparable<?> obj = v;
175         if (v instanceof Integer) {
176             obj = Long.valueOf(((Integer) v).longValue());
177         }
178         try {
179             Long count = freqTable.get(obj);
180             if (count == null) {
181                 freqTable.put(obj, Long.valueOf(increment));
182             } else {
183                 freqTable.put(obj, Long.valueOf(count.longValue() + increment));
184             }
185         } catch (ClassCastException ex) {
186             //TreeMap will throw ClassCastException if v is not comparable
187             throw new MathIllegalArgumentException(
188                   LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
189                   v.getClass().getName());
190         }
191     }
192 
193     /**
194      * Increments the frequency count for v.
195      * <p>
196      * If other objects have already been added to this Frequency, v must
197      * be comparable to those that have already been added.
198      * </p>
199      *
200      * @param v the value to add.
201      * @param increment the amount by which the value should be incremented
202      * @throws MathIllegalArgumentException if the table contains entries not
203      * comparable to Long
204      * @since 3.3
205      */
206     public void incrementValue(int v, long increment) throws MathIllegalArgumentException {
207         incrementValue(Long.valueOf(v), increment);
208     }
209 
210     /**
211      * Increments the frequency count for v.
212      * <p>
213      * If other objects have already been added to this Frequency, v must
214      * be comparable to those that have already been added.
215      * </p>
216      *
217      * @param v the value to add.
218      * @param increment the amount by which the value should be incremented
219      * @throws MathIllegalArgumentException if the table contains entries not
220      * comparable to Long
221      * @since 3.3
222      */
223     public void incrementValue(long v, long increment) throws MathIllegalArgumentException {
224         incrementValue(Long.valueOf(v), increment);
225     }
226 
227     /**
228      * Increments the frequency count for v.
229      * <p>
230      * If other objects have already been added to this Frequency, v must
231      * be comparable to those that have already been added.
232      * </p>
233      *
234      * @param v the value to add.
235      * @param increment the amount by which the value should be incremented
236      * @throws MathIllegalArgumentException if the table contains entries not
237      * comparable to Char
238      * @since 3.3
239      */
240     public void incrementValue(char v, long increment) throws MathIllegalArgumentException {
241         incrementValue(Character.valueOf(v), increment);
242     }
243 
244     /** Clears the frequency table */
245     public void clear() {
246         freqTable.clear();
247     }
248 
249     /**
250      * Returns an Iterator over the set of values that have been added.
251      * <p>
252      * If added values are integral (i.e., integers, longs, Integers, or Longs),
253      * they are converted to Longs when they are added, so the objects returned
254      * by the Iterator will in this case be Longs.</p>
255      *
256      * @return values Iterator
257      */
258     public Iterator<Comparable<?>> valuesIterator() {
259         return freqTable.keySet().iterator();
260     }
261 
262     /**
263      * Return an Iterator over the set of keys and values that have been added.
264      * Using the entry set to iterate is more efficient in the case where you
265      * need to access respective counts as well as values, since it doesn't
266      * require a "get" for every key...the value is provided in the Map.Entry.
267      * <p>
268      * If added values are integral (i.e., integers, longs, Integers, or Longs),
269      * they are converted to Longs when they are added, so the values of the
270      * map entries returned by the Iterator will in this case be Longs.</p>
271      *
272      * @return entry set Iterator
273      * @since 3.1
274      */
275     public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
276         return freqTable.entrySet().iterator();
277     }
278 
279     //-------------------------------------------------------------------------
280 
281     /**
282      * Returns the sum of all frequencies.
283      *
284      * @return the total frequency count.
285      */
286     public long getSumFreq() {
287         long result = 0;
288         Iterator<Long> iterator = freqTable.values().iterator();
289         while (iterator.hasNext())  {
290             result += iterator.next().longValue();
291         }
292         return result;
293     }
294 
295     /**
296      * Returns the number of values equal to v.
297      * Returns 0 if the value is not comparable.
298      *
299      * @param v the value to lookup.
300      * @return the frequency of v.
301      */
302     public long getCount(Comparable<?> v) {
303         if (v instanceof Integer) {
304             return getCount(((Integer) v).longValue());
305         }
306         long result = 0;
307         try {
308             Long count =  freqTable.get(v);
309             if (count != null) {
310                 result = count.longValue();
311             }
312         } catch (ClassCastException ex) { // NOPMD
313             // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
314         }
315         return result;
316     }
317 
318     /**
319      * Returns the number of values equal to v.
320      *
321      * @param v the value to lookup.
322      * @return the frequency of v.
323      */
324     public long getCount(int v) {
325         return getCount(Long.valueOf(v));
326     }
327 
328     /**
329      * Returns the number of values equal to v.
330      *
331      * @param v the value to lookup.
332      * @return the frequency of v.
333      */
334     public long getCount(long v) {
335         return getCount(Long.valueOf(v));
336     }
337 
338     /**
339      * Returns the number of values equal to v.
340      *
341      * @param v the value to lookup.
342      * @return the frequency of v.
343      */
344     public long getCount(char v) {
345         return getCount(Character.valueOf(v));
346     }
347 
348     /**
349      * Returns the number of values in the frequency table.
350      *
351      * @return the number of unique values that have been added to the frequency table.
352      * @see #valuesIterator()
353      */
354     public int getUniqueCount(){
355         return freqTable.keySet().size();
356     }
357 
358     /**
359      * Returns the percentage of values that are equal to v
360      * (as a proportion between 0 and 1).
361      * <p>
362      * Returns <code>Double.NaN</code> if no values have been added.
363      * Returns 0 if at least one value has been added, but v is not comparable
364      * to the values set.</p>
365      *
366      * @param v the value to lookup
367      * @return the proportion of values equal to v
368      */
369     public double getPct(Comparable<?> v) {
370         final long sumFreq = getSumFreq();
371         if (sumFreq == 0) {
372             return Double.NaN;
373         }
374         return (double) getCount(v) / (double) sumFreq;
375     }
376 
377     /**
378      * Returns the percentage of values that are equal to v
379      * (as a proportion between 0 and 1).
380      *
381      * @param v the value to lookup
382      * @return the proportion of values equal to v
383      */
384     public double getPct(int v) {
385         return getPct(Long.valueOf(v));
386     }
387 
388     /**
389      * Returns the percentage of values that are equal to v
390      * (as a proportion between 0 and 1).
391      *
392      * @param v the value to lookup
393      * @return the proportion of values equal to v
394      */
395     public double getPct(long v) {
396         return getPct(Long.valueOf(v));
397     }
398 
399     /**
400      * Returns the percentage of values that are equal to v
401      * (as a proportion between 0 and 1).
402      *
403      * @param v the value to lookup
404      * @return the proportion of values equal to v
405      */
406     public double getPct(char v) {
407         return getPct(Character.valueOf(v));
408     }
409 
410     //-----------------------------------------------------------------------------------------
411 
412     /**
413      * Returns the cumulative frequency of values less than or equal to v.
414      * <p>
415      * Returns 0 if v is not comparable to the values set.</p>
416      *
417      * @param v the value to lookup.
418      * @return the proportion of values equal to v
419      */
420     @SuppressWarnings({ "rawtypes", "unchecked" })
421     public long getCumFreq(Comparable<?> v) {
422         if (getSumFreq() == 0) {
423             return 0;
424         }
425         if (v instanceof Integer) {
426             return getCumFreq(((Integer) v).longValue());
427         }
428         Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
429         if (c == null) {
430             c = new NaturalComparator();
431         }
432         long result = 0;
433 
434         try {
435             Long value = freqTable.get(v);
436             if (value != null) {
437                 result = value.longValue();
438             }
439         } catch (ClassCastException ex) {
440             return result;   // v is not comparable
441         }
442 
443         if (c.compare(v, freqTable.firstKey()) < 0) {
444             return 0;  // v is comparable, but less than first value
445         }
446 
447         if (c.compare(v, freqTable.lastKey()) >= 0) {
448             return getSumFreq();    // v is comparable, but greater than the last value
449         }
450 
451         Iterator<Comparable<?>> values = valuesIterator();
452         while (values.hasNext()) {
453             Comparable<?> nextValue = values.next();
454             if (c.compare(v, nextValue) > 0) {
455                 result += getCount(nextValue);
456             } else {
457                 return result;
458             }
459         }
460         return result;
461     }
462 
463      /**
464      * Returns the cumulative frequency of values less than or equal to v.
465      * <p>
466      * Returns 0 if v is not comparable to the values set.</p>
467      *
468      * @param v the value to lookup
469      * @return the proportion of values equal to v
470      */
471     public long getCumFreq(int v) {
472         return getCumFreq(Long.valueOf(v));
473     }
474 
475      /**
476      * Returns the cumulative frequency of values less than or equal to v.
477      * <p>
478      * Returns 0 if v is not comparable to the values set.</p>
479      *
480      * @param v the value to lookup
481      * @return the proportion of values equal to v
482      */
483     public long getCumFreq(long v) {
484         return getCumFreq(Long.valueOf(v));
485     }
486 
487     /**
488      * Returns the cumulative frequency of values less than or equal to v.
489      * <p>
490      * Returns 0 if v is not comparable to the values set.</p>
491      *
492      * @param v the value to lookup
493      * @return the proportion of values equal to v
494      */
495     public long getCumFreq(char v) {
496         return getCumFreq(Character.valueOf(v));
497     }
498 
499     //----------------------------------------------------------------------------------------------
500 
501     /**
502      * Returns the cumulative percentage of values less than or equal to v
503      * (as a proportion between 0 and 1).
504      * <p>
505      * Returns <code>Double.NaN</code> if no values have been added.
506      * Returns 0 if at least one value has been added, but v is not comparable
507      * to the values set.</p>
508      *
509      * @param v the value to lookup
510      * @return the proportion of values less than or equal to v
511      */
512     public double getCumPct(Comparable<?> v) {
513         final long sumFreq = getSumFreq();
514         if (sumFreq == 0) {
515             return Double.NaN;
516         }
517         return (double) getCumFreq(v) / (double) sumFreq;
518     }
519 
520     /**
521      * Returns the cumulative percentage of values less than or equal to v
522      * (as a proportion between 0 and 1).
523      * <p>
524      * Returns 0 if v is not comparable to the values set.</p>
525      *
526      * @param v the value to lookup
527      * @return the proportion of values less than or equal to v
528      */
529     public double getCumPct(int v) {
530         return getCumPct(Long.valueOf(v));
531     }
532 
533     /**
534      * Returns the cumulative percentage of values less than or equal to v
535      * (as a proportion between 0 and 1).
536      * <p>
537      * Returns 0 if v is not comparable to the values set.</p>
538      *
539      * @param v the value to lookup
540      * @return the proportion of values less than or equal to v
541      */
542     public double getCumPct(long v) {
543         return getCumPct(Long.valueOf(v));
544     }
545 
546     /**
547      * Returns the cumulative percentage of values less than or equal to v
548      * (as a proportion between 0 and 1).
549      * <p>
550      * Returns 0 if v is not comparable to the values set.</p>
551      *
552      * @param v the value to lookup
553      * @return the proportion of values less than or equal to v
554      */
555     public double getCumPct(char v) {
556         return getCumPct(Character.valueOf(v));
557     }
558 
559     /**
560      * Returns the mode value(s) in comparator order.
561      *
562      * @return a list containing the value(s) which appear most often.
563      * @since 3.3
564      */
565     public List<Comparable<?>> getMode() {
566         long mostPopular = 0; // frequencies are always positive
567 
568         // Get the max count first, so we avoid having to recreate the List each time
569         for(Long l : freqTable.values()) {
570             long frequency = l.longValue();
571             if (frequency > mostPopular) {
572                 mostPopular = frequency;
573             }
574         }
575 
576         List<Comparable<?>> modeList = new ArrayList<Comparable<?>>();
577         for (Entry<Comparable<?>, Long> ent : freqTable.entrySet()) {
578             long frequency = ent.getValue().longValue();
579             if (frequency == mostPopular) {
580                modeList.add(ent.getKey());
581             }
582         }
583         return modeList;
584     }
585 
586     //----------------------------------------------------------------------------------------------
587 
588     /**
589      * Merge another Frequency object's counts into this instance.
590      * This Frequency's counts will be incremented (or set when not already set)
591      * by the counts represented by other.
592      *
593      * @param other the other {@link Frequency} object to be merged
594      * @throws NullArgumentException if {@code other} is null
595      * @since 3.1
596      */
597     public void merge(final Frequency other) throws NullArgumentException {
598         MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
599 
600         final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
601         while (iter.hasNext()) {
602             final Map.Entry<Comparable<?>, Long> entry = iter.next();
603             incrementValue(entry.getKey(), entry.getValue().longValue());
604         }
605     }
606 
607     /**
608      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
609      * This Frequency's counts will be incremented (or set when not already set)
610      * by the counts represented by each of the others.
611      *
612      * @param others the other {@link Frequency} objects to be merged
613      * @throws NullArgumentException if the collection is null
614      * @since 3.1
615      */
616     public void merge(final Collection<Frequency> others) throws NullArgumentException {
617         MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
618 
619         for (final Frequency freq : others) {
620             merge(freq);
621         }
622     }
623 
624     //----------------------------------------------------------------------------------------------
625 
626     /**
627      * A Comparator that compares comparable objects using the
628      * natural order.  Copied from Commons Collections ComparableComparator.
629      */
630     private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
631 
632         /** Serializable version identifier */
633         private static final long serialVersionUID = -3852193713161395148L;
634 
635         /**
636          * Compare the two {@link Comparable Comparable} arguments.
637          * This method is equivalent to:
638          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
639          *
640          * @param  o1 the first object
641          * @param  o2 the second object
642          * @return  result of comparison
643          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
644          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
645          * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
646          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
647          */
648         @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
649         public int compare(Comparable<T> o1, Comparable<T> o2) {
650             return o1.compareTo((T) o2);
651         }
652     }
653 
654     /** {@inheritDoc} */
655     @Override
656     public int hashCode() {
657         final int prime = 31;
658         int result = 1;
659         result = prime * result +
660                  ((freqTable == null) ? 0 : freqTable.hashCode());
661         return result;
662     }
663 
664     /** {@inheritDoc} */
665     @Override
666     public boolean equals(Object obj) {
667         if (this == obj) {
668             return true;
669         }
670         if (!(obj instanceof Frequency)) {
671             return false;
672         }
673         Frequency other = (Frequency) obj;
674         if (freqTable == null) {
675             if (other.freqTable != null) {
676                 return false;
677             }
678         } else if (!freqTable.equals(other.freqTable)) {
679             return false;
680         }
681         return true;
682     }
683 
684 }