View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat;
18  
19  import java.io.Serializable;
20  import java.text.NumberFormat;
21  import java.util.Iterator;
22  import java.util.Comparator;
23  import java.util.TreeMap;
24  
25  import org.apache.commons.math.exception.MathIllegalArgumentException;
26  import org.apache.commons.math.exception.util.LocalizedFormats;
27  
28  /**
29   * Maintains a frequency distribution.
30   * <p>
31   * Accepts int, long, char or Comparable values.  New values added must be
32   * comparable to those that have been added, otherwise the add method will
33   * throw an IllegalArgumentException.</p>
34   * <p>
35   * Integer values (int, long, Integer, Long) are not distinguished by type --
36   * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
37   * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
38   * <p>
39   * char values are converted by <code>addValue</code> to Character instances.
40   * As such, these values are not comparable to integral values, so attempts
41   * to combine integral types with chars in a frequency distribution will fail.
42   * </p>
43   * <p>
44   * The values are ordered using the default (natural order), unless a
45   * <code>Comparator</code> is supplied in the constructor.</p>
46   *
47   * @version $Id: Frequency.java 1178223 2011-10-02 19:03:49Z psteitz $
48   */
49  public class Frequency implements Serializable {
50  
51      /** Serializable version identifier */
52      private static final long serialVersionUID = -3845586908418844111L;
53  
54      /** underlying collection */
55      private final TreeMap<Comparable<?>, Long> freqTable;
56  
57      /**
58       * Default constructor.
59       */
60      public Frequency() {
61          freqTable = new TreeMap<Comparable<?>, Long>();
62      }
63  
64      /**
65       * Constructor allowing values Comparator to be specified.
66       *
67       * @param comparator Comparator used to order values
68       */
69      @SuppressWarnings("unchecked") // TODO is the cast OK?
70      public Frequency(Comparator<?> comparator) {
71          freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
72      }
73  
74      /**
75       * Return a string representation of this frequency
76       * distribution.
77       *
78       * @return a string representation.
79       */
80      @Override
81      public String toString() {
82          NumberFormat nf = NumberFormat.getPercentInstance();
83          StringBuilder outBuffer = new StringBuilder();
84          outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
85          Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
86          while (iter.hasNext()) {
87              Comparable<?> value = iter.next();
88              outBuffer.append(value);
89              outBuffer.append('\t');
90              outBuffer.append(getCount(value));
91              outBuffer.append('\t');
92              outBuffer.append(nf.format(getPct(value)));
93              outBuffer.append('\t');
94              outBuffer.append(nf.format(getCumPct(value)));
95              outBuffer.append('\n');
96          }
97          return outBuffer.toString();
98      }
99  
100     /**
101      * Adds 1 to the frequency count for v.
102      * <p>
103      * If other objects have already been added to this Frequency, v must
104      * be comparable to those that have already been added.
105      * </p>
106      *
107      * @param v the value to add.
108      * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
109      */
110     public void addValue(Comparable<?> v){
111         Comparable<?> obj = v;
112         if (v instanceof Integer) {
113            obj = Long.valueOf(((Integer) v).longValue());
114         }
115         try {
116             Long count = freqTable.get(obj);
117             if (count == null) {
118                 freqTable.put(obj, Long.valueOf(1));
119             } else {
120                 freqTable.put(obj, Long.valueOf(count.longValue() + 1));
121             }
122         } catch (ClassCastException ex) {
123             //TreeMap will throw ClassCastException if v is not comparable
124             throw new MathIllegalArgumentException(
125                   LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
126                   v.getClass().getName());
127         }
128     }
129 
130     /**
131      * Adds 1 to the frequency count for v.
132      *
133      * @param v the value to add.
134      */
135     public void addValue(int v) {
136         addValue(Long.valueOf(v));
137     }
138 
139     /**
140      * Adds 1 to the frequency count for v.
141      *
142      * @param v the value to add.
143      */
144     public void addValue(long v) {
145         addValue(Long.valueOf(v));
146     }
147 
148     /**
149      * Adds 1 to the frequency count for v.
150      *
151      * @param v the value to add.
152      */
153     public void addValue(char v) {
154         addValue(Character.valueOf(v));
155     }
156 
157     /** Clears the frequency table */
158     public void clear() {
159         freqTable.clear();
160     }
161 
162     /**
163      * Returns an Iterator over the set of values that have been added.
164      * <p>
165      * If added values are integral (i.e., integers, longs, Integers, or Longs),
166      * they are converted to Longs when they are added, so the objects returned
167      * by the Iterator will in this case be Longs.</p>
168      *
169      * @return values Iterator
170      */
171     public Iterator<Comparable<?>> valuesIterator() {
172         return freqTable.keySet().iterator();
173     }
174 
175     //-------------------------------------------------------------------------
176 
177     /**
178      * Returns the sum of all frequencies.
179      *
180      * @return the total frequency count.
181      */
182     public long getSumFreq() {
183         long result = 0;
184         Iterator<Long> iterator = freqTable.values().iterator();
185         while (iterator.hasNext())  {
186             result += iterator.next().longValue();
187         }
188         return result;
189     }
190 
191     /**
192      * Returns the number of values = v.
193      * Returns 0 if the value is not comparable.
194      *
195      * @param v the value to lookup.
196      * @return the frequency of v.
197      */
198     public long getCount(Comparable<?> v) {
199         if (v instanceof Integer) {
200             return getCount(((Integer) v).longValue());
201         }
202         long result = 0;
203         try {
204             Long count =  freqTable.get(v);
205             if (count != null) {
206                 result = count.longValue();
207             }
208         } catch (ClassCastException ex) {
209             // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
210         }
211         return result;
212     }
213 
214     /**
215      * Returns the number of values = v.
216      *
217      * @param v the value to lookup.
218      * @return the frequency of v.
219      */
220     public long getCount(int v) {
221         return getCount(Long.valueOf(v));
222     }
223 
224     /**
225      * Returns the number of values = v.
226      *
227      * @param v the value to lookup.
228      * @return the frequency of v.
229      */
230     public long getCount(long v) {
231         return getCount(Long.valueOf(v));
232     }
233 
234     /**
235      * Returns the number of values = v.
236      *
237      * @param v the value to lookup.
238      * @return the frequency of v.
239      */
240     public long getCount(char v) {
241         return getCount(Character.valueOf(v));
242     }
243 
244     /**
245      * Returns the number of values in the frequency table.
246      *
247      * @return the number of unique values that have been added to the frequency table.
248      * @see #valuesIterator()
249      */
250     public int getUniqueCount(){
251         return freqTable.keySet().size();
252     }
253 
254     /**
255      * Returns the percentage of values that are equal to v
256      * (as a proportion between 0 and 1).
257      * <p>
258      * Returns <code>Double.NaN</code> if no values have been added.</p>
259      *
260      * @param v the value to lookup
261      * @return the proportion of values equal to v
262      */
263     public double getPct(Comparable<?> v) {
264         final long sumFreq = getSumFreq();
265         if (sumFreq == 0) {
266             return Double.NaN;
267         }
268         return (double) getCount(v) / (double) sumFreq;
269     }
270 
271     /**
272      * Returns the percentage of values that are equal to v
273      * (as a proportion between 0 and 1).
274      *
275      * @param v the value to lookup
276      * @return the proportion of values equal to v
277      */
278     public double getPct(int v) {
279         return getPct(Long.valueOf(v));
280     }
281 
282     /**
283      * Returns the percentage of values that are equal to v
284      * (as a proportion between 0 and 1).
285      *
286      * @param v the value to lookup
287      * @return the proportion of values equal to v
288      */
289     public double getPct(long v) {
290         return getPct(Long.valueOf(v));
291     }
292 
293     /**
294      * Returns the percentage of values that are equal to v
295      * (as a proportion between 0 and 1).
296      *
297      * @param v the value to lookup
298      * @return the proportion of values equal to v
299      */
300     public double getPct(char v) {
301         return getPct(Character.valueOf(v));
302     }
303 
304     //-----------------------------------------------------------------------------------------
305 
306     /**
307      * Returns the cumulative frequency of values less than or equal to v.
308      * <p>
309      * Returns 0 if v is not comparable to the values set.</p>
310      *
311      * @param v the value to lookup.
312      * @return the proportion of values equal to v
313      */
314     public long getCumFreq(Comparable<?> v) {
315         if (getSumFreq() == 0) {
316             return 0;
317         }
318         if (v instanceof Integer) {
319             return getCumFreq(((Integer) v).longValue());
320         }
321         @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
322         Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
323         if (c == null) {
324             c = new NaturalComparator();
325         }
326         long result = 0;
327 
328         try {
329             Long value = freqTable.get(v);
330             if (value != null) {
331                 result = value.longValue();
332             }
333         } catch (ClassCastException ex) {
334             return result;   // v is not comparable
335         }
336 
337         if (c.compare(v, freqTable.firstKey()) < 0) {
338             return 0;  // v is comparable, but less than first value
339         }
340 
341         if (c.compare(v, freqTable.lastKey()) >= 0) {
342             return getSumFreq();    // v is comparable, but greater than the last value
343         }
344 
345         Iterator<Comparable<?>> values = valuesIterator();
346         while (values.hasNext()) {
347             Comparable<?> nextValue = values.next();
348             if (c.compare(v, nextValue) > 0) {
349                 result += getCount(nextValue);
350             } else {
351                 return result;
352             }
353         }
354         return result;
355     }
356 
357      /**
358      * Returns the cumulative frequency of values less than or equal to v.
359      * <p>
360      * Returns 0 if v is not comparable to the values set.</p>
361      *
362      * @param v the value to lookup
363      * @return the proportion of values equal to v
364      */
365     public long getCumFreq(int v) {
366         return getCumFreq(Long.valueOf(v));
367     }
368 
369      /**
370      * Returns the cumulative frequency of values less than or equal to v.
371      * <p>
372      * Returns 0 if v is not comparable to the values set.</p>
373      *
374      * @param v the value to lookup
375      * @return the proportion of values equal to v
376      */
377     public long getCumFreq(long v) {
378         return getCumFreq(Long.valueOf(v));
379     }
380 
381     /**
382      * Returns the cumulative frequency of values less than or equal to v.
383      * <p>
384      * Returns 0 if v is not comparable to the values set.</p>
385      *
386      * @param v the value to lookup
387      * @return the proportion of values equal to v
388      */
389     public long getCumFreq(char v) {
390         return getCumFreq(Character.valueOf(v));
391     }
392 
393     //----------------------------------------------------------------------------------------------
394 
395     /**
396      * Returns the cumulative percentage of values less than or equal to v
397      * (as a proportion between 0 and 1).
398      * <p>
399      * Returns <code>Double.NaN</code> if no values have been added.
400      * Returns 0 if at least one value has been added, but v is not comparable
401      * to the values set.</p>
402      *
403      * @param v the value to lookup
404      * @return the proportion of values less than or equal to v
405      */
406     public double getCumPct(Comparable<?> v) {
407         final long sumFreq = getSumFreq();
408         if (sumFreq == 0) {
409             return Double.NaN;
410         }
411         return (double) getCumFreq(v) / (double) sumFreq;
412     }
413 
414     /**
415      * Returns the cumulative percentage of values less than or equal to v
416      * (as a proportion between 0 and 1).
417      * <p>
418      * Returns 0 if v is not comparable to the values set.</p>
419      *
420      * @param v the value to lookup
421      * @return the proportion of values less than or equal to v
422      */
423     public double getCumPct(int v) {
424         return getCumPct(Long.valueOf(v));
425     }
426 
427     /**
428      * Returns the cumulative percentage of values less than or equal to v
429      * (as a proportion between 0 and 1).
430      * <p>
431      * Returns 0 if v is not comparable to the values set.</p>
432      *
433      * @param v the value to lookup
434      * @return the proportion of values less than or equal to v
435      */
436     public double getCumPct(long v) {
437         return getCumPct(Long.valueOf(v));
438     }
439 
440     /**
441      * Returns the cumulative percentage of values less than or equal to v
442      * (as a proportion between 0 and 1).
443      * <p>
444      * Returns 0 if v is not comparable to the values set.</p>
445      *
446      * @param v the value to lookup
447      * @return the proportion of values less than or equal to v
448      */
449     public double getCumPct(char v) {
450         return getCumPct(Character.valueOf(v));
451     }
452 
453     /**
454      * A Comparator that compares comparable objects using the
455      * natural order.  Copied from Commons Collections ComparableComparator.
456      */
457     private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
458 
459         /** Serializable version identifier */
460         private static final long serialVersionUID = -3852193713161395148L;
461 
462         /**
463          * Compare the two {@link Comparable Comparable} arguments.
464          * This method is equivalent to:
465          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
466          *
467          * @param  o1 the first object
468          * @param  o2 the second object
469          * @return  result of comparison
470          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
471          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
472          * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
473          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
474          */
475         @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
476         public int compare(Comparable<T> o1, Comparable<T> o2) {
477             return o1.compareTo((T) o2);
478         }
479     }
480 
481     /** {@inheritDoc} */
482     @Override
483     public int hashCode() {
484         final int prime = 31;
485         int result = 1;
486         result = prime * result +
487                  ((freqTable == null) ? 0 : freqTable.hashCode());
488         return result;
489     }
490 
491     /** {@inheritDoc} */
492     @Override
493     public boolean equals(Object obj) {
494         if (this == obj) {
495             return true;
496         }
497         if (!(obj instanceof Frequency)) {
498             return false;
499         }
500         Frequency other = (Frequency) obj;
501         if (freqTable == null) {
502             if (other.freqTable != null) {
503                 return false;
504             }
505         } else if (!freqTable.equals(other.freqTable)) {
506             return false;
507         }
508         return true;
509     }
510 
511 }