001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat;
018
019import java.io.Serializable;
020import java.text.NumberFormat;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Iterator;
024import java.util.Comparator;
025import java.util.List;
026import java.util.Map;
027import java.util.Map.Entry;
028import java.util.SortedMap;
029import java.util.TreeMap;
030
031import org.apache.commons.math3.exception.MathIllegalArgumentException;
032import org.apache.commons.math3.exception.NullArgumentException;
033import org.apache.commons.math3.exception.util.LocalizedFormats;
034import org.apache.commons.math3.util.MathUtils;
035
036/**
037 * Maintains a frequency distribution.
038 * <p>
039 * Accepts int, long, char or Comparable values.  New values added must be
040 * comparable to those that have been added, otherwise the add method will
041 * throw an IllegalArgumentException.</p>
042 * <p>
043 * Integer values (int, long, Integer, Long) are not distinguished by type --
044 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
045 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
046 * <p>NOTE: byte and short values will be implicitly converted to int values
047 * by the compiler, thus there are no explicit overloaded methods for these
048 * primitive types.</p>
049 * <p>
050 * char values are converted by <code>addValue</code> to Character instances.
051 * As such, these values are not comparable to integral values, so attempts
052 * to combine integral types with chars in a frequency distribution will fail.
053 * </p>
054 * <p>
055 * Float is not coerced to Double.
056 * Since they are not Comparable with each other the user must do any necessary coercion.
057 * Float.NaN and Double.NaN are not treated specially; they may occur in input and will
058 * occur in output if appropriate.
059 * </b>
060 * <p>
061 * The values are ordered using the default (natural order), unless a
062 * <code>Comparator</code> is supplied in the constructor.</p>
063 *
064 */
065public class Frequency implements Serializable {
066
067    /** Serializable version identifier */
068    private static final long serialVersionUID = -3845586908418844111L;
069
070    /** underlying collection */
071    private final SortedMap<Comparable<?>, Long> freqTable;
072
073    /**
074     * Default constructor.
075     */
076    public Frequency() {
077        freqTable = new TreeMap<Comparable<?>, Long>();
078    }
079
080    /**
081     * Constructor allowing values Comparator to be specified.
082     *
083     * @param comparator Comparator used to order values
084     */
085    @SuppressWarnings("unchecked") // TODO is the cast OK?
086    public Frequency(Comparator<?> comparator) {
087        freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
088    }
089
090    /**
091     * Return a string representation of this frequency distribution.
092     *
093     * @return a string representation.
094     */
095    @Override
096    public String toString() {
097        NumberFormat nf = NumberFormat.getPercentInstance();
098        StringBuilder outBuffer = new StringBuilder();
099        outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
100        Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
101        while (iter.hasNext()) {
102            Comparable<?> value = iter.next();
103            outBuffer.append(value);
104            outBuffer.append('\t');
105            outBuffer.append(getCount(value));
106            outBuffer.append('\t');
107            outBuffer.append(nf.format(getPct(value)));
108            outBuffer.append('\t');
109            outBuffer.append(nf.format(getCumPct(value)));
110            outBuffer.append('\n');
111        }
112        return outBuffer.toString();
113    }
114
115    /**
116     * Adds 1 to the frequency count for v.
117     * <p>
118     * If other objects have already been added to this Frequency, v must
119     * be comparable to those that have already been added.
120     * </p>
121     *
122     * @param v the value to add.
123     * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
124     */
125    public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
126        incrementValue(v, 1);
127    }
128
129    /**
130     * Adds 1 to the frequency count for v.
131     *
132     * @param v the value to add.
133     * @throws MathIllegalArgumentException if the table contains entries not
134     * comparable to Long
135     */
136    public void addValue(int v) throws MathIllegalArgumentException {
137        addValue(Long.valueOf(v));
138    }
139
140    /**
141     * Adds 1 to the frequency count for v.
142     *
143     * @param v the value to add.
144     * @throws MathIllegalArgumentException if the table contains entries not
145     * comparable to Long
146     */
147    public void addValue(long v) throws MathIllegalArgumentException {
148        addValue(Long.valueOf(v));
149    }
150
151    /**
152     * Adds 1 to the frequency count for v.
153     *
154     * @param v the value to add.
155     * @throws MathIllegalArgumentException if the table contains entries not
156     * comparable to Char
157     */
158    public void addValue(char v) throws MathIllegalArgumentException {
159        addValue(Character.valueOf(v));
160    }
161
162    /**
163     * Increments the frequency count for v.
164     * <p>
165     * If other objects have already been added to this Frequency, v must
166     * be comparable to those that have already been added.
167     * </p>
168     *
169     * @param v the value to add.
170     * @param increment the amount by which the value should be incremented
171     * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
172     * @since 3.1
173     */
174    public void incrementValue(Comparable<?> v, long increment) throws MathIllegalArgumentException {
175        Comparable<?> obj = v;
176        if (v instanceof Integer) {
177            obj = Long.valueOf(((Integer) v).longValue());
178        }
179        try {
180            Long count = freqTable.get(obj);
181            if (count == null) {
182                freqTable.put(obj, Long.valueOf(increment));
183            } else {
184                freqTable.put(obj, Long.valueOf(count.longValue() + increment));
185            }
186        } catch (ClassCastException ex) {
187            //TreeMap will throw ClassCastException if v is not comparable
188            throw new MathIllegalArgumentException(
189                  LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
190                  v.getClass().getName());
191        }
192    }
193
194    /**
195     * Increments the frequency count for v.
196     * <p>
197     * If other objects have already been added to this Frequency, v must
198     * be comparable to those that have already been added.
199     * </p>
200     *
201     * @param v the value to add.
202     * @param increment the amount by which the value should be incremented
203     * @throws MathIllegalArgumentException if the table contains entries not
204     * comparable to Long
205     * @since 3.3
206     */
207    public void incrementValue(int v, long increment) throws MathIllegalArgumentException {
208        incrementValue(Long.valueOf(v), increment);
209    }
210
211    /**
212     * Increments the frequency count for v.
213     * <p>
214     * If other objects have already been added to this Frequency, v must
215     * be comparable to those that have already been added.
216     * </p>
217     *
218     * @param v the value to add.
219     * @param increment the amount by which the value should be incremented
220     * @throws MathIllegalArgumentException if the table contains entries not
221     * comparable to Long
222     * @since 3.3
223     */
224    public void incrementValue(long v, long increment) throws MathIllegalArgumentException {
225        incrementValue(Long.valueOf(v), increment);
226    }
227
228    /**
229     * Increments the frequency count for v.
230     * <p>
231     * If other objects have already been added to this Frequency, v must
232     * be comparable to those that have already been added.
233     * </p>
234     *
235     * @param v the value to add.
236     * @param increment the amount by which the value should be incremented
237     * @throws MathIllegalArgumentException if the table contains entries not
238     * comparable to Char
239     * @since 3.3
240     */
241    public void incrementValue(char v, long increment) throws MathIllegalArgumentException {
242        incrementValue(Character.valueOf(v), increment);
243    }
244
245    /** Clears the frequency table */
246    public void clear() {
247        freqTable.clear();
248    }
249
250    /**
251     * Returns an Iterator over the set of values that have been added.
252     * <p>
253     * If added values are integral (i.e., integers, longs, Integers, or Longs),
254     * they are converted to Longs when they are added, so the objects returned
255     * by the Iterator will in this case be Longs.</p>
256     *
257     * @return values Iterator
258     */
259    public Iterator<Comparable<?>> valuesIterator() {
260        return freqTable.keySet().iterator();
261    }
262
263    /**
264     * Return an Iterator over the set of keys and values that have been added.
265     * Using the entry set to iterate is more efficient in the case where you
266     * need to access respective counts as well as values, since it doesn't
267     * require a "get" for every key...the value is provided in the Map.Entry.
268     * <p>
269     * If added values are integral (i.e., integers, longs, Integers, or Longs),
270     * they are converted to Longs when they are added, so the values of the
271     * map entries returned by the Iterator will in this case be Longs.</p>
272     *
273     * @return entry set Iterator
274     * @since 3.1
275     */
276    public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
277        return freqTable.entrySet().iterator();
278    }
279
280    //-------------------------------------------------------------------------
281
282    /**
283     * Returns the sum of all frequencies.
284     *
285     * @return the total frequency count.
286     */
287    public long getSumFreq() {
288        long result = 0;
289        Iterator<Long> iterator = freqTable.values().iterator();
290        while (iterator.hasNext())  {
291            result += iterator.next().longValue();
292        }
293        return result;
294    }
295
296    /**
297     * Returns the number of values equal to v.
298     * Returns 0 if the value is not comparable.
299     *
300     * @param v the value to lookup.
301     * @return the frequency of v.
302     */
303    public long getCount(Comparable<?> v) {
304        if (v instanceof Integer) {
305            return getCount(((Integer) v).longValue());
306        }
307        long result = 0;
308        try {
309            Long count =  freqTable.get(v);
310            if (count != null) {
311                result = count.longValue();
312            }
313        } catch (ClassCastException ex) { // NOPMD
314            // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
315        }
316        return result;
317    }
318
319    /**
320     * Returns the number of values equal to v.
321     *
322     * @param v the value to lookup.
323     * @return the frequency of v.
324     */
325    public long getCount(int v) {
326        return getCount(Long.valueOf(v));
327    }
328
329    /**
330     * Returns the number of values equal to v.
331     *
332     * @param v the value to lookup.
333     * @return the frequency of v.
334     */
335    public long getCount(long v) {
336        return getCount(Long.valueOf(v));
337    }
338
339    /**
340     * Returns the number of values equal to v.
341     *
342     * @param v the value to lookup.
343     * @return the frequency of v.
344     */
345    public long getCount(char v) {
346        return getCount(Character.valueOf(v));
347    }
348
349    /**
350     * Returns the number of values in the frequency table.
351     *
352     * @return the number of unique values that have been added to the frequency table.
353     * @see #valuesIterator()
354     */
355    public int getUniqueCount(){
356        return freqTable.keySet().size();
357    }
358
359    /**
360     * Returns the percentage of values that are equal to v
361     * (as a proportion between 0 and 1).
362     * <p>
363     * Returns <code>Double.NaN</code> if no values have been added.
364     * Returns 0 if at least one value has been added, but v is not comparable
365     * to the values set.</p>
366     *
367     * @param v the value to lookup
368     * @return the proportion of values equal to v
369     */
370    public double getPct(Comparable<?> v) {
371        final long sumFreq = getSumFreq();
372        if (sumFreq == 0) {
373            return Double.NaN;
374        }
375        return (double) getCount(v) / (double) sumFreq;
376    }
377
378    /**
379     * Returns the percentage of values that are equal to v
380     * (as a proportion between 0 and 1).
381     *
382     * @param v the value to lookup
383     * @return the proportion of values equal to v
384     */
385    public double getPct(int v) {
386        return getPct(Long.valueOf(v));
387    }
388
389    /**
390     * Returns the percentage of values that are equal to v
391     * (as a proportion between 0 and 1).
392     *
393     * @param v the value to lookup
394     * @return the proportion of values equal to v
395     */
396    public double getPct(long v) {
397        return getPct(Long.valueOf(v));
398    }
399
400    /**
401     * Returns the percentage of values that are equal to v
402     * (as a proportion between 0 and 1).
403     *
404     * @param v the value to lookup
405     * @return the proportion of values equal to v
406     */
407    public double getPct(char v) {
408        return getPct(Character.valueOf(v));
409    }
410
411    //-----------------------------------------------------------------------------------------
412
413    /**
414     * Returns the cumulative frequency of values less than or equal to v.
415     * <p>
416     * Returns 0 if v is not comparable to the values set.</p>
417     *
418     * @param v the value to lookup.
419     * @return the proportion of values equal to v
420     */
421    @SuppressWarnings({ "rawtypes", "unchecked" })
422    public long getCumFreq(Comparable<?> v) {
423        if (getSumFreq() == 0) {
424            return 0;
425        }
426        if (v instanceof Integer) {
427            return getCumFreq(((Integer) v).longValue());
428        }
429        Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
430        if (c == null) {
431            c = new NaturalComparator();
432        }
433        long result = 0;
434
435        try {
436            Long value = freqTable.get(v);
437            if (value != null) {
438                result = value.longValue();
439            }
440        } catch (ClassCastException ex) {
441            return result;   // v is not comparable
442        }
443
444        if (c.compare(v, freqTable.firstKey()) < 0) {
445            return 0;  // v is comparable, but less than first value
446        }
447
448        if (c.compare(v, freqTable.lastKey()) >= 0) {
449            return getSumFreq();    // v is comparable, but greater than the last value
450        }
451
452        Iterator<Comparable<?>> values = valuesIterator();
453        while (values.hasNext()) {
454            Comparable<?> nextValue = values.next();
455            if (c.compare(v, nextValue) > 0) {
456                result += getCount(nextValue);
457            } else {
458                return result;
459            }
460        }
461        return result;
462    }
463
464     /**
465     * Returns the cumulative frequency of values less than or equal to v.
466     * <p>
467     * Returns 0 if v is not comparable to the values set.</p>
468     *
469     * @param v the value to lookup
470     * @return the proportion of values equal to v
471     */
472    public long getCumFreq(int v) {
473        return getCumFreq(Long.valueOf(v));
474    }
475
476     /**
477     * Returns the cumulative frequency of values less than or equal to v.
478     * <p>
479     * Returns 0 if v is not comparable to the values set.</p>
480     *
481     * @param v the value to lookup
482     * @return the proportion of values equal to v
483     */
484    public long getCumFreq(long v) {
485        return getCumFreq(Long.valueOf(v));
486    }
487
488    /**
489     * Returns the cumulative frequency of values less than or equal to v.
490     * <p>
491     * Returns 0 if v is not comparable to the values set.</p>
492     *
493     * @param v the value to lookup
494     * @return the proportion of values equal to v
495     */
496    public long getCumFreq(char v) {
497        return getCumFreq(Character.valueOf(v));
498    }
499
500    //----------------------------------------------------------------------------------------------
501
502    /**
503     * Returns the cumulative percentage of values less than or equal to v
504     * (as a proportion between 0 and 1).
505     * <p>
506     * Returns <code>Double.NaN</code> if no values have been added.
507     * Returns 0 if at least one value has been added, but v is not comparable
508     * to the values set.</p>
509     *
510     * @param v the value to lookup
511     * @return the proportion of values less than or equal to v
512     */
513    public double getCumPct(Comparable<?> v) {
514        final long sumFreq = getSumFreq();
515        if (sumFreq == 0) {
516            return Double.NaN;
517        }
518        return (double) getCumFreq(v) / (double) sumFreq;
519    }
520
521    /**
522     * Returns the cumulative percentage of values less than or equal to v
523     * (as a proportion between 0 and 1).
524     * <p>
525     * Returns 0 if v is not comparable to the values set.</p>
526     *
527     * @param v the value to lookup
528     * @return the proportion of values less than or equal to v
529     */
530    public double getCumPct(int v) {
531        return getCumPct(Long.valueOf(v));
532    }
533
534    /**
535     * Returns the cumulative percentage of values less than or equal to v
536     * (as a proportion between 0 and 1).
537     * <p>
538     * Returns 0 if v is not comparable to the values set.</p>
539     *
540     * @param v the value to lookup
541     * @return the proportion of values less than or equal to v
542     */
543    public double getCumPct(long v) {
544        return getCumPct(Long.valueOf(v));
545    }
546
547    /**
548     * Returns the cumulative percentage of values less than or equal to v
549     * (as a proportion between 0 and 1).
550     * <p>
551     * Returns 0 if v is not comparable to the values set.</p>
552     *
553     * @param v the value to lookup
554     * @return the proportion of values less than or equal to v
555     */
556    public double getCumPct(char v) {
557        return getCumPct(Character.valueOf(v));
558    }
559
560    /**
561     * Returns the mode value(s) in comparator order.
562     *
563     * @return a list containing the value(s) which appear most often.
564     * @since 3.3
565     */
566    public List<Comparable<?>> getMode() {
567        long mostPopular = 0; // frequencies are always positive
568
569        // Get the max count first, so we avoid having to recreate the List each time
570        for(Long l : freqTable.values()) {
571            long frequency = l.longValue();
572            if (frequency > mostPopular) {
573                mostPopular = frequency;
574            }
575        }
576
577        List<Comparable<?>> modeList = new ArrayList<Comparable<?>>();
578        for (Entry<Comparable<?>, Long> ent : freqTable.entrySet()) {
579            long frequency = ent.getValue().longValue();
580            if (frequency == mostPopular) {
581               modeList.add(ent.getKey());
582            }
583        }
584        return modeList;
585    }
586
587    //----------------------------------------------------------------------------------------------
588
589    /**
590     * Merge another Frequency object's counts into this instance.
591     * This Frequency's counts will be incremented (or set when not already set)
592     * by the counts represented by other.
593     *
594     * @param other the other {@link Frequency} object to be merged
595     * @throws NullArgumentException if {@code other} is null
596     * @since 3.1
597     */
598    public void merge(final Frequency other) throws NullArgumentException {
599        MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
600
601        final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
602        while (iter.hasNext()) {
603            final Map.Entry<Comparable<?>, Long> entry = iter.next();
604            incrementValue(entry.getKey(), entry.getValue().longValue());
605        }
606    }
607
608    /**
609     * Merge a {@link Collection} of {@link Frequency} objects into this instance.
610     * This Frequency's counts will be incremented (or set when not already set)
611     * by the counts represented by each of the others.
612     *
613     * @param others the other {@link Frequency} objects to be merged
614     * @throws NullArgumentException if the collection is null
615     * @since 3.1
616     */
617    public void merge(final Collection<Frequency> others) throws NullArgumentException {
618        MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
619
620        for (final Frequency freq : others) {
621            merge(freq);
622        }
623    }
624
625    //----------------------------------------------------------------------------------------------
626
627    /**
628     * A Comparator that compares comparable objects using the
629     * natural order.  Copied from Commons Collections ComparableComparator.
630     * @param <T> the type of the objects compared
631     */
632    private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
633
634        /** Serializable version identifier */
635        private static final long serialVersionUID = -3852193713161395148L;
636
637        /**
638         * Compare the two {@link Comparable Comparable} arguments.
639         * This method is equivalent to:
640         * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
641         *
642         * @param  o1 the first object
643         * @param  o2 the second object
644         * @return  result of comparison
645         * @throws NullPointerException when <i>o1</i> is <code>null</code>,
646         *         or when <code>((Comparable)o1).compareTo(o2)</code> does
647         * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
648         *         or when <code>((Comparable)o1).compareTo(o2)</code> does
649         */
650        @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
651        public int compare(Comparable<T> o1, Comparable<T> o2) {
652            return o1.compareTo((T) o2);
653        }
654    }
655
656    /** {@inheritDoc} */
657    @Override
658    public int hashCode() {
659        final int prime = 31;
660        int result = 1;
661        result = prime * result +
662                 ((freqTable == null) ? 0 : freqTable.hashCode());
663        return result;
664    }
665
666    /** {@inheritDoc} */
667    @Override
668    public boolean equals(Object obj) {
669        if (this == obj) {
670            return true;
671        }
672        if (!(obj instanceof Frequency)) {
673            return false;
674        }
675        Frequency other = (Frequency) obj;
676        if (freqTable == null) {
677            if (other.freqTable != null) {
678                return false;
679            }
680        } else if (!freqTable.equals(other.freqTable)) {
681            return false;
682        }
683        return true;
684    }
685
686}