001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat;
018
019import java.io.Serializable;
020import java.text.NumberFormat;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Iterator;
024import java.util.Comparator;
025import java.util.List;
026import java.util.Map;
027import java.util.Map.Entry;
028import java.util.TreeMap;
029
030import org.apache.commons.math3.exception.MathIllegalArgumentException;
031import org.apache.commons.math3.exception.NullArgumentException;
032import org.apache.commons.math3.exception.util.LocalizedFormats;
033import org.apache.commons.math3.util.MathUtils;
034
035/**
036 * Maintains a frequency distribution.
037 * <p>
038 * Accepts int, long, char or Comparable values.  New values added must be
039 * comparable to those that have been added, otherwise the add method will
040 * throw an IllegalArgumentException.</p>
041 * <p>
042 * Integer values (int, long, Integer, Long) are not distinguished by type --
043 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
044 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
045 * <p>NOTE: byte and short values will be implicitly converted to int values
046 * by the compiler, thus there are no explicit overloaded methods for these
047 * primitive types.</p>
048 * <p>
049 * char values are converted by <code>addValue</code> to Character instances.
050 * As such, these values are not comparable to integral values, so attempts
051 * to combine integral types with chars in a frequency distribution will fail.
052 * </p>
053 * <p>
054 * Float is not coerced to Double.
055 * Since they are not Comparable with each other the user must do any necessary coercion.
056 * Float.NaN and Double.NaN are not treated specially; they may occur in input and will
057 * occur in output if appropriate.
058 * </b>
059 * <p>
060 * The values are ordered using the default (natural order), unless a
061 * <code>Comparator</code> is supplied in the constructor.</p>
062 *
063 */
064public class Frequency implements Serializable {
065
066    /** Serializable version identifier */
067    private static final long serialVersionUID = -3845586908418844111L;
068
069    /** underlying collection */
070    private final TreeMap<Comparable<?>, Long> freqTable;
071
072    /**
073     * Default constructor.
074     */
075    public Frequency() {
076        freqTable = new TreeMap<Comparable<?>, Long>();
077    }
078
079    /**
080     * Constructor allowing values Comparator to be specified.
081     *
082     * @param comparator Comparator used to order values
083     */
084    @SuppressWarnings("unchecked") // TODO is the cast OK?
085    public Frequency(Comparator<?> comparator) {
086        freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
087    }
088
089    /**
090     * Return a string representation of this frequency distribution.
091     *
092     * @return a string representation.
093     */
094    @Override
095    public String toString() {
096        NumberFormat nf = NumberFormat.getPercentInstance();
097        StringBuilder outBuffer = new StringBuilder();
098        outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
099        Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
100        while (iter.hasNext()) {
101            Comparable<?> value = iter.next();
102            outBuffer.append(value);
103            outBuffer.append('\t');
104            outBuffer.append(getCount(value));
105            outBuffer.append('\t');
106            outBuffer.append(nf.format(getPct(value)));
107            outBuffer.append('\t');
108            outBuffer.append(nf.format(getCumPct(value)));
109            outBuffer.append('\n');
110        }
111        return outBuffer.toString();
112    }
113
114    /**
115     * Adds 1 to the frequency count for v.
116     * <p>
117     * If other objects have already been added to this Frequency, v must
118     * be comparable to those that have already been added.
119     * </p>
120     *
121     * @param v the value to add.
122     * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
123     */
124    public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
125        incrementValue(v, 1);
126    }
127
128    /**
129     * Adds 1 to the frequency count for v.
130     *
131     * @param v the value to add.
132     * @throws MathIllegalArgumentException if the table contains entries not
133     * comparable to Long
134     */
135    public void addValue(int v) throws MathIllegalArgumentException {
136        addValue(Long.valueOf(v));
137    }
138
139    /**
140     * Adds 1 to the frequency count for v.
141     *
142     * @param v the value to add.
143     * @throws MathIllegalArgumentException if the table contains entries not
144     * comparable to Long
145     */
146    public void addValue(long v) throws MathIllegalArgumentException {
147        addValue(Long.valueOf(v));
148    }
149
150    /**
151     * Adds 1 to the frequency count for v.
152     *
153     * @param v the value to add.
154     * @throws MathIllegalArgumentException if the table contains entries not
155     * comparable to Char
156     */
157    public void addValue(char v) throws MathIllegalArgumentException {
158        addValue(Character.valueOf(v));
159    }
160
161    /**
162     * Increments the frequency count for v.
163     * <p>
164     * If other objects have already been added to this Frequency, v must
165     * be comparable to those that have already been added.
166     * </p>
167     *
168     * @param v the value to add.
169     * @param increment the amount by which the value should be incremented
170     * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
171     * @since 3.1
172     */
173    public void incrementValue(Comparable<?> v, long increment) throws MathIllegalArgumentException {
174        Comparable<?> obj = v;
175        if (v instanceof Integer) {
176            obj = Long.valueOf(((Integer) v).longValue());
177        }
178        try {
179            Long count = freqTable.get(obj);
180            if (count == null) {
181                freqTable.put(obj, Long.valueOf(increment));
182            } else {
183                freqTable.put(obj, Long.valueOf(count.longValue() + increment));
184            }
185        } catch (ClassCastException ex) {
186            //TreeMap will throw ClassCastException if v is not comparable
187            throw new MathIllegalArgumentException(
188                  LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
189                  v.getClass().getName());
190        }
191    }
192
193    /**
194     * Increments the frequency count for v.
195     * <p>
196     * If other objects have already been added to this Frequency, v must
197     * be comparable to those that have already been added.
198     * </p>
199     *
200     * @param v the value to add.
201     * @param increment the amount by which the value should be incremented
202     * @throws MathIllegalArgumentException if the table contains entries not
203     * comparable to Long
204     * @since 3.3
205     */
206    public void incrementValue(int v, long increment) throws MathIllegalArgumentException {
207        incrementValue(Long.valueOf(v), increment);
208    }
209
210    /**
211     * Increments the frequency count for v.
212     * <p>
213     * If other objects have already been added to this Frequency, v must
214     * be comparable to those that have already been added.
215     * </p>
216     *
217     * @param v the value to add.
218     * @param increment the amount by which the value should be incremented
219     * @throws MathIllegalArgumentException if the table contains entries not
220     * comparable to Long
221     * @since 3.3
222     */
223    public void incrementValue(long v, long increment) throws MathIllegalArgumentException {
224        incrementValue(Long.valueOf(v), increment);
225    }
226
227    /**
228     * Increments the frequency count for v.
229     * <p>
230     * If other objects have already been added to this Frequency, v must
231     * be comparable to those that have already been added.
232     * </p>
233     *
234     * @param v the value to add.
235     * @param increment the amount by which the value should be incremented
236     * @throws MathIllegalArgumentException if the table contains entries not
237     * comparable to Char
238     * @since 3.3
239     */
240    public void incrementValue(char v, long increment) throws MathIllegalArgumentException {
241        incrementValue(Character.valueOf(v), increment);
242    }
243
244    /** Clears the frequency table */
245    public void clear() {
246        freqTable.clear();
247    }
248
249    /**
250     * Returns an Iterator over the set of values that have been added.
251     * <p>
252     * If added values are integral (i.e., integers, longs, Integers, or Longs),
253     * they are converted to Longs when they are added, so the objects returned
254     * by the Iterator will in this case be Longs.</p>
255     *
256     * @return values Iterator
257     */
258    public Iterator<Comparable<?>> valuesIterator() {
259        return freqTable.keySet().iterator();
260    }
261
262    /**
263     * Return an Iterator over the set of keys and values that have been added.
264     * Using the entry set to iterate is more efficient in the case where you
265     * need to access respective counts as well as values, since it doesn't
266     * require a "get" for every key...the value is provided in the Map.Entry.
267     * <p>
268     * If added values are integral (i.e., integers, longs, Integers, or Longs),
269     * they are converted to Longs when they are added, so the values of the
270     * map entries returned by the Iterator will in this case be Longs.</p>
271     *
272     * @return entry set Iterator
273     * @since 3.1
274     */
275    public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
276        return freqTable.entrySet().iterator();
277    }
278
279    //-------------------------------------------------------------------------
280
281    /**
282     * Returns the sum of all frequencies.
283     *
284     * @return the total frequency count.
285     */
286    public long getSumFreq() {
287        long result = 0;
288        Iterator<Long> iterator = freqTable.values().iterator();
289        while (iterator.hasNext())  {
290            result += iterator.next().longValue();
291        }
292        return result;
293    }
294
295    /**
296     * Returns the number of values equal to v.
297     * Returns 0 if the value is not comparable.
298     *
299     * @param v the value to lookup.
300     * @return the frequency of v.
301     */
302    public long getCount(Comparable<?> v) {
303        if (v instanceof Integer) {
304            return getCount(((Integer) v).longValue());
305        }
306        long result = 0;
307        try {
308            Long count =  freqTable.get(v);
309            if (count != null) {
310                result = count.longValue();
311            }
312        } catch (ClassCastException ex) { // NOPMD
313            // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
314        }
315        return result;
316    }
317
318    /**
319     * Returns the number of values equal to v.
320     *
321     * @param v the value to lookup.
322     * @return the frequency of v.
323     */
324    public long getCount(int v) {
325        return getCount(Long.valueOf(v));
326    }
327
328    /**
329     * Returns the number of values equal to v.
330     *
331     * @param v the value to lookup.
332     * @return the frequency of v.
333     */
334    public long getCount(long v) {
335        return getCount(Long.valueOf(v));
336    }
337
338    /**
339     * Returns the number of values equal to v.
340     *
341     * @param v the value to lookup.
342     * @return the frequency of v.
343     */
344    public long getCount(char v) {
345        return getCount(Character.valueOf(v));
346    }
347
348    /**
349     * Returns the number of values in the frequency table.
350     *
351     * @return the number of unique values that have been added to the frequency table.
352     * @see #valuesIterator()
353     */
354    public int getUniqueCount(){
355        return freqTable.keySet().size();
356    }
357
358    /**
359     * Returns the percentage of values that are equal to v
360     * (as a proportion between 0 and 1).
361     * <p>
362     * Returns <code>Double.NaN</code> if no values have been added.
363     * Returns 0 if at least one value has been added, but v is not comparable
364     * to the values set.</p>
365     *
366     * @param v the value to lookup
367     * @return the proportion of values equal to v
368     */
369    public double getPct(Comparable<?> v) {
370        final long sumFreq = getSumFreq();
371        if (sumFreq == 0) {
372            return Double.NaN;
373        }
374        return (double) getCount(v) / (double) sumFreq;
375    }
376
377    /**
378     * Returns the percentage of values that are equal to v
379     * (as a proportion between 0 and 1).
380     *
381     * @param v the value to lookup
382     * @return the proportion of values equal to v
383     */
384    public double getPct(int v) {
385        return getPct(Long.valueOf(v));
386    }
387
388    /**
389     * Returns the percentage of values that are equal to v
390     * (as a proportion between 0 and 1).
391     *
392     * @param v the value to lookup
393     * @return the proportion of values equal to v
394     */
395    public double getPct(long v) {
396        return getPct(Long.valueOf(v));
397    }
398
399    /**
400     * Returns the percentage of values that are equal to v
401     * (as a proportion between 0 and 1).
402     *
403     * @param v the value to lookup
404     * @return the proportion of values equal to v
405     */
406    public double getPct(char v) {
407        return getPct(Character.valueOf(v));
408    }
409
410    //-----------------------------------------------------------------------------------------
411
412    /**
413     * Returns the cumulative frequency of values less than or equal to v.
414     * <p>
415     * Returns 0 if v is not comparable to the values set.</p>
416     *
417     * @param v the value to lookup.
418     * @return the proportion of values equal to v
419     */
420    @SuppressWarnings({ "rawtypes", "unchecked" })
421    public long getCumFreq(Comparable<?> v) {
422        if (getSumFreq() == 0) {
423            return 0;
424        }
425        if (v instanceof Integer) {
426            return getCumFreq(((Integer) v).longValue());
427        }
428        Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
429        if (c == null) {
430            c = new NaturalComparator();
431        }
432        long result = 0;
433
434        try {
435            Long value = freqTable.get(v);
436            if (value != null) {
437                result = value.longValue();
438            }
439        } catch (ClassCastException ex) {
440            return result;   // v is not comparable
441        }
442
443        if (c.compare(v, freqTable.firstKey()) < 0) {
444            return 0;  // v is comparable, but less than first value
445        }
446
447        if (c.compare(v, freqTable.lastKey()) >= 0) {
448            return getSumFreq();    // v is comparable, but greater than the last value
449        }
450
451        Iterator<Comparable<?>> values = valuesIterator();
452        while (values.hasNext()) {
453            Comparable<?> nextValue = values.next();
454            if (c.compare(v, nextValue) > 0) {
455                result += getCount(nextValue);
456            } else {
457                return result;
458            }
459        }
460        return result;
461    }
462
463     /**
464     * Returns the cumulative frequency of values less than or equal to v.
465     * <p>
466     * Returns 0 if v is not comparable to the values set.</p>
467     *
468     * @param v the value to lookup
469     * @return the proportion of values equal to v
470     */
471    public long getCumFreq(int v) {
472        return getCumFreq(Long.valueOf(v));
473    }
474
475     /**
476     * Returns the cumulative frequency of values less than or equal to v.
477     * <p>
478     * Returns 0 if v is not comparable to the values set.</p>
479     *
480     * @param v the value to lookup
481     * @return the proportion of values equal to v
482     */
483    public long getCumFreq(long v) {
484        return getCumFreq(Long.valueOf(v));
485    }
486
487    /**
488     * Returns the cumulative frequency of values less than or equal to v.
489     * <p>
490     * Returns 0 if v is not comparable to the values set.</p>
491     *
492     * @param v the value to lookup
493     * @return the proportion of values equal to v
494     */
495    public long getCumFreq(char v) {
496        return getCumFreq(Character.valueOf(v));
497    }
498
499    //----------------------------------------------------------------------------------------------
500
501    /**
502     * Returns the cumulative percentage of values less than or equal to v
503     * (as a proportion between 0 and 1).
504     * <p>
505     * Returns <code>Double.NaN</code> if no values have been added.
506     * Returns 0 if at least one value has been added, but v is not comparable
507     * to the values set.</p>
508     *
509     * @param v the value to lookup
510     * @return the proportion of values less than or equal to v
511     */
512    public double getCumPct(Comparable<?> v) {
513        final long sumFreq = getSumFreq();
514        if (sumFreq == 0) {
515            return Double.NaN;
516        }
517        return (double) getCumFreq(v) / (double) sumFreq;
518    }
519
520    /**
521     * Returns the cumulative percentage of values less than or equal to v
522     * (as a proportion between 0 and 1).
523     * <p>
524     * Returns 0 if v is not comparable to the values set.</p>
525     *
526     * @param v the value to lookup
527     * @return the proportion of values less than or equal to v
528     */
529    public double getCumPct(int v) {
530        return getCumPct(Long.valueOf(v));
531    }
532
533    /**
534     * Returns the cumulative percentage of values less than or equal to v
535     * (as a proportion between 0 and 1).
536     * <p>
537     * Returns 0 if v is not comparable to the values set.</p>
538     *
539     * @param v the value to lookup
540     * @return the proportion of values less than or equal to v
541     */
542    public double getCumPct(long v) {
543        return getCumPct(Long.valueOf(v));
544    }
545
546    /**
547     * Returns the cumulative percentage of values less than or equal to v
548     * (as a proportion between 0 and 1).
549     * <p>
550     * Returns 0 if v is not comparable to the values set.</p>
551     *
552     * @param v the value to lookup
553     * @return the proportion of values less than or equal to v
554     */
555    public double getCumPct(char v) {
556        return getCumPct(Character.valueOf(v));
557    }
558
559    /**
560     * Returns the mode value(s) in comparator order.
561     *
562     * @return a list containing the value(s) which appear most often.
563     * @since 3.3
564     */
565    public List<Comparable<?>> getMode() {
566        long mostPopular = 0; // frequencies are always positive
567
568        // Get the max count first, so we avoid having to recreate the List each time
569        for(Long l : freqTable.values()) {
570            long frequency = l.longValue();
571            if (frequency > mostPopular) {
572                mostPopular = frequency;
573            }
574        }
575
576        List<Comparable<?>> modeList = new ArrayList<Comparable<?>>();
577        for (Entry<Comparable<?>, Long> ent : freqTable.entrySet()) {
578            long frequency = ent.getValue().longValue();
579            if (frequency == mostPopular) {
580               modeList.add(ent.getKey());
581            }
582        }
583        return modeList;
584    }
585
586    //----------------------------------------------------------------------------------------------
587
588    /**
589     * Merge another Frequency object's counts into this instance.
590     * This Frequency's counts will be incremented (or set when not already set)
591     * by the counts represented by other.
592     *
593     * @param other the other {@link Frequency} object to be merged
594     * @throws NullArgumentException if {@code other} is null
595     * @since 3.1
596     */
597    public void merge(final Frequency other) throws NullArgumentException {
598        MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
599
600        final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
601        while (iter.hasNext()) {
602            final Map.Entry<Comparable<?>, Long> entry = iter.next();
603            incrementValue(entry.getKey(), entry.getValue().longValue());
604        }
605    }
606
607    /**
608     * Merge a {@link Collection} of {@link Frequency} objects into this instance.
609     * This Frequency's counts will be incremented (or set when not already set)
610     * by the counts represented by each of the others.
611     *
612     * @param others the other {@link Frequency} objects to be merged
613     * @throws NullArgumentException if the collection is null
614     * @since 3.1
615     */
616    public void merge(final Collection<Frequency> others) throws NullArgumentException {
617        MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
618
619        for (final Frequency freq : others) {
620            merge(freq);
621        }
622    }
623
624    //----------------------------------------------------------------------------------------------
625
626    /**
627     * A Comparator that compares comparable objects using the
628     * natural order.  Copied from Commons Collections ComparableComparator.
629     */
630    private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
631
632        /** Serializable version identifier */
633        private static final long serialVersionUID = -3852193713161395148L;
634
635        /**
636         * Compare the two {@link Comparable Comparable} arguments.
637         * This method is equivalent to:
638         * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
639         *
640         * @param  o1 the first object
641         * @param  o2 the second object
642         * @return  result of comparison
643         * @throws NullPointerException when <i>o1</i> is <code>null</code>,
644         *         or when <code>((Comparable)o1).compareTo(o2)</code> does
645         * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
646         *         or when <code>((Comparable)o1).compareTo(o2)</code> does
647         */
648        @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
649        public int compare(Comparable<T> o1, Comparable<T> o2) {
650            return o1.compareTo((T) o2);
651        }
652    }
653
654    /** {@inheritDoc} */
655    @Override
656    public int hashCode() {
657        final int prime = 31;
658        int result = 1;
659        result = prime * result +
660                 ((freqTable == null) ? 0 : freqTable.hashCode());
661        return result;
662    }
663
664    /** {@inheritDoc} */
665    @Override
666    public boolean equals(Object obj) {
667        if (this == obj) {
668            return true;
669        }
670        if (!(obj instanceof Frequency)) {
671            return false;
672        }
673        Frequency other = (Frequency) obj;
674        if (freqTable == null) {
675            if (other.freqTable != null) {
676                return false;
677            }
678        } else if (!freqTable.equals(other.freqTable)) {
679            return false;
680        }
681        return true;
682    }
683
684}