001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat;
018    
019    import java.io.Serializable;
020    import java.text.NumberFormat;
021    import java.util.Collection;
022    import java.util.Iterator;
023    import java.util.Comparator;
024    import java.util.Map;
025    import java.util.TreeMap;
026    
027    import org.apache.commons.math3.exception.MathIllegalArgumentException;
028    import org.apache.commons.math3.exception.NullArgumentException;
029    import org.apache.commons.math3.exception.util.LocalizedFormats;
030    import org.apache.commons.math3.util.MathUtils;
031    
032    /**
033     * Maintains a frequency distribution.
034     * <p>
035     * Accepts int, long, char or Comparable values.  New values added must be
036     * comparable to those that have been added, otherwise the add method will
037     * throw an IllegalArgumentException.</p>
038     * <p>
039     * Integer values (int, long, Integer, Long) are not distinguished by type --
040     * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
041     * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
042     * <p>
043     * char values are converted by <code>addValue</code> to Character instances.
044     * As such, these values are not comparable to integral values, so attempts
045     * to combine integral types with chars in a frequency distribution will fail.
046     * </p>
047     * <p>
048     * The values are ordered using the default (natural order), unless a
049     * <code>Comparator</code> is supplied in the constructor.</p>
050     *
051     * @version $Id: Frequency.java 1455703 2013-03-12 20:46:23Z tn $
052     */
053    public class Frequency implements Serializable {
054    
055        /** Serializable version identifier */
056        private static final long serialVersionUID = -3845586908418844111L;
057    
058        /** underlying collection */
059        private final TreeMap<Comparable<?>, Long> freqTable;
060    
061        /**
062         * Default constructor.
063         */
064        public Frequency() {
065            freqTable = new TreeMap<Comparable<?>, Long>();
066        }
067    
068        /**
069         * Constructor allowing values Comparator to be specified.
070         *
071         * @param comparator Comparator used to order values
072         */
073        @SuppressWarnings("unchecked") // TODO is the cast OK?
074        public Frequency(Comparator<?> comparator) {
075            freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
076        }
077    
078        /**
079         * Return a string representation of this frequency
080         * distribution.
081         *
082         * @return a string representation.
083         */
084        @Override
085        public String toString() {
086            NumberFormat nf = NumberFormat.getPercentInstance();
087            StringBuilder outBuffer = new StringBuilder();
088            outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
089            Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
090            while (iter.hasNext()) {
091                Comparable<?> value = iter.next();
092                outBuffer.append(value);
093                outBuffer.append('\t');
094                outBuffer.append(getCount(value));
095                outBuffer.append('\t');
096                outBuffer.append(nf.format(getPct(value)));
097                outBuffer.append('\t');
098                outBuffer.append(nf.format(getCumPct(value)));
099                outBuffer.append('\n');
100            }
101            return outBuffer.toString();
102        }
103    
104        /**
105         * Adds 1 to the frequency count for v.
106         * <p>
107         * If other objects have already been added to this Frequency, v must
108         * be comparable to those that have already been added.
109         * </p>
110         *
111         * @param v the value to add.
112         * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
113         */
114        public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
115            incrementValue(v, 1);
116        }
117    
118        /**
119         * Increments the frequency count for v.
120         * <p>
121         * If other objects have already been added to this Frequency, v must
122         * be comparable to those that have already been added.
123         * </p>
124         *
125         * @param v the value to add.
126         * @param increment the amount by which the value should be incremented
127         * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
128         * @since 3.1
129         */
130        public void incrementValue(Comparable<?> v, long increment){
131            Comparable<?> obj = v;
132            if (v instanceof Integer) {
133                obj = Long.valueOf(((Integer) v).longValue());
134            }
135            try {
136                Long count = freqTable.get(obj);
137                if (count == null) {
138                    freqTable.put(obj, Long.valueOf(increment));
139                } else {
140                    freqTable.put(obj, Long.valueOf(count.longValue() + increment));
141                }
142            } catch (ClassCastException ex) {
143                //TreeMap will throw ClassCastException if v is not comparable
144                throw new MathIllegalArgumentException(
145                      LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
146                      v.getClass().getName());
147            }
148        }
149    
150        /**
151         * Adds 1 to the frequency count for v.
152         *
153         * @param v the value to add.
154         * @throws MathIllegalArgumentException if the table contains entries not
155         * comparable to Integer
156         */
157        public void addValue(int v) throws MathIllegalArgumentException {
158            addValue(Long.valueOf(v));
159        }
160    
161        /**
162         * Adds 1 to the frequency count for v.
163         *
164         * @param v the value to add.
165         * @throws MathIllegalArgumentException if the table contains entries not
166         * comparable to Long
167         */
168        public void addValue(long v) throws MathIllegalArgumentException {
169            addValue(Long.valueOf(v));
170        }
171    
172        /**
173         * Adds 1 to the frequency count for v.
174         *
175         * @param v the value to add.
176         * @throws MathIllegalArgumentException if the table contains entries not
177         * comparable to Char
178         */
179        public void addValue(char v) throws MathIllegalArgumentException {
180            addValue(Character.valueOf(v));
181        }
182    
183        /** Clears the frequency table */
184        public void clear() {
185            freqTable.clear();
186        }
187    
188        /**
189         * Returns an Iterator over the set of values that have been added.
190         * <p>
191         * If added values are integral (i.e., integers, longs, Integers, or Longs),
192         * they are converted to Longs when they are added, so the objects returned
193         * by the Iterator will in this case be Longs.</p>
194         *
195         * @return values Iterator
196         */
197        public Iterator<Comparable<?>> valuesIterator() {
198            return freqTable.keySet().iterator();
199        }
200    
201        /**
202         * Return an Iterator over the set of keys and values that have been added.
203         * Using the entry set to iterate is more efficient in the case where you
204         * need to access respective counts as well as values, since it doesn't
205         * require a "get" for every key...the value is provided in the Map.Entry.
206         * <p>
207         * If added values are integral (i.e., integers, longs, Integers, or Longs),
208         * they are converted to Longs when they are added, so the values of the
209         * map entries returned by the Iterator will in this case be Longs.</p>
210         *
211         * @return entry set Iterator
212         * @since 3.1
213         */
214        public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
215            return freqTable.entrySet().iterator();
216        }
217    
218        //-------------------------------------------------------------------------
219    
220        /**
221         * Returns the sum of all frequencies.
222         *
223         * @return the total frequency count.
224         */
225        public long getSumFreq() {
226            long result = 0;
227            Iterator<Long> iterator = freqTable.values().iterator();
228            while (iterator.hasNext())  {
229                result += iterator.next().longValue();
230            }
231            return result;
232        }
233    
234        /**
235         * Returns the number of values = v.
236         * Returns 0 if the value is not comparable.
237         *
238         * @param v the value to lookup.
239         * @return the frequency of v.
240         */
241        public long getCount(Comparable<?> v) {
242            if (v instanceof Integer) {
243                return getCount(((Integer) v).longValue());
244            }
245            long result = 0;
246            try {
247                Long count =  freqTable.get(v);
248                if (count != null) {
249                    result = count.longValue();
250                }
251            } catch (ClassCastException ex) { // NOPMD
252                // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
253            }
254            return result;
255        }
256    
257        /**
258         * Returns the number of values = v.
259         *
260         * @param v the value to lookup.
261         * @return the frequency of v.
262         */
263        public long getCount(int v) {
264            return getCount(Long.valueOf(v));
265        }
266    
267        /**
268         * Returns the number of values = v.
269         *
270         * @param v the value to lookup.
271         * @return the frequency of v.
272         */
273        public long getCount(long v) {
274            return getCount(Long.valueOf(v));
275        }
276    
277        /**
278         * Returns the number of values = v.
279         *
280         * @param v the value to lookup.
281         * @return the frequency of v.
282         */
283        public long getCount(char v) {
284            return getCount(Character.valueOf(v));
285        }
286    
287        /**
288         * Returns the number of values in the frequency table.
289         *
290         * @return the number of unique values that have been added to the frequency table.
291         * @see #valuesIterator()
292         */
293        public int getUniqueCount(){
294            return freqTable.keySet().size();
295        }
296    
297        /**
298         * Returns the percentage of values that are equal to v
299         * (as a proportion between 0 and 1).
300         * <p>
301         * Returns <code>Double.NaN</code> if no values have been added.</p>
302         *
303         * @param v the value to lookup
304         * @return the proportion of values equal to v
305         */
306        public double getPct(Comparable<?> v) {
307            final long sumFreq = getSumFreq();
308            if (sumFreq == 0) {
309                return Double.NaN;
310            }
311            return (double) getCount(v) / (double) sumFreq;
312        }
313    
314        /**
315         * Returns the percentage of values that are equal to v
316         * (as a proportion between 0 and 1).
317         *
318         * @param v the value to lookup
319         * @return the proportion of values equal to v
320         */
321        public double getPct(int v) {
322            return getPct(Long.valueOf(v));
323        }
324    
325        /**
326         * Returns the percentage of values that are equal to v
327         * (as a proportion between 0 and 1).
328         *
329         * @param v the value to lookup
330         * @return the proportion of values equal to v
331         */
332        public double getPct(long v) {
333            return getPct(Long.valueOf(v));
334        }
335    
336        /**
337         * Returns the percentage of values that are equal to v
338         * (as a proportion between 0 and 1).
339         *
340         * @param v the value to lookup
341         * @return the proportion of values equal to v
342         */
343        public double getPct(char v) {
344            return getPct(Character.valueOf(v));
345        }
346    
347        //-----------------------------------------------------------------------------------------
348    
349        /**
350         * Returns the cumulative frequency of values less than or equal to v.
351         * <p>
352         * Returns 0 if v is not comparable to the values set.</p>
353         *
354         * @param v the value to lookup.
355         * @return the proportion of values equal to v
356         */
357        @SuppressWarnings({ "rawtypes", "unchecked" })
358        public long getCumFreq(Comparable<?> v) {
359            if (getSumFreq() == 0) {
360                return 0;
361            }
362            if (v instanceof Integer) {
363                return getCumFreq(((Integer) v).longValue());
364            }
365            Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
366            if (c == null) {
367                c = new NaturalComparator();
368            }
369            long result = 0;
370    
371            try {
372                Long value = freqTable.get(v);
373                if (value != null) {
374                    result = value.longValue();
375                }
376            } catch (ClassCastException ex) {
377                return result;   // v is not comparable
378            }
379    
380            if (c.compare(v, freqTable.firstKey()) < 0) {
381                return 0;  // v is comparable, but less than first value
382            }
383    
384            if (c.compare(v, freqTable.lastKey()) >= 0) {
385                return getSumFreq();    // v is comparable, but greater than the last value
386            }
387    
388            Iterator<Comparable<?>> values = valuesIterator();
389            while (values.hasNext()) {
390                Comparable<?> nextValue = values.next();
391                if (c.compare(v, nextValue) > 0) {
392                    result += getCount(nextValue);
393                } else {
394                    return result;
395                }
396            }
397            return result;
398        }
399    
400         /**
401         * Returns the cumulative frequency of values less than or equal to v.
402         * <p>
403         * Returns 0 if v is not comparable to the values set.</p>
404         *
405         * @param v the value to lookup
406         * @return the proportion of values equal to v
407         */
408        public long getCumFreq(int v) {
409            return getCumFreq(Long.valueOf(v));
410        }
411    
412         /**
413         * Returns the cumulative frequency of values less than or equal to v.
414         * <p>
415         * Returns 0 if v is not comparable to the values set.</p>
416         *
417         * @param v the value to lookup
418         * @return the proportion of values equal to v
419         */
420        public long getCumFreq(long v) {
421            return getCumFreq(Long.valueOf(v));
422        }
423    
424        /**
425         * Returns the cumulative frequency of values less than or equal to v.
426         * <p>
427         * Returns 0 if v is not comparable to the values set.</p>
428         *
429         * @param v the value to lookup
430         * @return the proportion of values equal to v
431         */
432        public long getCumFreq(char v) {
433            return getCumFreq(Character.valueOf(v));
434        }
435    
436        //----------------------------------------------------------------------------------------------
437    
438        /**
439         * Returns the cumulative percentage of values less than or equal to v
440         * (as a proportion between 0 and 1).
441         * <p>
442         * Returns <code>Double.NaN</code> if no values have been added.
443         * Returns 0 if at least one value has been added, but v is not comparable
444         * to the values set.</p>
445         *
446         * @param v the value to lookup
447         * @return the proportion of values less than or equal to v
448         */
449        public double getCumPct(Comparable<?> v) {
450            final long sumFreq = getSumFreq();
451            if (sumFreq == 0) {
452                return Double.NaN;
453            }
454            return (double) getCumFreq(v) / (double) sumFreq;
455        }
456    
457        /**
458         * Returns the cumulative percentage of values less than or equal to v
459         * (as a proportion between 0 and 1).
460         * <p>
461         * Returns 0 if v is not comparable to the values set.</p>
462         *
463         * @param v the value to lookup
464         * @return the proportion of values less than or equal to v
465         */
466        public double getCumPct(int v) {
467            return getCumPct(Long.valueOf(v));
468        }
469    
470        /**
471         * Returns the cumulative percentage of values less than or equal to v
472         * (as a proportion between 0 and 1).
473         * <p>
474         * Returns 0 if v is not comparable to the values set.</p>
475         *
476         * @param v the value to lookup
477         * @return the proportion of values less than or equal to v
478         */
479        public double getCumPct(long v) {
480            return getCumPct(Long.valueOf(v));
481        }
482    
483        /**
484         * Returns the cumulative percentage of values less than or equal to v
485         * (as a proportion between 0 and 1).
486         * <p>
487         * Returns 0 if v is not comparable to the values set.</p>
488         *
489         * @param v the value to lookup
490         * @return the proportion of values less than or equal to v
491         */
492        public double getCumPct(char v) {
493            return getCumPct(Character.valueOf(v));
494        }
495    
496        //----------------------------------------------------------------------------------------------
497    
498        /**
499         * Merge another Frequency object's counts into this instance.
500         * This Frequency's counts will be incremented (or set when not already set)
501         * by the counts represented by other.
502         *
503         * @param other the other {@link Frequency} object to be merged
504         * @throws NullArgumentException if {@code other} is null
505         * @since 3.1
506         */
507        public void merge(final Frequency other) throws NullArgumentException {
508            MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
509    
510            final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
511            while (iter.hasNext()) {
512                final Map.Entry<Comparable<?>, Long> entry = iter.next();
513                incrementValue(entry.getKey(), entry.getValue());
514            }
515        }
516    
517        /**
518         * Merge a {@link Collection} of {@link Frequency} objects into this instance.
519         * This Frequency's counts will be incremented (or set when not already set)
520         * by the counts represented by each of the others.
521         *
522         * @param others the other {@link Frequency} objects to be merged
523         * @throws NullArgumentException if the collection is null
524         * @since 3.1
525         */
526        public void merge(final Collection<Frequency> others) throws NullArgumentException {
527            MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
528    
529            for (final Frequency freq : others) {
530                merge(freq);
531            }
532        }
533    
534        //----------------------------------------------------------------------------------------------
535    
536        /**
537         * A Comparator that compares comparable objects using the
538         * natural order.  Copied from Commons Collections ComparableComparator.
539         */
540        private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
541    
542            /** Serializable version identifier */
543            private static final long serialVersionUID = -3852193713161395148L;
544    
545            /**
546             * Compare the two {@link Comparable Comparable} arguments.
547             * This method is equivalent to:
548             * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
549             *
550             * @param  o1 the first object
551             * @param  o2 the second object
552             * @return  result of comparison
553             * @throws NullPointerException when <i>o1</i> is <code>null</code>,
554             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
555             * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
556             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
557             */
558            @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
559            public int compare(Comparable<T> o1, Comparable<T> o2) {
560                return o1.compareTo((T) o2);
561            }
562        }
563    
564        /** {@inheritDoc} */
565        @Override
566        public int hashCode() {
567            final int prime = 31;
568            int result = 1;
569            result = prime * result +
570                     ((freqTable == null) ? 0 : freqTable.hashCode());
571            return result;
572        }
573    
574        /** {@inheritDoc} */
575        @Override
576        public boolean equals(Object obj) {
577            if (this == obj) {
578                return true;
579            }
580            if (!(obj instanceof Frequency)) {
581                return false;
582            }
583            Frequency other = (Frequency) obj;
584            if (freqTable == null) {
585                if (other.freqTable != null) {
586                    return false;
587                }
588            } else if (!freqTable.equals(other.freqTable)) {
589                return false;
590            }
591            return true;
592        }
593    
594    }