001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat;
018    
019    import java.io.Serializable;
020    import java.text.NumberFormat;
021    import java.util.Collection;
022    import java.util.Iterator;
023    import java.util.Comparator;
024    import java.util.Map;
025    import java.util.TreeMap;
026    
027    import org.apache.commons.math3.exception.MathIllegalArgumentException;
028    import org.apache.commons.math3.exception.util.LocalizedFormats;
029    
030    /**
031     * Maintains a frequency distribution.
032     * <p>
033     * Accepts int, long, char or Comparable values.  New values added must be
034     * comparable to those that have been added, otherwise the add method will
035     * throw an IllegalArgumentException.</p>
036     * <p>
037     * Integer values (int, long, Integer, Long) are not distinguished by type --
038     * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
039     * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
040     * <p>
041     * char values are converted by <code>addValue</code> to Character instances.
042     * As such, these values are not comparable to integral values, so attempts
043     * to combine integral types with chars in a frequency distribution will fail.
044     * </p>
045     * <p>
046     * The values are ordered using the default (natural order), unless a
047     * <code>Comparator</code> is supplied in the constructor.</p>
048     *
049     * @version $Id: Frequency.java 1422313 2012-12-15 18:53:41Z psteitz $
050     */
051    public class Frequency implements Serializable {
052    
053        /** Serializable version identifier */
054        private static final long serialVersionUID = -3845586908418844111L;
055    
056        /** underlying collection */
057        private final TreeMap<Comparable<?>, Long> freqTable;
058    
059        /**
060         * Default constructor.
061         */
062        public Frequency() {
063            freqTable = new TreeMap<Comparable<?>, Long>();
064        }
065    
066        /**
067         * Constructor allowing values Comparator to be specified.
068         *
069         * @param comparator Comparator used to order values
070         */
071        @SuppressWarnings("unchecked") // TODO is the cast OK?
072        public Frequency(Comparator<?> comparator) {
073            freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
074        }
075    
076        /**
077         * Return a string representation of this frequency
078         * distribution.
079         *
080         * @return a string representation.
081         */
082        @Override
083        public String toString() {
084            NumberFormat nf = NumberFormat.getPercentInstance();
085            StringBuilder outBuffer = new StringBuilder();
086            outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
087            Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
088            while (iter.hasNext()) {
089                Comparable<?> value = iter.next();
090                outBuffer.append(value);
091                outBuffer.append('\t');
092                outBuffer.append(getCount(value));
093                outBuffer.append('\t');
094                outBuffer.append(nf.format(getPct(value)));
095                outBuffer.append('\t');
096                outBuffer.append(nf.format(getCumPct(value)));
097                outBuffer.append('\n');
098            }
099            return outBuffer.toString();
100        }
101    
102        /**
103         * Adds 1 to the frequency count for v.
104         * <p>
105         * If other objects have already been added to this Frequency, v must
106         * be comparable to those that have already been added.
107         * </p>
108         *
109         * @param v the value to add.
110         * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
111         */
112        public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
113            incrementValue(v, 1);
114        }
115    
116        /**
117         * Increments the frequency count for v.
118         * <p>
119         * If other objects have already been added to this Frequency, v must
120         * be comparable to those that have already been added.
121         * </p>
122         *
123         * @param v the value to add.
124         * @param increment the amount by which the value should be incremented
125         * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
126         * @since 3.1
127         */
128        public void incrementValue(Comparable<?> v, long increment){
129            Comparable<?> obj = v;
130            if (v instanceof Integer) {
131                obj = Long.valueOf(((Integer) v).longValue());
132            }
133            try {
134                Long count = freqTable.get(obj);
135                if (count == null) {
136                    freqTable.put(obj, Long.valueOf(increment));
137                } else {
138                    freqTable.put(obj, Long.valueOf(count.longValue() + increment));
139                }
140            } catch (ClassCastException ex) {
141                //TreeMap will throw ClassCastException if v is not comparable
142                throw new MathIllegalArgumentException(
143                      LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
144                      v.getClass().getName());
145            }
146        }
147    
148        /**
149         * Adds 1 to the frequency count for v.
150         *
151         * @param v the value to add.
152         * @throws MathIllegalArgumentException if the table contains entries not
153         * comparable to Integer
154         */
155        public void addValue(int v) throws MathIllegalArgumentException {
156            addValue(Long.valueOf(v));
157        }
158    
159        /**
160         * Adds 1 to the frequency count for v.
161         *
162         * @param v the value to add.
163         * @throws MathIllegalArgumentException if the table contains entries not
164         * comparable to Long
165         */
166        public void addValue(long v) throws MathIllegalArgumentException {
167            addValue(Long.valueOf(v));
168        }
169    
170        /**
171         * Adds 1 to the frequency count for v.
172         *
173         * @param v the value to add.
174         * @throws MathIllegalArgumentException if the table contains entries not
175         * comparable to Char
176         */
177        public void addValue(char v) throws MathIllegalArgumentException {
178            addValue(Character.valueOf(v));
179        }
180    
181        /** Clears the frequency table */
182        public void clear() {
183            freqTable.clear();
184        }
185    
186        /**
187         * Returns an Iterator over the set of values that have been added.
188         * <p>
189         * If added values are integral (i.e., integers, longs, Integers, or Longs),
190         * they are converted to Longs when they are added, so the objects returned
191         * by the Iterator will in this case be Longs.</p>
192         *
193         * @return values Iterator
194         */
195        public Iterator<Comparable<?>> valuesIterator() {
196            return freqTable.keySet().iterator();
197        }
198    
199        /**
200         * Return an Iterator over the set of keys and values that have been added.
201         * Using the entry set to iterate is more efficient in the case where you
202         * need to access respective counts as well as values, since it doesn't
203         * require a "get" for every key...the value is provided in the Map.Entry.
204         * <p>
205         * If added values are integral (i.e., integers, longs, Integers, or Longs),
206         * they are converted to Longs when they are added, so the values of the
207         * map entries returned by the Iterator will in this case be Longs.</p>
208         *
209         * @return entry set Iterator
210         * @since 3.1
211         */
212        public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
213            return freqTable.entrySet().iterator();
214        }
215    
216        //-------------------------------------------------------------------------
217    
218        /**
219         * Returns the sum of all frequencies.
220         *
221         * @return the total frequency count.
222         */
223        public long getSumFreq() {
224            long result = 0;
225            Iterator<Long> iterator = freqTable.values().iterator();
226            while (iterator.hasNext())  {
227                result += iterator.next().longValue();
228            }
229            return result;
230        }
231    
232        /**
233         * Returns the number of values = v.
234         * Returns 0 if the value is not comparable.
235         *
236         * @param v the value to lookup.
237         * @return the frequency of v.
238         */
239        public long getCount(Comparable<?> v) {
240            if (v instanceof Integer) {
241                return getCount(((Integer) v).longValue());
242            }
243            long result = 0;
244            try {
245                Long count =  freqTable.get(v);
246                if (count != null) {
247                    result = count.longValue();
248                }
249            } catch (ClassCastException ex) { // NOPMD
250                // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
251            }
252            return result;
253        }
254    
255        /**
256         * Returns the number of values = v.
257         *
258         * @param v the value to lookup.
259         * @return the frequency of v.
260         */
261        public long getCount(int v) {
262            return getCount(Long.valueOf(v));
263        }
264    
265        /**
266         * Returns the number of values = v.
267         *
268         * @param v the value to lookup.
269         * @return the frequency of v.
270         */
271        public long getCount(long v) {
272            return getCount(Long.valueOf(v));
273        }
274    
275        /**
276         * Returns the number of values = v.
277         *
278         * @param v the value to lookup.
279         * @return the frequency of v.
280         */
281        public long getCount(char v) {
282            return getCount(Character.valueOf(v));
283        }
284    
285        /**
286         * Returns the number of values in the frequency table.
287         *
288         * @return the number of unique values that have been added to the frequency table.
289         * @see #valuesIterator()
290         */
291        public int getUniqueCount(){
292            return freqTable.keySet().size();
293        }
294    
295        /**
296         * Returns the percentage of values that are equal to v
297         * (as a proportion between 0 and 1).
298         * <p>
299         * Returns <code>Double.NaN</code> if no values have been added.</p>
300         *
301         * @param v the value to lookup
302         * @return the proportion of values equal to v
303         */
304        public double getPct(Comparable<?> v) {
305            final long sumFreq = getSumFreq();
306            if (sumFreq == 0) {
307                return Double.NaN;
308            }
309            return (double) getCount(v) / (double) sumFreq;
310        }
311    
312        /**
313         * Returns the percentage of values that are equal to v
314         * (as a proportion between 0 and 1).
315         *
316         * @param v the value to lookup
317         * @return the proportion of values equal to v
318         */
319        public double getPct(int v) {
320            return getPct(Long.valueOf(v));
321        }
322    
323        /**
324         * Returns the percentage of values that are equal to v
325         * (as a proportion between 0 and 1).
326         *
327         * @param v the value to lookup
328         * @return the proportion of values equal to v
329         */
330        public double getPct(long v) {
331            return getPct(Long.valueOf(v));
332        }
333    
334        /**
335         * Returns the percentage of values that are equal to v
336         * (as a proportion between 0 and 1).
337         *
338         * @param v the value to lookup
339         * @return the proportion of values equal to v
340         */
341        public double getPct(char v) {
342            return getPct(Character.valueOf(v));
343        }
344    
345        //-----------------------------------------------------------------------------------------
346    
347        /**
348         * Returns the cumulative frequency of values less than or equal to v.
349         * <p>
350         * Returns 0 if v is not comparable to the values set.</p>
351         *
352         * @param v the value to lookup.
353         * @return the proportion of values equal to v
354         */
355        @SuppressWarnings({ "rawtypes", "unchecked" })
356        public long getCumFreq(Comparable<?> v) {
357            if (getSumFreq() == 0) {
358                return 0;
359            }
360            if (v instanceof Integer) {
361                return getCumFreq(((Integer) v).longValue());
362            }
363            Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
364            if (c == null) {
365                c = new NaturalComparator();
366            }
367            long result = 0;
368    
369            try {
370                Long value = freqTable.get(v);
371                if (value != null) {
372                    result = value.longValue();
373                }
374            } catch (ClassCastException ex) {
375                return result;   // v is not comparable
376            }
377    
378            if (c.compare(v, freqTable.firstKey()) < 0) {
379                return 0;  // v is comparable, but less than first value
380            }
381    
382            if (c.compare(v, freqTable.lastKey()) >= 0) {
383                return getSumFreq();    // v is comparable, but greater than the last value
384            }
385    
386            Iterator<Comparable<?>> values = valuesIterator();
387            while (values.hasNext()) {
388                Comparable<?> nextValue = values.next();
389                if (c.compare(v, nextValue) > 0) {
390                    result += getCount(nextValue);
391                } else {
392                    return result;
393                }
394            }
395            return result;
396        }
397    
398         /**
399         * Returns the cumulative frequency of values less than or equal to v.
400         * <p>
401         * Returns 0 if v is not comparable to the values set.</p>
402         *
403         * @param v the value to lookup
404         * @return the proportion of values equal to v
405         */
406        public long getCumFreq(int v) {
407            return getCumFreq(Long.valueOf(v));
408        }
409    
410         /**
411         * Returns the cumulative frequency of values less than or equal to v.
412         * <p>
413         * Returns 0 if v is not comparable to the values set.</p>
414         *
415         * @param v the value to lookup
416         * @return the proportion of values equal to v
417         */
418        public long getCumFreq(long v) {
419            return getCumFreq(Long.valueOf(v));
420        }
421    
422        /**
423         * Returns the cumulative frequency of values less than or equal to v.
424         * <p>
425         * Returns 0 if v is not comparable to the values set.</p>
426         *
427         * @param v the value to lookup
428         * @return the proportion of values equal to v
429         */
430        public long getCumFreq(char v) {
431            return getCumFreq(Character.valueOf(v));
432        }
433    
434        //----------------------------------------------------------------------------------------------
435    
436        /**
437         * Returns the cumulative percentage of values less than or equal to v
438         * (as a proportion between 0 and 1).
439         * <p>
440         * Returns <code>Double.NaN</code> if no values have been added.
441         * Returns 0 if at least one value has been added, but v is not comparable
442         * to the values set.</p>
443         *
444         * @param v the value to lookup
445         * @return the proportion of values less than or equal to v
446         */
447        public double getCumPct(Comparable<?> v) {
448            final long sumFreq = getSumFreq();
449            if (sumFreq == 0) {
450                return Double.NaN;
451            }
452            return (double) getCumFreq(v) / (double) sumFreq;
453        }
454    
455        /**
456         * Returns the cumulative percentage of values less than or equal to v
457         * (as a proportion between 0 and 1).
458         * <p>
459         * Returns 0 if v is not comparable to the values set.</p>
460         *
461         * @param v the value to lookup
462         * @return the proportion of values less than or equal to v
463         */
464        public double getCumPct(int v) {
465            return getCumPct(Long.valueOf(v));
466        }
467    
468        /**
469         * Returns the cumulative percentage of values less than or equal to v
470         * (as a proportion between 0 and 1).
471         * <p>
472         * Returns 0 if v is not comparable to the values set.</p>
473         *
474         * @param v the value to lookup
475         * @return the proportion of values less than or equal to v
476         */
477        public double getCumPct(long v) {
478            return getCumPct(Long.valueOf(v));
479        }
480    
481        /**
482         * Returns the cumulative percentage of values less than or equal to v
483         * (as a proportion between 0 and 1).
484         * <p>
485         * Returns 0 if v is not comparable to the values set.</p>
486         *
487         * @param v the value to lookup
488         * @return the proportion of values less than or equal to v
489         */
490        public double getCumPct(char v) {
491            return getCumPct(Character.valueOf(v));
492        }
493    
494        //----------------------------------------------------------------------------------------------
495    
496        /**
497         * Merge another Frequency object's counts into this instance.
498         * This Frequency's counts will be incremented (or set when not already set)
499         * by the counts represented by other.
500         *
501         * @param other the other {@link Frequency} object to be merged
502         * @since 3.1
503         */
504        public void merge(Frequency other) {
505            for (Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator(); iter.hasNext();) {
506                Map.Entry<Comparable<?>, Long> entry = iter.next();
507                incrementValue(entry.getKey(), entry.getValue());
508            }
509        }
510    
511        /**
512         * Merge a {@link Collection} of {@link Frequency} objects into this instance.
513         * This Frequency's counts will be incremented (or set when not already set)
514         * by the counts represented by each of the others.
515         *
516         * @param others the other {@link Frequency} objects to be merged
517         * @since 3.1
518         */
519        public void merge(Collection<Frequency> others) {
520            for (Iterator<Frequency> iter = others.iterator(); iter.hasNext();) {
521                merge(iter.next());
522            }
523        }
524    
525        //----------------------------------------------------------------------------------------------
526    
527        /**
528         * A Comparator that compares comparable objects using the
529         * natural order.  Copied from Commons Collections ComparableComparator.
530         */
531        private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
532    
533            /** Serializable version identifier */
534            private static final long serialVersionUID = -3852193713161395148L;
535    
536            /**
537             * Compare the two {@link Comparable Comparable} arguments.
538             * This method is equivalent to:
539             * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
540             *
541             * @param  o1 the first object
542             * @param  o2 the second object
543             * @return  result of comparison
544             * @throws NullPointerException when <i>o1</i> is <code>null</code>,
545             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
546             * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
547             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
548             */
549            @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
550            public int compare(Comparable<T> o1, Comparable<T> o2) {
551                return o1.compareTo((T) o2);
552            }
553        }
554    
555        /** {@inheritDoc} */
556        @Override
557        public int hashCode() {
558            final int prime = 31;
559            int result = 1;
560            result = prime * result +
561                     ((freqTable == null) ? 0 : freqTable.hashCode());
562            return result;
563        }
564    
565        /** {@inheritDoc} */
566        @Override
567        public boolean equals(Object obj) {
568            if (this == obj) {
569                return true;
570            }
571            if (!(obj instanceof Frequency)) {
572                return false;
573            }
574            Frequency other = (Frequency) obj;
575            if (freqTable == null) {
576                if (other.freqTable != null) {
577                    return false;
578                }
579            } else if (!freqTable.equals(other.freqTable)) {
580                return false;
581            }
582            return true;
583        }
584    
585    }