001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.stat;
018
019import java.io.Serializable;
020import java.text.NumberFormat;
021import java.util.ArrayList;
022import java.util.Collection;
023import java.util.Comparator;
024import java.util.Iterator;
025import java.util.List;
026import java.util.Map;
027import java.util.Map.Entry;
028import java.util.SortedMap;
029import java.util.TreeMap;
030
031import org.apache.commons.math4.exception.NullArgumentException;
032import org.apache.commons.math4.exception.util.LocalizedFormats;
033import org.apache.commons.math4.util.MathUtils;
034
035/**
036 * Maintains a frequency distribution.
037 *
038 * <p>The values are ordered using the default (natural order), unless a
039 * <code>Comparator</code> is supplied in the constructor.</p>
040 *
041 * @param <T> a comparable type used in the frequency distribution
042 */
043public class Frequency<T extends Comparable<T>> implements Serializable {
044
045    /** Serializable version identifier */
046    private static final long serialVersionUID = 605878194679954450L;
047    /** underlying collection */
048    private final SortedMap<T, Long> freqTable;
049
050    /**
051     * Default constructor.
052     */
053    public Frequency() {
054        freqTable = new TreeMap<>();
055    }
056
057    /**
058     * Constructor allowing values Comparator to be specified.
059     *
060     * @param comparator Comparator used to order values
061     */
062    public Frequency(Comparator<T> comparator) {
063        freqTable = new TreeMap<>(comparator);
064    }
065
066    /**
067     * Return a string representation of this frequency distribution.
068     *
069     * @return a string representation.
070     */
071    @Override
072    public String toString() {
073        NumberFormat nf = NumberFormat.getPercentInstance();
074        StringBuilder outBuffer = new StringBuilder();
075        outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
076        Iterator<T> iter = freqTable.keySet().iterator();
077        while (iter.hasNext()) {
078            T value = iter.next();
079            outBuffer.append(value);
080            outBuffer.append('\t');
081            outBuffer.append(getCount(value));
082            outBuffer.append('\t');
083            outBuffer.append(nf.format(getPct(value)));
084            outBuffer.append('\t');
085            outBuffer.append(nf.format(getCumPct(value)));
086            outBuffer.append('\n');
087        }
088        return outBuffer.toString();
089    }
090
091    /**
092     * Adds 1 to the frequency count for v.
093     *
094     * @param v the value to add.
095     */
096    public void addValue(T v) {
097        incrementValue(v, 1);
098    }
099
100    /**
101     * Increments the frequency count for v.
102     *
103     * @param v the value to add.
104     * @param increment the amount by which the value should be incremented
105     * @since 3.1
106     */
107    public void incrementValue(T v, long increment) {
108        Long count = freqTable.get(v);
109        if (count == null) {
110            freqTable.put(v, Long.valueOf(increment));
111        } else {
112            freqTable.put(v, Long.valueOf(count.longValue() + increment));
113        }
114    }
115
116    /** Clears the frequency table */
117    public void clear() {
118        freqTable.clear();
119    }
120
121    /**
122     * Returns an Iterator over the set of values that have been added.
123     *
124     * @return values Iterator
125     */
126    public Iterator<T> valuesIterator() {
127        return freqTable.keySet().iterator();
128    }
129
130    /**
131     * Return an Iterator over the set of keys and values that have been added.
132     * Using the entry set to iterate is more efficient in the case where you
133     * need to access respective counts as well as values, since it doesn't
134     * require a "get" for every key...the value is provided in the Map.Entry.
135     *
136     * @return entry set Iterator
137     * @since 3.1
138     */
139    public Iterator<Map.Entry<T, Long>> entrySetIterator() {
140        return freqTable.entrySet().iterator();
141    }
142
143    //-------------------------------------------------------------------------
144
145    /**
146     * Returns the sum of all frequencies.
147     *
148     * @return the total frequency count.
149     */
150    public long getSumFreq() {
151        long result = 0;
152        Iterator<Long> iterator = freqTable.values().iterator();
153        while (iterator.hasNext())  {
154            result += iterator.next().longValue();
155        }
156        return result;
157    }
158
159    /**
160     * Returns the number of values equal to v.
161     *
162     * @param v the value to lookup.
163     * @return the frequency of v.
164     */
165    public long getCount(T v) {
166        long result = 0;
167        Long count =  freqTable.get(v);
168        if (count != null) {
169            result = count.longValue();
170        }
171        return result;
172    }
173
174    /**
175     * Returns the number of values in the frequency table.
176     *
177     * @return the number of unique values that have been added to the frequency table.
178     * @see #valuesIterator()
179     */
180    public int getUniqueCount(){
181        return freqTable.keySet().size();
182    }
183
184    /**
185     * Returns the percentage of values that are equal to v
186     * (as a proportion between 0 and 1).
187     * <p>
188     * Returns <code>Double.NaN</code> if no values have been added.
189     * </p>
190     *
191     * @param v the value to lookup
192     * @return the proportion of values equal to v
193     */
194    public double getPct(T v) {
195        final long sumFreq = getSumFreq();
196        if (sumFreq == 0) {
197            return Double.NaN;
198        }
199        return (double) getCount(v) / (double) sumFreq;
200    }
201
202    //-----------------------------------------------------------------------------------------
203
204    /**
205     * Returns the cumulative frequency of values less than or equal to v.
206     *
207     * @param v the value to lookup.
208     * @return the proportion of values equal to v
209     */
210    public long getCumFreq(T v) {
211        if (getSumFreq() == 0) {
212            return 0;
213        }
214        Comparator<? super T> c = freqTable.comparator();
215        if (c == null) {
216            c = new NaturalComparator<T>();
217        }
218        long result = 0;
219
220        Long value = freqTable.get(v);
221        if (value != null) {
222            result = value.longValue();
223        }
224
225        if (c.compare(v, freqTable.firstKey()) < 0) {
226            return 0;  // v is comparable, but less than first value
227        }
228
229        if (c.compare(v, freqTable.lastKey()) >= 0) {
230            return getSumFreq();    // v is comparable, but greater than the last value
231        }
232
233        Iterator<T> values = valuesIterator();
234        while (values.hasNext()) {
235            T nextValue = values.next();
236            if (c.compare(v, nextValue) > 0) {
237                result += getCount(nextValue);
238            } else {
239                return result;
240            }
241        }
242        return result;
243    }
244
245    //----------------------------------------------------------------------------------------------
246
247    /**
248     * Returns the cumulative percentage of values less than or equal to v
249     * (as a proportion between 0 and 1).
250     * <p>
251     * Returns <code>Double.NaN</code> if no values have been added.
252     * </p>
253     *
254     * @param v the value to lookup
255     * @return the proportion of values less than or equal to v
256     */
257    public double getCumPct(T v) {
258        final long sumFreq = getSumFreq();
259        if (sumFreq == 0) {
260            return Double.NaN;
261        }
262        return (double) getCumFreq(v) / (double) sumFreq;
263    }
264
265    /**
266     * Returns the mode value(s) in comparator order.
267     *
268     * @return a list containing the value(s) which appear most often.
269     * @since 3.3
270     */
271    public List<T> getMode() {
272        long mostPopular = 0; // frequencies are always positive
273
274        // Get the max count first, so we avoid having to recreate the List each time
275        for(Long l : freqTable.values()) {
276            long frequency = l.longValue();
277            if (frequency > mostPopular) {
278                mostPopular = frequency;
279            }
280        }
281
282        List<T> modeList = new ArrayList<>();
283        for (Entry<T, Long> ent : freqTable.entrySet()) {
284            long frequency = ent.getValue().longValue();
285            if (frequency == mostPopular) {
286               modeList.add(ent.getKey());
287            }
288        }
289        return modeList;
290    }
291
292    //----------------------------------------------------------------------------------------------
293
294    /**
295     * Merge another Frequency object's counts into this instance.
296     * This Frequency's counts will be incremented (or set when not already set)
297     * by the counts represented by other.
298     *
299     * @param other the other {@link Frequency} object to be merged
300     * @throws NullArgumentException if {@code other} is null
301     * @since 3.1
302     */
303    public void merge(final Frequency<T> other) throws NullArgumentException {
304        MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
305
306        final Iterator<Map.Entry<T, Long>> iter = other.entrySetIterator();
307        while (iter.hasNext()) {
308            final Map.Entry<T, Long> entry = iter.next();
309            incrementValue(entry.getKey(), entry.getValue().longValue());
310        }
311    }
312
313    /**
314     * Merge a {@link Collection} of {@link Frequency} objects into this instance.
315     * This Frequency's counts will be incremented (or set when not already set)
316     * by the counts represented by each of the others.
317     *
318     * @param others the other {@link Frequency} objects to be merged
319     * @throws NullArgumentException if the collection is null
320     * @since 3.1
321     */
322    public void merge(final Collection<Frequency<T>> others) throws NullArgumentException {
323        MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
324
325        for (final Frequency<T> freq : others) {
326            merge(freq);
327        }
328    }
329
330    //----------------------------------------------------------------------------------------------
331
332    /**
333     * A Comparator that compares comparable objects using the
334     * natural order. Copied from Commons Collections ComparableComparator.
335     *
336     * @param <U> the type of the objects compared
337     */
338    private static class NaturalComparator<U extends Comparable<U>> implements Comparator<U>, Serializable {
339
340        /** Serializable version identifier */
341        private static final long serialVersionUID = -3852193713161395148L;
342
343        /**
344         * Compare the two {@link Comparable Comparable} arguments.
345         * This method is equivalent to:
346         * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
347         *
348         * @param  o1 the first object
349         * @param  o2 the second object
350         * @return  result of comparison
351         * @throws NullPointerException when <i>o1</i> is <code>null</code>,
352         *         or when <code>((Comparable)o1).compareTo(o2)</code> does
353         */
354        @Override
355        public int compare(U o1, U o2) {
356            return o1.compareTo(o2);
357        }
358    }
359
360    /** {@inheritDoc} */
361    @Override
362    public int hashCode() {
363        final int prime = 31;
364        int result = 1;
365        result = prime * result +
366                 ((freqTable == null) ? 0 : freqTable.hashCode());
367        return result;
368    }
369
370    /** {@inheritDoc} */
371    @Override
372    public boolean equals(Object obj) {
373        if (this == obj) {
374            return true;
375        }
376        if (!(obj instanceof Frequency<?>)) {
377            return false;
378        }
379        Frequency<?> other = (Frequency<?>) obj;
380        if (freqTable == null) {
381            if (other.freqTable != null) {
382                return false;
383            }
384        } else if (!freqTable.equals(other.freqTable)) {
385            return false;
386        }
387        return true;
388    }
389
390}