View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat;
18  
19  import java.text.NumberFormat;
20  import java.util.ArrayList;
21  import java.util.Collection;
22  import java.util.Comparator;
23  import java.util.Iterator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.Map.Entry;
27  import java.util.SortedMap;
28  import java.util.TreeMap;
29  
30  import org.apache.commons.math4.legacy.exception.NullArgumentException;
31  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
32  
33  /**
34   * Maintains a frequency distribution.
35   *
36   * <p>The values are ordered using the default (natural order), unless a
37   * <code>Comparator</code> is supplied in the constructor.</p>
38   *
39   * @param <T> a comparable type used in the frequency distribution
40   */
41  public class Frequency<T extends Comparable<T>> {
42      /** underlying collection. */
43      private final SortedMap<T, Long> freqTable;
44  
45      /**
46       * Default constructor.
47       */
48      public Frequency() {
49          freqTable = new TreeMap<>();
50      }
51  
52      /**
53       * Constructor allowing values Comparator to be specified.
54       *
55       * @param comparator Comparator used to order values
56       */
57      public Frequency(Comparator<T> comparator) {
58          freqTable = new TreeMap<>(comparator);
59      }
60  
61      /**
62       * Return a string representation of this frequency distribution.
63       *
64       * @return a string representation.
65       */
66      @Override
67      public String toString() {
68          NumberFormat nf = NumberFormat.getPercentInstance();
69          StringBuilder outBuffer = new StringBuilder();
70          outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
71          Iterator<T> iter = freqTable.keySet().iterator();
72          while (iter.hasNext()) {
73              T value = iter.next();
74              outBuffer.append(value);
75              outBuffer.append('\t');
76              outBuffer.append(getCount(value));
77              outBuffer.append('\t');
78              outBuffer.append(nf.format(getPct(value)));
79              outBuffer.append('\t');
80              outBuffer.append(nf.format(getCumPct(value)));
81              outBuffer.append('\n');
82          }
83          return outBuffer.toString();
84      }
85  
86      /**
87       * Adds 1 to the frequency count for v.
88       *
89       * @param v the value to add.
90       */
91      public void addValue(T v) {
92          incrementValue(v, 1);
93      }
94  
95      /**
96       * Increments the frequency count for v.
97       *
98       * @param v the value to add.
99       * @param increment the amount by which the value should be incremented
100      * @since 3.1
101      */
102     public void incrementValue(T v, long increment) {
103         Long count = freqTable.get(v);
104         if (count == null) {
105             freqTable.put(v, Long.valueOf(increment));
106         } else {
107             freqTable.put(v, Long.valueOf(count.longValue() + increment));
108         }
109     }
110 
111     /** Clears the frequency table. */
112     public void clear() {
113         freqTable.clear();
114     }
115 
116     /**
117      * Returns an Iterator over the set of values that have been added.
118      *
119      * @return values Iterator
120      */
121     public Iterator<T> valuesIterator() {
122         return freqTable.keySet().iterator();
123     }
124 
125     /**
126      * Return an Iterator over the set of keys and values that have been added.
127      * Using the entry set to iterate is more efficient in the case where you
128      * need to access respective counts as well as values, since it doesn't
129      * require a "get" for every key...the value is provided in the Map.Entry.
130      *
131      * @return entry set Iterator
132      * @since 3.1
133      */
134     public Iterator<Map.Entry<T, Long>> entrySetIterator() {
135         return freqTable.entrySet().iterator();
136     }
137 
138     //-------------------------------------------------------------------------
139 
140     /**
141      * Returns the sum of all frequencies.
142      *
143      * @return the total frequency count.
144      */
145     public long getSumFreq() {
146         long result = 0;
147         Iterator<Long> iterator = freqTable.values().iterator();
148         while (iterator.hasNext())  {
149             result += iterator.next().longValue();
150         }
151         return result;
152     }
153 
154     /**
155      * Returns the number of values equal to v.
156      *
157      * @param v the value to lookup.
158      * @return the frequency of v.
159      */
160     public long getCount(T v) {
161         long result = 0;
162         Long count =  freqTable.get(v);
163         if (count != null) {
164             result = count.longValue();
165         }
166         return result;
167     }
168 
169     /**
170      * Returns the number of values in the frequency table.
171      *
172      * @return the number of unique values that have been added to the frequency table.
173      * @see #valuesIterator()
174      */
175     public int getUniqueCount(){
176         return freqTable.keySet().size();
177     }
178 
179     /**
180      * Returns the percentage of values that are equal to v
181      * (as a proportion between 0 and 1).
182      * <p>
183      * Returns <code>Double.NaN</code> if no values have been added.
184      * </p>
185      *
186      * @param v the value to lookup
187      * @return the proportion of values equal to v
188      */
189     public double getPct(T v) {
190         final long sumFreq = getSumFreq();
191         if (sumFreq == 0) {
192             return Double.NaN;
193         }
194         return (double) getCount(v) / (double) sumFreq;
195     }
196 
197     //-----------------------------------------------------------------------------------------
198 
199     /**
200      * Returns the cumulative frequency of values less than or equal to v.
201      *
202      * @param v the value to lookup.
203      * @return the proportion of values equal to v
204      */
205     public long getCumFreq(T v) {
206         if (getSumFreq() == 0) {
207             return 0;
208         }
209         Comparator<? super T> c = freqTable.comparator();
210         if (c == null) {
211             c = new NaturalComparator<>();
212         }
213         long result = 0;
214 
215         Long value = freqTable.get(v);
216         if (value != null) {
217             result = value.longValue();
218         }
219 
220         if (c.compare(v, freqTable.firstKey()) < 0) {
221             return 0;  // v is comparable, but less than first value
222         }
223 
224         if (c.compare(v, freqTable.lastKey()) >= 0) {
225             return getSumFreq();    // v is comparable, but greater than the last value
226         }
227 
228         Iterator<T> values = valuesIterator();
229         while (values.hasNext()) {
230             T nextValue = values.next();
231             if (c.compare(v, nextValue) > 0) {
232                 result += getCount(nextValue);
233             } else {
234                 return result;
235             }
236         }
237         return result;
238     }
239 
240     //----------------------------------------------------------------------------------------------
241 
242     /**
243      * Returns the cumulative percentage of values less than or equal to v
244      * (as a proportion between 0 and 1).
245      * <p>
246      * Returns <code>Double.NaN</code> if no values have been added.
247      * </p>
248      *
249      * @param v the value to lookup
250      * @return the proportion of values less than or equal to v
251      */
252     public double getCumPct(T v) {
253         final long sumFreq = getSumFreq();
254         if (sumFreq == 0) {
255             return Double.NaN;
256         }
257         return (double) getCumFreq(v) / (double) sumFreq;
258     }
259 
260     /**
261      * Returns the mode value(s) in comparator order.
262      *
263      * @return a list containing the value(s) which appear most often.
264      * @since 3.3
265      */
266     public List<T> getMode() {
267         long mostPopular = 0; // frequencies are always positive
268 
269         // Get the max count first, so we avoid having to recreate the List each time
270         for(Long l : freqTable.values()) {
271             long frequency = l.longValue();
272             if (frequency > mostPopular) {
273                 mostPopular = frequency;
274             }
275         }
276 
277         List<T> modeList = new ArrayList<>();
278         for (Entry<T, Long> ent : freqTable.entrySet()) {
279             long frequency = ent.getValue().longValue();
280             if (frequency == mostPopular) {
281                modeList.add(ent.getKey());
282             }
283         }
284         return modeList;
285     }
286 
287     //----------------------------------------------------------------------------------------------
288 
289     /**
290      * Merge another Frequency object's counts into this instance.
291      * This Frequency's counts will be incremented (or set when not already set)
292      * by the counts represented by other.
293      *
294      * @param other the other {@link Frequency} object to be merged
295      * @throws NullArgumentException if {@code other} is null
296      * @since 3.1
297      */
298     public void merge(final Frequency<T> other) throws NullArgumentException {
299         NullArgumentException.check(other, LocalizedFormats.NULL_NOT_ALLOWED);
300 
301         final Iterator<Map.Entry<T, Long>> iter = other.entrySetIterator();
302         while (iter.hasNext()) {
303             final Map.Entry<T, Long> entry = iter.next();
304             incrementValue(entry.getKey(), entry.getValue().longValue());
305         }
306     }
307 
308     /**
309      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
310      * This Frequency's counts will be incremented (or set when not already set)
311      * by the counts represented by each of the others.
312      *
313      * @param others the other {@link Frequency} objects to be merged
314      * @throws NullArgumentException if the collection is null
315      * @since 3.1
316      */
317     public void merge(final Collection<Frequency<T>> others) throws NullArgumentException {
318         NullArgumentException.check(others, LocalizedFormats.NULL_NOT_ALLOWED);
319 
320         for (final Frequency<T> freq : others) {
321             merge(freq);
322         }
323     }
324 
325     //----------------------------------------------------------------------------------------------
326 
327     /**
328      * A Comparator that compares comparable objects using the
329      * natural order. Copied from Commons Collections ComparableComparator.
330      *
331      * @param <U> the type of the objects compared
332      */
333     private static final class NaturalComparator<U extends Comparable<U>> implements Comparator<U> {
334         /**
335          * Compare the two {@link Comparable Comparable} arguments.
336          * This method is equivalent to:
337          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
338          *
339          * @param  o1 the first object
340          * @param  o2 the second object
341          * @return  result of comparison
342          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
343          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
344          */
345         @Override
346         public int compare(U o1, U o2) {
347             return o1.compareTo(o2);
348         }
349     }
350 
351     /** {@inheritDoc} */
352     @Override
353     public int hashCode() {
354         final int prime = 31;
355         int result = 1;
356         result = prime * result +
357                  ((freqTable == null) ? 0 : freqTable.hashCode());
358         return result;
359     }
360 
361     /** {@inheritDoc} */
362     @Override
363     public boolean equals(Object obj) {
364         if (this == obj) {
365             return true;
366         }
367         if (!(obj instanceof Frequency<?>)) {
368             return false;
369         }
370         Frequency<?> other = (Frequency<?>) obj;
371         return freqTable.equals(other.freqTable);
372     }
373 }