1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat;
18
19 import java.text.NumberFormat;
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Comparator;
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Map.Entry;
27 import java.util.SortedMap;
28 import java.util.TreeMap;
29
30 import org.apache.commons.math4.legacy.exception.NullArgumentException;
31 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
32
33 /**
34 * Maintains a frequency distribution.
35 *
36 * <p>The values are ordered using the default (natural order), unless a
37 * <code>Comparator</code> is supplied in the constructor.</p>
38 *
39 * @param <T> a comparable type used in the frequency distribution
40 */
41 public class Frequency<T extends Comparable<T>> {
42 /** underlying collection. */
43 private final SortedMap<T, Long> freqTable;
44
45 /**
46 * Default constructor.
47 */
48 public Frequency() {
49 freqTable = new TreeMap<>();
50 }
51
52 /**
53 * Constructor allowing values Comparator to be specified.
54 *
55 * @param comparator Comparator used to order values
56 */
57 public Frequency(Comparator<T> comparator) {
58 freqTable = new TreeMap<>(comparator);
59 }
60
61 /**
62 * Return a string representation of this frequency distribution.
63 *
64 * @return a string representation.
65 */
66 @Override
67 public String toString() {
68 NumberFormat nf = NumberFormat.getPercentInstance();
69 StringBuilder outBuffer = new StringBuilder();
70 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
71 Iterator<T> iter = freqTable.keySet().iterator();
72 while (iter.hasNext()) {
73 T value = iter.next();
74 outBuffer.append(value);
75 outBuffer.append('\t');
76 outBuffer.append(getCount(value));
77 outBuffer.append('\t');
78 outBuffer.append(nf.format(getPct(value)));
79 outBuffer.append('\t');
80 outBuffer.append(nf.format(getCumPct(value)));
81 outBuffer.append('\n');
82 }
83 return outBuffer.toString();
84 }
85
86 /**
87 * Adds 1 to the frequency count for v.
88 *
89 * @param v the value to add.
90 */
91 public void addValue(T v) {
92 incrementValue(v, 1);
93 }
94
95 /**
96 * Increments the frequency count for v.
97 *
98 * @param v the value to add.
99 * @param increment the amount by which the value should be incremented
100 * @since 3.1
101 */
102 public void incrementValue(T v, long increment) {
103 Long count = freqTable.get(v);
104 if (count == null) {
105 freqTable.put(v, Long.valueOf(increment));
106 } else {
107 freqTable.put(v, Long.valueOf(count.longValue() + increment));
108 }
109 }
110
111 /** Clears the frequency table. */
112 public void clear() {
113 freqTable.clear();
114 }
115
116 /**
117 * Returns an Iterator over the set of values that have been added.
118 *
119 * @return values Iterator
120 */
121 public Iterator<T> valuesIterator() {
122 return freqTable.keySet().iterator();
123 }
124
125 /**
126 * Return an Iterator over the set of keys and values that have been added.
127 * Using the entry set to iterate is more efficient in the case where you
128 * need to access respective counts as well as values, since it doesn't
129 * require a "get" for every key...the value is provided in the Map.Entry.
130 *
131 * @return entry set Iterator
132 * @since 3.1
133 */
134 public Iterator<Map.Entry<T, Long>> entrySetIterator() {
135 return freqTable.entrySet().iterator();
136 }
137
138 //-------------------------------------------------------------------------
139
140 /**
141 * Returns the sum of all frequencies.
142 *
143 * @return the total frequency count.
144 */
145 public long getSumFreq() {
146 long result = 0;
147 Iterator<Long> iterator = freqTable.values().iterator();
148 while (iterator.hasNext()) {
149 result += iterator.next().longValue();
150 }
151 return result;
152 }
153
154 /**
155 * Returns the number of values equal to v.
156 *
157 * @param v the value to lookup.
158 * @return the frequency of v.
159 */
160 public long getCount(T v) {
161 long result = 0;
162 Long count = freqTable.get(v);
163 if (count != null) {
164 result = count.longValue();
165 }
166 return result;
167 }
168
169 /**
170 * Returns the number of values in the frequency table.
171 *
172 * @return the number of unique values that have been added to the frequency table.
173 * @see #valuesIterator()
174 */
175 public int getUniqueCount(){
176 return freqTable.keySet().size();
177 }
178
179 /**
180 * Returns the percentage of values that are equal to v
181 * (as a proportion between 0 and 1).
182 * <p>
183 * Returns <code>Double.NaN</code> if no values have been added.
184 * </p>
185 *
186 * @param v the value to lookup
187 * @return the proportion of values equal to v
188 */
189 public double getPct(T v) {
190 final long sumFreq = getSumFreq();
191 if (sumFreq == 0) {
192 return Double.NaN;
193 }
194 return (double) getCount(v) / (double) sumFreq;
195 }
196
197 //-----------------------------------------------------------------------------------------
198
199 /**
200 * Returns the cumulative frequency of values less than or equal to v.
201 *
202 * @param v the value to lookup.
203 * @return the proportion of values equal to v
204 */
205 public long getCumFreq(T v) {
206 if (getSumFreq() == 0) {
207 return 0;
208 }
209 Comparator<? super T> c = freqTable.comparator();
210 if (c == null) {
211 c = new NaturalComparator<>();
212 }
213 long result = 0;
214
215 Long value = freqTable.get(v);
216 if (value != null) {
217 result = value.longValue();
218 }
219
220 if (c.compare(v, freqTable.firstKey()) < 0) {
221 return 0; // v is comparable, but less than first value
222 }
223
224 if (c.compare(v, freqTable.lastKey()) >= 0) {
225 return getSumFreq(); // v is comparable, but greater than the last value
226 }
227
228 Iterator<T> values = valuesIterator();
229 while (values.hasNext()) {
230 T nextValue = values.next();
231 if (c.compare(v, nextValue) > 0) {
232 result += getCount(nextValue);
233 } else {
234 return result;
235 }
236 }
237 return result;
238 }
239
240 //----------------------------------------------------------------------------------------------
241
242 /**
243 * Returns the cumulative percentage of values less than or equal to v
244 * (as a proportion between 0 and 1).
245 * <p>
246 * Returns <code>Double.NaN</code> if no values have been added.
247 * </p>
248 *
249 * @param v the value to lookup
250 * @return the proportion of values less than or equal to v
251 */
252 public double getCumPct(T v) {
253 final long sumFreq = getSumFreq();
254 if (sumFreq == 0) {
255 return Double.NaN;
256 }
257 return (double) getCumFreq(v) / (double) sumFreq;
258 }
259
260 /**
261 * Returns the mode value(s) in comparator order.
262 *
263 * @return a list containing the value(s) which appear most often.
264 * @since 3.3
265 */
266 public List<T> getMode() {
267 long mostPopular = 0; // frequencies are always positive
268
269 // Get the max count first, so we avoid having to recreate the List each time
270 for(Long l : freqTable.values()) {
271 long frequency = l.longValue();
272 if (frequency > mostPopular) {
273 mostPopular = frequency;
274 }
275 }
276
277 List<T> modeList = new ArrayList<>();
278 for (Entry<T, Long> ent : freqTable.entrySet()) {
279 long frequency = ent.getValue().longValue();
280 if (frequency == mostPopular) {
281 modeList.add(ent.getKey());
282 }
283 }
284 return modeList;
285 }
286
287 //----------------------------------------------------------------------------------------------
288
289 /**
290 * Merge another Frequency object's counts into this instance.
291 * This Frequency's counts will be incremented (or set when not already set)
292 * by the counts represented by other.
293 *
294 * @param other the other {@link Frequency} object to be merged
295 * @throws NullArgumentException if {@code other} is null
296 * @since 3.1
297 */
298 public void merge(final Frequency<T> other) throws NullArgumentException {
299 NullArgumentException.check(other, LocalizedFormats.NULL_NOT_ALLOWED);
300
301 final Iterator<Map.Entry<T, Long>> iter = other.entrySetIterator();
302 while (iter.hasNext()) {
303 final Map.Entry<T, Long> entry = iter.next();
304 incrementValue(entry.getKey(), entry.getValue().longValue());
305 }
306 }
307
308 /**
309 * Merge a {@link Collection} of {@link Frequency} objects into this instance.
310 * This Frequency's counts will be incremented (or set when not already set)
311 * by the counts represented by each of the others.
312 *
313 * @param others the other {@link Frequency} objects to be merged
314 * @throws NullArgumentException if the collection is null
315 * @since 3.1
316 */
317 public void merge(final Collection<Frequency<T>> others) throws NullArgumentException {
318 NullArgumentException.check(others, LocalizedFormats.NULL_NOT_ALLOWED);
319
320 for (final Frequency<T> freq : others) {
321 merge(freq);
322 }
323 }
324
325 //----------------------------------------------------------------------------------------------
326
327 /**
328 * A Comparator that compares comparable objects using the
329 * natural order. Copied from Commons Collections ComparableComparator.
330 *
331 * @param <U> the type of the objects compared
332 */
333 private static final class NaturalComparator<U extends Comparable<U>> implements Comparator<U> {
334 /**
335 * Compare the two {@link Comparable Comparable} arguments.
336 * This method is equivalent to:
337 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
338 *
339 * @param o1 the first object
340 * @param o2 the second object
341 * @return result of comparison
342 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
343 * or when <code>((Comparable)o1).compareTo(o2)</code> does
344 */
345 @Override
346 public int compare(U o1, U o2) {
347 return o1.compareTo(o2);
348 }
349 }
350
351 /** {@inheritDoc} */
352 @Override
353 public int hashCode() {
354 final int prime = 31;
355 int result = 1;
356 result = prime * result +
357 ((freqTable == null) ? 0 : freqTable.hashCode());
358 return result;
359 }
360
361 /** {@inheritDoc} */
362 @Override
363 public boolean equals(Object obj) {
364 if (this == obj) {
365 return true;
366 }
367 if (!(obj instanceof Frequency<?>)) {
368 return false;
369 }
370 Frequency<?> other = (Frequency<?>) obj;
371 return freqTable.equals(other.freqTable);
372 }
373 }