1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat;
18
19 import java.io.Serializable;
20 import java.text.NumberFormat;
21 import java.util.Iterator;
22 import java.util.Comparator;
23 import java.util.TreeMap;
24
25 import org.apache.commons.math.exception.MathIllegalArgumentException;
26 import org.apache.commons.math.exception.util.LocalizedFormats;
27
28 /**
29 * Maintains a frequency distribution.
30 * <p>
31 * Accepts int, long, char or Comparable values. New values added must be
32 * comparable to those that have been added, otherwise the add method will
33 * throw an IllegalArgumentException.</p>
34 * <p>
35 * Integer values (int, long, Integer, Long) are not distinguished by type --
36 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
37 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
38 * <p>
39 * char values are converted by <code>addValue</code> to Character instances.
40 * As such, these values are not comparable to integral values, so attempts
41 * to combine integral types with chars in a frequency distribution will fail.
42 * </p>
43 * <p>
44 * The values are ordered using the default (natural order), unless a
45 * <code>Comparator</code> is supplied in the constructor.</p>
46 *
47 * @version $Id: Frequency.java 1178223 2011-10-02 19:03:49Z psteitz $
48 */
49 public class Frequency implements Serializable {
50
51 /** Serializable version identifier */
52 private static final long serialVersionUID = -3845586908418844111L;
53
54 /** underlying collection */
55 private final TreeMap<Comparable<?>, Long> freqTable;
56
57 /**
58 * Default constructor.
59 */
60 public Frequency() {
61 freqTable = new TreeMap<Comparable<?>, Long>();
62 }
63
64 /**
65 * Constructor allowing values Comparator to be specified.
66 *
67 * @param comparator Comparator used to order values
68 */
69 @SuppressWarnings("unchecked") // TODO is the cast OK?
70 public Frequency(Comparator<?> comparator) {
71 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
72 }
73
74 /**
75 * Return a string representation of this frequency
76 * distribution.
77 *
78 * @return a string representation.
79 */
80 @Override
81 public String toString() {
82 NumberFormat nf = NumberFormat.getPercentInstance();
83 StringBuilder outBuffer = new StringBuilder();
84 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
85 Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
86 while (iter.hasNext()) {
87 Comparable<?> value = iter.next();
88 outBuffer.append(value);
89 outBuffer.append('\t');
90 outBuffer.append(getCount(value));
91 outBuffer.append('\t');
92 outBuffer.append(nf.format(getPct(value)));
93 outBuffer.append('\t');
94 outBuffer.append(nf.format(getCumPct(value)));
95 outBuffer.append('\n');
96 }
97 return outBuffer.toString();
98 }
99
100 /**
101 * Adds 1 to the frequency count for v.
102 * <p>
103 * If other objects have already been added to this Frequency, v must
104 * be comparable to those that have already been added.
105 * </p>
106 *
107 * @param v the value to add.
108 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
109 */
110 public void addValue(Comparable<?> v){
111 Comparable<?> obj = v;
112 if (v instanceof Integer) {
113 obj = Long.valueOf(((Integer) v).longValue());
114 }
115 try {
116 Long count = freqTable.get(obj);
117 if (count == null) {
118 freqTable.put(obj, Long.valueOf(1));
119 } else {
120 freqTable.put(obj, Long.valueOf(count.longValue() + 1));
121 }
122 } catch (ClassCastException ex) {
123 //TreeMap will throw ClassCastException if v is not comparable
124 throw new MathIllegalArgumentException(
125 LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
126 v.getClass().getName());
127 }
128 }
129
130 /**
131 * Adds 1 to the frequency count for v.
132 *
133 * @param v the value to add.
134 */
135 public void addValue(int v) {
136 addValue(Long.valueOf(v));
137 }
138
139 /**
140 * Adds 1 to the frequency count for v.
141 *
142 * @param v the value to add.
143 */
144 public void addValue(long v) {
145 addValue(Long.valueOf(v));
146 }
147
148 /**
149 * Adds 1 to the frequency count for v.
150 *
151 * @param v the value to add.
152 */
153 public void addValue(char v) {
154 addValue(Character.valueOf(v));
155 }
156
157 /** Clears the frequency table */
158 public void clear() {
159 freqTable.clear();
160 }
161
162 /**
163 * Returns an Iterator over the set of values that have been added.
164 * <p>
165 * If added values are integral (i.e., integers, longs, Integers, or Longs),
166 * they are converted to Longs when they are added, so the objects returned
167 * by the Iterator will in this case be Longs.</p>
168 *
169 * @return values Iterator
170 */
171 public Iterator<Comparable<?>> valuesIterator() {
172 return freqTable.keySet().iterator();
173 }
174
175 //-------------------------------------------------------------------------
176
177 /**
178 * Returns the sum of all frequencies.
179 *
180 * @return the total frequency count.
181 */
182 public long getSumFreq() {
183 long result = 0;
184 Iterator<Long> iterator = freqTable.values().iterator();
185 while (iterator.hasNext()) {
186 result += iterator.next().longValue();
187 }
188 return result;
189 }
190
191 /**
192 * Returns the number of values = v.
193 * Returns 0 if the value is not comparable.
194 *
195 * @param v the value to lookup.
196 * @return the frequency of v.
197 */
198 public long getCount(Comparable<?> v) {
199 if (v instanceof Integer) {
200 return getCount(((Integer) v).longValue());
201 }
202 long result = 0;
203 try {
204 Long count = freqTable.get(v);
205 if (count != null) {
206 result = count.longValue();
207 }
208 } catch (ClassCastException ex) {
209 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
210 }
211 return result;
212 }
213
214 /**
215 * Returns the number of values = v.
216 *
217 * @param v the value to lookup.
218 * @return the frequency of v.
219 */
220 public long getCount(int v) {
221 return getCount(Long.valueOf(v));
222 }
223
224 /**
225 * Returns the number of values = v.
226 *
227 * @param v the value to lookup.
228 * @return the frequency of v.
229 */
230 public long getCount(long v) {
231 return getCount(Long.valueOf(v));
232 }
233
234 /**
235 * Returns the number of values = v.
236 *
237 * @param v the value to lookup.
238 * @return the frequency of v.
239 */
240 public long getCount(char v) {
241 return getCount(Character.valueOf(v));
242 }
243
244 /**
245 * Returns the number of values in the frequency table.
246 *
247 * @return the number of unique values that have been added to the frequency table.
248 * @see #valuesIterator()
249 */
250 public int getUniqueCount(){
251 return freqTable.keySet().size();
252 }
253
254 /**
255 * Returns the percentage of values that are equal to v
256 * (as a proportion between 0 and 1).
257 * <p>
258 * Returns <code>Double.NaN</code> if no values have been added.</p>
259 *
260 * @param v the value to lookup
261 * @return the proportion of values equal to v
262 */
263 public double getPct(Comparable<?> v) {
264 final long sumFreq = getSumFreq();
265 if (sumFreq == 0) {
266 return Double.NaN;
267 }
268 return (double) getCount(v) / (double) sumFreq;
269 }
270
271 /**
272 * Returns the percentage of values that are equal to v
273 * (as a proportion between 0 and 1).
274 *
275 * @param v the value to lookup
276 * @return the proportion of values equal to v
277 */
278 public double getPct(int v) {
279 return getPct(Long.valueOf(v));
280 }
281
282 /**
283 * Returns the percentage of values that are equal to v
284 * (as a proportion between 0 and 1).
285 *
286 * @param v the value to lookup
287 * @return the proportion of values equal to v
288 */
289 public double getPct(long v) {
290 return getPct(Long.valueOf(v));
291 }
292
293 /**
294 * Returns the percentage of values that are equal to v
295 * (as a proportion between 0 and 1).
296 *
297 * @param v the value to lookup
298 * @return the proportion of values equal to v
299 */
300 public double getPct(char v) {
301 return getPct(Character.valueOf(v));
302 }
303
304 //-----------------------------------------------------------------------------------------
305
306 /**
307 * Returns the cumulative frequency of values less than or equal to v.
308 * <p>
309 * Returns 0 if v is not comparable to the values set.</p>
310 *
311 * @param v the value to lookup.
312 * @return the proportion of values equal to v
313 */
314 public long getCumFreq(Comparable<?> v) {
315 if (getSumFreq() == 0) {
316 return 0;
317 }
318 if (v instanceof Integer) {
319 return getCumFreq(((Integer) v).longValue());
320 }
321 @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
322 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
323 if (c == null) {
324 c = new NaturalComparator();
325 }
326 long result = 0;
327
328 try {
329 Long value = freqTable.get(v);
330 if (value != null) {
331 result = value.longValue();
332 }
333 } catch (ClassCastException ex) {
334 return result; // v is not comparable
335 }
336
337 if (c.compare(v, freqTable.firstKey()) < 0) {
338 return 0; // v is comparable, but less than first value
339 }
340
341 if (c.compare(v, freqTable.lastKey()) >= 0) {
342 return getSumFreq(); // v is comparable, but greater than the last value
343 }
344
345 Iterator<Comparable<?>> values = valuesIterator();
346 while (values.hasNext()) {
347 Comparable<?> nextValue = values.next();
348 if (c.compare(v, nextValue) > 0) {
349 result += getCount(nextValue);
350 } else {
351 return result;
352 }
353 }
354 return result;
355 }
356
357 /**
358 * Returns the cumulative frequency of values less than or equal to v.
359 * <p>
360 * Returns 0 if v is not comparable to the values set.</p>
361 *
362 * @param v the value to lookup
363 * @return the proportion of values equal to v
364 */
365 public long getCumFreq(int v) {
366 return getCumFreq(Long.valueOf(v));
367 }
368
369 /**
370 * Returns the cumulative frequency of values less than or equal to v.
371 * <p>
372 * Returns 0 if v is not comparable to the values set.</p>
373 *
374 * @param v the value to lookup
375 * @return the proportion of values equal to v
376 */
377 public long getCumFreq(long v) {
378 return getCumFreq(Long.valueOf(v));
379 }
380
381 /**
382 * Returns the cumulative frequency of values less than or equal to v.
383 * <p>
384 * Returns 0 if v is not comparable to the values set.</p>
385 *
386 * @param v the value to lookup
387 * @return the proportion of values equal to v
388 */
389 public long getCumFreq(char v) {
390 return getCumFreq(Character.valueOf(v));
391 }
392
393 //----------------------------------------------------------------------------------------------
394
395 /**
396 * Returns the cumulative percentage of values less than or equal to v
397 * (as a proportion between 0 and 1).
398 * <p>
399 * Returns <code>Double.NaN</code> if no values have been added.
400 * Returns 0 if at least one value has been added, but v is not comparable
401 * to the values set.</p>
402 *
403 * @param v the value to lookup
404 * @return the proportion of values less than or equal to v
405 */
406 public double getCumPct(Comparable<?> v) {
407 final long sumFreq = getSumFreq();
408 if (sumFreq == 0) {
409 return Double.NaN;
410 }
411 return (double) getCumFreq(v) / (double) sumFreq;
412 }
413
414 /**
415 * Returns the cumulative percentage of values less than or equal to v
416 * (as a proportion between 0 and 1).
417 * <p>
418 * Returns 0 if v is not comparable to the values set.</p>
419 *
420 * @param v the value to lookup
421 * @return the proportion of values less than or equal to v
422 */
423 public double getCumPct(int v) {
424 return getCumPct(Long.valueOf(v));
425 }
426
427 /**
428 * Returns the cumulative percentage of values less than or equal to v
429 * (as a proportion between 0 and 1).
430 * <p>
431 * Returns 0 if v is not comparable to the values set.</p>
432 *
433 * @param v the value to lookup
434 * @return the proportion of values less than or equal to v
435 */
436 public double getCumPct(long v) {
437 return getCumPct(Long.valueOf(v));
438 }
439
440 /**
441 * Returns the cumulative percentage of values less than or equal to v
442 * (as a proportion between 0 and 1).
443 * <p>
444 * Returns 0 if v is not comparable to the values set.</p>
445 *
446 * @param v the value to lookup
447 * @return the proportion of values less than or equal to v
448 */
449 public double getCumPct(char v) {
450 return getCumPct(Character.valueOf(v));
451 }
452
453 /**
454 * A Comparator that compares comparable objects using the
455 * natural order. Copied from Commons Collections ComparableComparator.
456 */
457 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
458
459 /** Serializable version identifier */
460 private static final long serialVersionUID = -3852193713161395148L;
461
462 /**
463 * Compare the two {@link Comparable Comparable} arguments.
464 * This method is equivalent to:
465 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
466 *
467 * @param o1 the first object
468 * @param o2 the second object
469 * @return result of comparison
470 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
471 * or when <code>((Comparable)o1).compareTo(o2)</code> does
472 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
473 * or when <code>((Comparable)o1).compareTo(o2)</code> does
474 */
475 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
476 public int compare(Comparable<T> o1, Comparable<T> o2) {
477 return o1.compareTo((T) o2);
478 }
479 }
480
481 /** {@inheritDoc} */
482 @Override
483 public int hashCode() {
484 final int prime = 31;
485 int result = 1;
486 result = prime * result +
487 ((freqTable == null) ? 0 : freqTable.hashCode());
488 return result;
489 }
490
491 /** {@inheritDoc} */
492 @Override
493 public boolean equals(Object obj) {
494 if (this == obj) {
495 return true;
496 }
497 if (!(obj instanceof Frequency)) {
498 return false;
499 }
500 Frequency other = (Frequency) obj;
501 if (freqTable == null) {
502 if (other.freqTable != null) {
503 return false;
504 }
505 } else if (!freqTable.equals(other.freqTable)) {
506 return false;
507 }
508 return true;
509 }
510
511 }