001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat;
018
019 import java.io.Serializable;
020 import java.text.NumberFormat;
021 import java.util.Collection;
022 import java.util.Iterator;
023 import java.util.Comparator;
024 import java.util.Map;
025 import java.util.TreeMap;
026
027 import org.apache.commons.math3.exception.MathIllegalArgumentException;
028 import org.apache.commons.math3.exception.NullArgumentException;
029 import org.apache.commons.math3.exception.util.LocalizedFormats;
030 import org.apache.commons.math3.util.MathUtils;
031
032 /**
033 * Maintains a frequency distribution.
034 * <p>
035 * Accepts int, long, char or Comparable values. New values added must be
036 * comparable to those that have been added, otherwise the add method will
037 * throw an IllegalArgumentException.</p>
038 * <p>
039 * Integer values (int, long, Integer, Long) are not distinguished by type --
040 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
041 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
042 * <p>
043 * char values are converted by <code>addValue</code> to Character instances.
044 * As such, these values are not comparable to integral values, so attempts
045 * to combine integral types with chars in a frequency distribution will fail.
046 * </p>
047 * <p>
048 * The values are ordered using the default (natural order), unless a
049 * <code>Comparator</code> is supplied in the constructor.</p>
050 *
051 * @version $Id: Frequency.java 1455703 2013-03-12 20:46:23Z tn $
052 */
053 public class Frequency implements Serializable {
054
055 /** Serializable version identifier */
056 private static final long serialVersionUID = -3845586908418844111L;
057
058 /** underlying collection */
059 private final TreeMap<Comparable<?>, Long> freqTable;
060
061 /**
062 * Default constructor.
063 */
064 public Frequency() {
065 freqTable = new TreeMap<Comparable<?>, Long>();
066 }
067
068 /**
069 * Constructor allowing values Comparator to be specified.
070 *
071 * @param comparator Comparator used to order values
072 */
073 @SuppressWarnings("unchecked") // TODO is the cast OK?
074 public Frequency(Comparator<?> comparator) {
075 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
076 }
077
078 /**
079 * Return a string representation of this frequency
080 * distribution.
081 *
082 * @return a string representation.
083 */
084 @Override
085 public String toString() {
086 NumberFormat nf = NumberFormat.getPercentInstance();
087 StringBuilder outBuffer = new StringBuilder();
088 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
089 Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
090 while (iter.hasNext()) {
091 Comparable<?> value = iter.next();
092 outBuffer.append(value);
093 outBuffer.append('\t');
094 outBuffer.append(getCount(value));
095 outBuffer.append('\t');
096 outBuffer.append(nf.format(getPct(value)));
097 outBuffer.append('\t');
098 outBuffer.append(nf.format(getCumPct(value)));
099 outBuffer.append('\n');
100 }
101 return outBuffer.toString();
102 }
103
104 /**
105 * Adds 1 to the frequency count for v.
106 * <p>
107 * If other objects have already been added to this Frequency, v must
108 * be comparable to those that have already been added.
109 * </p>
110 *
111 * @param v the value to add.
112 * @throws MathIllegalArgumentException if <code>v</code> is not comparable with previous entries
113 */
114 public void addValue(Comparable<?> v) throws MathIllegalArgumentException {
115 incrementValue(v, 1);
116 }
117
118 /**
119 * Increments the frequency count for v.
120 * <p>
121 * If other objects have already been added to this Frequency, v must
122 * be comparable to those that have already been added.
123 * </p>
124 *
125 * @param v the value to add.
126 * @param increment the amount by which the value should be incremented
127 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
128 * @since 3.1
129 */
130 public void incrementValue(Comparable<?> v, long increment){
131 Comparable<?> obj = v;
132 if (v instanceof Integer) {
133 obj = Long.valueOf(((Integer) v).longValue());
134 }
135 try {
136 Long count = freqTable.get(obj);
137 if (count == null) {
138 freqTable.put(obj, Long.valueOf(increment));
139 } else {
140 freqTable.put(obj, Long.valueOf(count.longValue() + increment));
141 }
142 } catch (ClassCastException ex) {
143 //TreeMap will throw ClassCastException if v is not comparable
144 throw new MathIllegalArgumentException(
145 LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
146 v.getClass().getName());
147 }
148 }
149
150 /**
151 * Adds 1 to the frequency count for v.
152 *
153 * @param v the value to add.
154 * @throws MathIllegalArgumentException if the table contains entries not
155 * comparable to Integer
156 */
157 public void addValue(int v) throws MathIllegalArgumentException {
158 addValue(Long.valueOf(v));
159 }
160
161 /**
162 * Adds 1 to the frequency count for v.
163 *
164 * @param v the value to add.
165 * @throws MathIllegalArgumentException if the table contains entries not
166 * comparable to Long
167 */
168 public void addValue(long v) throws MathIllegalArgumentException {
169 addValue(Long.valueOf(v));
170 }
171
172 /**
173 * Adds 1 to the frequency count for v.
174 *
175 * @param v the value to add.
176 * @throws MathIllegalArgumentException if the table contains entries not
177 * comparable to Char
178 */
179 public void addValue(char v) throws MathIllegalArgumentException {
180 addValue(Character.valueOf(v));
181 }
182
183 /** Clears the frequency table */
184 public void clear() {
185 freqTable.clear();
186 }
187
188 /**
189 * Returns an Iterator over the set of values that have been added.
190 * <p>
191 * If added values are integral (i.e., integers, longs, Integers, or Longs),
192 * they are converted to Longs when they are added, so the objects returned
193 * by the Iterator will in this case be Longs.</p>
194 *
195 * @return values Iterator
196 */
197 public Iterator<Comparable<?>> valuesIterator() {
198 return freqTable.keySet().iterator();
199 }
200
201 /**
202 * Return an Iterator over the set of keys and values that have been added.
203 * Using the entry set to iterate is more efficient in the case where you
204 * need to access respective counts as well as values, since it doesn't
205 * require a "get" for every key...the value is provided in the Map.Entry.
206 * <p>
207 * If added values are integral (i.e., integers, longs, Integers, or Longs),
208 * they are converted to Longs when they are added, so the values of the
209 * map entries returned by the Iterator will in this case be Longs.</p>
210 *
211 * @return entry set Iterator
212 * @since 3.1
213 */
214 public Iterator<Map.Entry<Comparable<?>, Long>> entrySetIterator() {
215 return freqTable.entrySet().iterator();
216 }
217
218 //-------------------------------------------------------------------------
219
220 /**
221 * Returns the sum of all frequencies.
222 *
223 * @return the total frequency count.
224 */
225 public long getSumFreq() {
226 long result = 0;
227 Iterator<Long> iterator = freqTable.values().iterator();
228 while (iterator.hasNext()) {
229 result += iterator.next().longValue();
230 }
231 return result;
232 }
233
234 /**
235 * Returns the number of values = v.
236 * Returns 0 if the value is not comparable.
237 *
238 * @param v the value to lookup.
239 * @return the frequency of v.
240 */
241 public long getCount(Comparable<?> v) {
242 if (v instanceof Integer) {
243 return getCount(((Integer) v).longValue());
244 }
245 long result = 0;
246 try {
247 Long count = freqTable.get(v);
248 if (count != null) {
249 result = count.longValue();
250 }
251 } catch (ClassCastException ex) { // NOPMD
252 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
253 }
254 return result;
255 }
256
257 /**
258 * Returns the number of values = v.
259 *
260 * @param v the value to lookup.
261 * @return the frequency of v.
262 */
263 public long getCount(int v) {
264 return getCount(Long.valueOf(v));
265 }
266
267 /**
268 * Returns the number of values = v.
269 *
270 * @param v the value to lookup.
271 * @return the frequency of v.
272 */
273 public long getCount(long v) {
274 return getCount(Long.valueOf(v));
275 }
276
277 /**
278 * Returns the number of values = v.
279 *
280 * @param v the value to lookup.
281 * @return the frequency of v.
282 */
283 public long getCount(char v) {
284 return getCount(Character.valueOf(v));
285 }
286
287 /**
288 * Returns the number of values in the frequency table.
289 *
290 * @return the number of unique values that have been added to the frequency table.
291 * @see #valuesIterator()
292 */
293 public int getUniqueCount(){
294 return freqTable.keySet().size();
295 }
296
297 /**
298 * Returns the percentage of values that are equal to v
299 * (as a proportion between 0 and 1).
300 * <p>
301 * Returns <code>Double.NaN</code> if no values have been added.</p>
302 *
303 * @param v the value to lookup
304 * @return the proportion of values equal to v
305 */
306 public double getPct(Comparable<?> v) {
307 final long sumFreq = getSumFreq();
308 if (sumFreq == 0) {
309 return Double.NaN;
310 }
311 return (double) getCount(v) / (double) sumFreq;
312 }
313
314 /**
315 * Returns the percentage of values that are equal to v
316 * (as a proportion between 0 and 1).
317 *
318 * @param v the value to lookup
319 * @return the proportion of values equal to v
320 */
321 public double getPct(int v) {
322 return getPct(Long.valueOf(v));
323 }
324
325 /**
326 * Returns the percentage of values that are equal to v
327 * (as a proportion between 0 and 1).
328 *
329 * @param v the value to lookup
330 * @return the proportion of values equal to v
331 */
332 public double getPct(long v) {
333 return getPct(Long.valueOf(v));
334 }
335
336 /**
337 * Returns the percentage of values that are equal to v
338 * (as a proportion between 0 and 1).
339 *
340 * @param v the value to lookup
341 * @return the proportion of values equal to v
342 */
343 public double getPct(char v) {
344 return getPct(Character.valueOf(v));
345 }
346
347 //-----------------------------------------------------------------------------------------
348
349 /**
350 * Returns the cumulative frequency of values less than or equal to v.
351 * <p>
352 * Returns 0 if v is not comparable to the values set.</p>
353 *
354 * @param v the value to lookup.
355 * @return the proportion of values equal to v
356 */
357 @SuppressWarnings({ "rawtypes", "unchecked" })
358 public long getCumFreq(Comparable<?> v) {
359 if (getSumFreq() == 0) {
360 return 0;
361 }
362 if (v instanceof Integer) {
363 return getCumFreq(((Integer) v).longValue());
364 }
365 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
366 if (c == null) {
367 c = new NaturalComparator();
368 }
369 long result = 0;
370
371 try {
372 Long value = freqTable.get(v);
373 if (value != null) {
374 result = value.longValue();
375 }
376 } catch (ClassCastException ex) {
377 return result; // v is not comparable
378 }
379
380 if (c.compare(v, freqTable.firstKey()) < 0) {
381 return 0; // v is comparable, but less than first value
382 }
383
384 if (c.compare(v, freqTable.lastKey()) >= 0) {
385 return getSumFreq(); // v is comparable, but greater than the last value
386 }
387
388 Iterator<Comparable<?>> values = valuesIterator();
389 while (values.hasNext()) {
390 Comparable<?> nextValue = values.next();
391 if (c.compare(v, nextValue) > 0) {
392 result += getCount(nextValue);
393 } else {
394 return result;
395 }
396 }
397 return result;
398 }
399
400 /**
401 * Returns the cumulative frequency of values less than or equal to v.
402 * <p>
403 * Returns 0 if v is not comparable to the values set.</p>
404 *
405 * @param v the value to lookup
406 * @return the proportion of values equal to v
407 */
408 public long getCumFreq(int v) {
409 return getCumFreq(Long.valueOf(v));
410 }
411
412 /**
413 * Returns the cumulative frequency of values less than or equal to v.
414 * <p>
415 * Returns 0 if v is not comparable to the values set.</p>
416 *
417 * @param v the value to lookup
418 * @return the proportion of values equal to v
419 */
420 public long getCumFreq(long v) {
421 return getCumFreq(Long.valueOf(v));
422 }
423
424 /**
425 * Returns the cumulative frequency of values less than or equal to v.
426 * <p>
427 * Returns 0 if v is not comparable to the values set.</p>
428 *
429 * @param v the value to lookup
430 * @return the proportion of values equal to v
431 */
432 public long getCumFreq(char v) {
433 return getCumFreq(Character.valueOf(v));
434 }
435
436 //----------------------------------------------------------------------------------------------
437
438 /**
439 * Returns the cumulative percentage of values less than or equal to v
440 * (as a proportion between 0 and 1).
441 * <p>
442 * Returns <code>Double.NaN</code> if no values have been added.
443 * Returns 0 if at least one value has been added, but v is not comparable
444 * to the values set.</p>
445 *
446 * @param v the value to lookup
447 * @return the proportion of values less than or equal to v
448 */
449 public double getCumPct(Comparable<?> v) {
450 final long sumFreq = getSumFreq();
451 if (sumFreq == 0) {
452 return Double.NaN;
453 }
454 return (double) getCumFreq(v) / (double) sumFreq;
455 }
456
457 /**
458 * Returns the cumulative percentage of values less than or equal to v
459 * (as a proportion between 0 and 1).
460 * <p>
461 * Returns 0 if v is not comparable to the values set.</p>
462 *
463 * @param v the value to lookup
464 * @return the proportion of values less than or equal to v
465 */
466 public double getCumPct(int v) {
467 return getCumPct(Long.valueOf(v));
468 }
469
470 /**
471 * Returns the cumulative percentage of values less than or equal to v
472 * (as a proportion between 0 and 1).
473 * <p>
474 * Returns 0 if v is not comparable to the values set.</p>
475 *
476 * @param v the value to lookup
477 * @return the proportion of values less than or equal to v
478 */
479 public double getCumPct(long v) {
480 return getCumPct(Long.valueOf(v));
481 }
482
483 /**
484 * Returns the cumulative percentage of values less than or equal to v
485 * (as a proportion between 0 and 1).
486 * <p>
487 * Returns 0 if v is not comparable to the values set.</p>
488 *
489 * @param v the value to lookup
490 * @return the proportion of values less than or equal to v
491 */
492 public double getCumPct(char v) {
493 return getCumPct(Character.valueOf(v));
494 }
495
496 //----------------------------------------------------------------------------------------------
497
498 /**
499 * Merge another Frequency object's counts into this instance.
500 * This Frequency's counts will be incremented (or set when not already set)
501 * by the counts represented by other.
502 *
503 * @param other the other {@link Frequency} object to be merged
504 * @throws NullArgumentException if {@code other} is null
505 * @since 3.1
506 */
507 public void merge(final Frequency other) throws NullArgumentException {
508 MathUtils.checkNotNull(other, LocalizedFormats.NULL_NOT_ALLOWED);
509
510 final Iterator<Map.Entry<Comparable<?>, Long>> iter = other.entrySetIterator();
511 while (iter.hasNext()) {
512 final Map.Entry<Comparable<?>, Long> entry = iter.next();
513 incrementValue(entry.getKey(), entry.getValue());
514 }
515 }
516
517 /**
518 * Merge a {@link Collection} of {@link Frequency} objects into this instance.
519 * This Frequency's counts will be incremented (or set when not already set)
520 * by the counts represented by each of the others.
521 *
522 * @param others the other {@link Frequency} objects to be merged
523 * @throws NullArgumentException if the collection is null
524 * @since 3.1
525 */
526 public void merge(final Collection<Frequency> others) throws NullArgumentException {
527 MathUtils.checkNotNull(others, LocalizedFormats.NULL_NOT_ALLOWED);
528
529 for (final Frequency freq : others) {
530 merge(freq);
531 }
532 }
533
534 //----------------------------------------------------------------------------------------------
535
536 /**
537 * A Comparator that compares comparable objects using the
538 * natural order. Copied from Commons Collections ComparableComparator.
539 */
540 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
541
542 /** Serializable version identifier */
543 private static final long serialVersionUID = -3852193713161395148L;
544
545 /**
546 * Compare the two {@link Comparable Comparable} arguments.
547 * This method is equivalent to:
548 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
549 *
550 * @param o1 the first object
551 * @param o2 the second object
552 * @return result of comparison
553 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
554 * or when <code>((Comparable)o1).compareTo(o2)</code> does
555 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
556 * or when <code>((Comparable)o1).compareTo(o2)</code> does
557 */
558 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
559 public int compare(Comparable<T> o1, Comparable<T> o2) {
560 return o1.compareTo((T) o2);
561 }
562 }
563
564 /** {@inheritDoc} */
565 @Override
566 public int hashCode() {
567 final int prime = 31;
568 int result = 1;
569 result = prime * result +
570 ((freqTable == null) ? 0 : freqTable.hashCode());
571 return result;
572 }
573
574 /** {@inheritDoc} */
575 @Override
576 public boolean equals(Object obj) {
577 if (this == obj) {
578 return true;
579 }
580 if (!(obj instanceof Frequency)) {
581 return false;
582 }
583 Frequency other = (Frequency) obj;
584 if (freqTable == null) {
585 if (other.freqTable != null) {
586 return false;
587 }
588 } else if (!freqTable.equals(other.freqTable)) {
589 return false;
590 }
591 return true;
592 }
593
594 }