Frequency.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat;

  18. import java.text.NumberFormat;
  19. import java.util.ArrayList;
  20. import java.util.Collection;
  21. import java.util.Comparator;
  22. import java.util.Iterator;
  23. import java.util.List;
  24. import java.util.Map;
  25. import java.util.Map.Entry;
  26. import java.util.SortedMap;
  27. import java.util.TreeMap;

  28. import org.apache.commons.math4.legacy.exception.NullArgumentException;
  29. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;

  30. /**
  31.  * Maintains a frequency distribution.
  32.  *
  33.  * <p>The values are ordered using the default (natural order), unless a
  34.  * <code>Comparator</code> is supplied in the constructor.</p>
  35.  *
  36.  * @param <T> a comparable type used in the frequency distribution
  37.  */
  38. public class Frequency<T extends Comparable<T>> {
  39.     /** underlying collection. */
  40.     private final SortedMap<T, Long> freqTable;

  41.     /**
  42.      * Default constructor.
  43.      */
  44.     public Frequency() {
  45.         freqTable = new TreeMap<>();
  46.     }

  47.     /**
  48.      * Constructor allowing values Comparator to be specified.
  49.      *
  50.      * @param comparator Comparator used to order values
  51.      */
  52.     public Frequency(Comparator<T> comparator) {
  53.         freqTable = new TreeMap<>(comparator);
  54.     }

  55.     /**
  56.      * Return a string representation of this frequency distribution.
  57.      *
  58.      * @return a string representation.
  59.      */
  60.     @Override
  61.     public String toString() {
  62.         NumberFormat nf = NumberFormat.getPercentInstance();
  63.         StringBuilder outBuffer = new StringBuilder();
  64.         outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
  65.         Iterator<T> iter = freqTable.keySet().iterator();
  66.         while (iter.hasNext()) {
  67.             T value = iter.next();
  68.             outBuffer.append(value);
  69.             outBuffer.append('\t');
  70.             outBuffer.append(getCount(value));
  71.             outBuffer.append('\t');
  72.             outBuffer.append(nf.format(getPct(value)));
  73.             outBuffer.append('\t');
  74.             outBuffer.append(nf.format(getCumPct(value)));
  75.             outBuffer.append('\n');
  76.         }
  77.         return outBuffer.toString();
  78.     }

  79.     /**
  80.      * Adds 1 to the frequency count for v.
  81.      *
  82.      * @param v the value to add.
  83.      */
  84.     public void addValue(T v) {
  85.         incrementValue(v, 1);
  86.     }

  87.     /**
  88.      * Increments the frequency count for v.
  89.      *
  90.      * @param v the value to add.
  91.      * @param increment the amount by which the value should be incremented
  92.      * @since 3.1
  93.      */
  94.     public void incrementValue(T v, long increment) {
  95.         Long count = freqTable.get(v);
  96.         if (count == null) {
  97.             freqTable.put(v, Long.valueOf(increment));
  98.         } else {
  99.             freqTable.put(v, Long.valueOf(count.longValue() + increment));
  100.         }
  101.     }

  102.     /** Clears the frequency table. */
  103.     public void clear() {
  104.         freqTable.clear();
  105.     }

  106.     /**
  107.      * Returns an Iterator over the set of values that have been added.
  108.      *
  109.      * @return values Iterator
  110.      */
  111.     public Iterator<T> valuesIterator() {
  112.         return freqTable.keySet().iterator();
  113.     }

  114.     /**
  115.      * Return an Iterator over the set of keys and values that have been added.
  116.      * Using the entry set to iterate is more efficient in the case where you
  117.      * need to access respective counts as well as values, since it doesn't
  118.      * require a "get" for every key...the value is provided in the Map.Entry.
  119.      *
  120.      * @return entry set Iterator
  121.      * @since 3.1
  122.      */
  123.     public Iterator<Map.Entry<T, Long>> entrySetIterator() {
  124.         return freqTable.entrySet().iterator();
  125.     }

  126.     //-------------------------------------------------------------------------

  127.     /**
  128.      * Returns the sum of all frequencies.
  129.      *
  130.      * @return the total frequency count.
  131.      */
  132.     public long getSumFreq() {
  133.         long result = 0;
  134.         Iterator<Long> iterator = freqTable.values().iterator();
  135.         while (iterator.hasNext())  {
  136.             result += iterator.next().longValue();
  137.         }
  138.         return result;
  139.     }

  140.     /**
  141.      * Returns the number of values equal to v.
  142.      *
  143.      * @param v the value to lookup.
  144.      * @return the frequency of v.
  145.      */
  146.     public long getCount(T v) {
  147.         long result = 0;
  148.         Long count =  freqTable.get(v);
  149.         if (count != null) {
  150.             result = count.longValue();
  151.         }
  152.         return result;
  153.     }

  154.     /**
  155.      * Returns the number of values in the frequency table.
  156.      *
  157.      * @return the number of unique values that have been added to the frequency table.
  158.      * @see #valuesIterator()
  159.      */
  160.     public int getUniqueCount(){
  161.         return freqTable.keySet().size();
  162.     }

  163.     /**
  164.      * Returns the percentage of values that are equal to v
  165.      * (as a proportion between 0 and 1).
  166.      * <p>
  167.      * Returns <code>Double.NaN</code> if no values have been added.
  168.      * </p>
  169.      *
  170.      * @param v the value to lookup
  171.      * @return the proportion of values equal to v
  172.      */
  173.     public double getPct(T v) {
  174.         final long sumFreq = getSumFreq();
  175.         if (sumFreq == 0) {
  176.             return Double.NaN;
  177.         }
  178.         return (double) getCount(v) / (double) sumFreq;
  179.     }

  180.     //-----------------------------------------------------------------------------------------

  181.     /**
  182.      * Returns the cumulative frequency of values less than or equal to v.
  183.      *
  184.      * @param v the value to lookup.
  185.      * @return the proportion of values equal to v
  186.      */
  187.     public long getCumFreq(T v) {
  188.         if (getSumFreq() == 0) {
  189.             return 0;
  190.         }
  191.         Comparator<? super T> c = freqTable.comparator();
  192.         if (c == null) {
  193.             c = new NaturalComparator<>();
  194.         }
  195.         long result = 0;

  196.         Long value = freqTable.get(v);
  197.         if (value != null) {
  198.             result = value.longValue();
  199.         }

  200.         if (c.compare(v, freqTable.firstKey()) < 0) {
  201.             return 0;  // v is comparable, but less than first value
  202.         }

  203.         if (c.compare(v, freqTable.lastKey()) >= 0) {
  204.             return getSumFreq();    // v is comparable, but greater than the last value
  205.         }

  206.         Iterator<T> values = valuesIterator();
  207.         while (values.hasNext()) {
  208.             T nextValue = values.next();
  209.             if (c.compare(v, nextValue) > 0) {
  210.                 result += getCount(nextValue);
  211.             } else {
  212.                 return result;
  213.             }
  214.         }
  215.         return result;
  216.     }

  217.     //----------------------------------------------------------------------------------------------

  218.     /**
  219.      * Returns the cumulative percentage of values less than or equal to v
  220.      * (as a proportion between 0 and 1).
  221.      * <p>
  222.      * Returns <code>Double.NaN</code> if no values have been added.
  223.      * </p>
  224.      *
  225.      * @param v the value to lookup
  226.      * @return the proportion of values less than or equal to v
  227.      */
  228.     public double getCumPct(T v) {
  229.         final long sumFreq = getSumFreq();
  230.         if (sumFreq == 0) {
  231.             return Double.NaN;
  232.         }
  233.         return (double) getCumFreq(v) / (double) sumFreq;
  234.     }

  235.     /**
  236.      * Returns the mode value(s) in comparator order.
  237.      *
  238.      * @return a list containing the value(s) which appear most often.
  239.      * @since 3.3
  240.      */
  241.     public List<T> getMode() {
  242.         long mostPopular = 0; // frequencies are always positive

  243.         // Get the max count first, so we avoid having to recreate the List each time
  244.         for(Long l : freqTable.values()) {
  245.             long frequency = l.longValue();
  246.             if (frequency > mostPopular) {
  247.                 mostPopular = frequency;
  248.             }
  249.         }

  250.         List<T> modeList = new ArrayList<>();
  251.         for (Entry<T, Long> ent : freqTable.entrySet()) {
  252.             long frequency = ent.getValue().longValue();
  253.             if (frequency == mostPopular) {
  254.                modeList.add(ent.getKey());
  255.             }
  256.         }
  257.         return modeList;
  258.     }

  259.     //----------------------------------------------------------------------------------------------

  260.     /**
  261.      * Merge another Frequency object's counts into this instance.
  262.      * This Frequency's counts will be incremented (or set when not already set)
  263.      * by the counts represented by other.
  264.      *
  265.      * @param other the other {@link Frequency} object to be merged
  266.      * @throws NullArgumentException if {@code other} is null
  267.      * @since 3.1
  268.      */
  269.     public void merge(final Frequency<T> other) throws NullArgumentException {
  270.         NullArgumentException.check(other, LocalizedFormats.NULL_NOT_ALLOWED);

  271.         final Iterator<Map.Entry<T, Long>> iter = other.entrySetIterator();
  272.         while (iter.hasNext()) {
  273.             final Map.Entry<T, Long> entry = iter.next();
  274.             incrementValue(entry.getKey(), entry.getValue().longValue());
  275.         }
  276.     }

  277.     /**
  278.      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
  279.      * This Frequency's counts will be incremented (or set when not already set)
  280.      * by the counts represented by each of the others.
  281.      *
  282.      * @param others the other {@link Frequency} objects to be merged
  283.      * @throws NullArgumentException if the collection is null
  284.      * @since 3.1
  285.      */
  286.     public void merge(final Collection<Frequency<T>> others) throws NullArgumentException {
  287.         NullArgumentException.check(others, LocalizedFormats.NULL_NOT_ALLOWED);

  288.         for (final Frequency<T> freq : others) {
  289.             merge(freq);
  290.         }
  291.     }

  292.     //----------------------------------------------------------------------------------------------

  293.     /**
  294.      * A Comparator that compares comparable objects using the
  295.      * natural order. Copied from Commons Collections ComparableComparator.
  296.      *
  297.      * @param <U> the type of the objects compared
  298.      */
  299.     private static final class NaturalComparator<U extends Comparable<U>> implements Comparator<U> {
  300.         /**
  301.          * Compare the two {@link Comparable Comparable} arguments.
  302.          * This method is equivalent to:
  303.          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
  304.          *
  305.          * @param  o1 the first object
  306.          * @param  o2 the second object
  307.          * @return  result of comparison
  308.          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
  309.          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
  310.          */
  311.         @Override
  312.         public int compare(U o1, U o2) {
  313.             return o1.compareTo(o2);
  314.         }
  315.     }

  316.     /** {@inheritDoc} */
  317.     @Override
  318.     public int hashCode() {
  319.         final int prime = 31;
  320.         int result = 1;
  321.         result = prime * result +
  322.                  ((freqTable == null) ? 0 : freqTable.hashCode());
  323.         return result;
  324.     }

  325.     /** {@inheritDoc} */
  326.     @Override
  327.     public boolean equals(Object obj) {
  328.         if (this == obj) {
  329.             return true;
  330.         }
  331.         if (!(obj instanceof Frequency<?>)) {
  332.             return false;
  333.         }
  334.         Frequency<?> other = (Frequency<?>) obj;
  335.         return freqTable.equals(other.freqTable);
  336.     }
  337. }