AggregateSummaryStatistics.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.math4.legacy.stat.descriptive;

  18. import java.util.Collection;
  19. import java.util.Iterator;

  20. import org.apache.commons.math4.legacy.exception.NullArgumentException;

  21. /**
  22.  * <p>
  23.  * An aggregator for {@code SummaryStatistics} from several data sets or
  24.  * data set partitions.  In its simplest usage mode, the client creates an
  25.  * instance via the zero-argument constructor, then uses
  26.  * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
  27.  * for each individual data set / partition.  The per-set statistics objects
  28.  * are used as normal, and at any time the aggregate statistics for all the
  29.  * contributors can be obtained from this object.
  30.  * </p><p>
  31.  * Clients with specialized requirements can use alternative constructors to
  32.  * control the statistics implementations and initial values used by the
  33.  * contributing and the internal aggregate {@code SummaryStatistics} objects.
  34.  * </p><p>
  35.  * A static {@link #aggregate(Collection)} method is also included that computes
  36.  * aggregate statistics directly from a Collection of SummaryStatistics instances.
  37.  * </p><p>
  38.  * When {@link #createContributingStatistics()} is used to create SummaryStatistics
  39.  * instances to be aggregated concurrently, the created instances'
  40.  * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
  41.  * instance maintained by this class.  In multithreaded environments, if the functionality
  42.  * provided by {@link #aggregate(Collection)} is adequate, that method should be used
  43.  * to avoid unnecessary computation and synchronization delays.</p>
  44.  *
  45.  * @since 2.0
  46.  *
  47.  */
  48. public class AggregateSummaryStatistics implements StatisticalSummary {
  49.     /**
  50.      * A SummaryStatistics serving as a prototype for creating SummaryStatistics
  51.      * contributing to this aggregate.
  52.      */
  53.     private final SummaryStatistics statisticsPrototype;

  54.     /**
  55.      * The SummaryStatistics in which aggregate statistics are accumulated.
  56.      */
  57.     private final SummaryStatistics statistics;

  58.     /**
  59.      * Initializes a new AggregateSummaryStatistics with default statistics
  60.      * implementations.
  61.      *
  62.      */
  63.     public AggregateSummaryStatistics() {
  64.         // No try-catch or throws NAE because arg is guaranteed non-null
  65.         this(new SummaryStatistics());
  66.     }

  67.     /**
  68.      * Initializes a new AggregateSummaryStatistics with the specified statistics
  69.      * object as a prototype for contributing statistics and for the internal
  70.      * aggregate statistics.  This provides for customized statistics implementations
  71.      * to be used by contributing and aggregate statistics.
  72.      *
  73.      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
  74.      *      prototype both for the internal aggregate statistics and for
  75.      *      contributing statistics obtained via the
  76.      *      {@code createContributingStatistics()} method.  Being a prototype
  77.      *      means that other objects are initialized by copying this object's state.
  78.      *      If {@code null}, a new, default statistics object is used.  Any statistic
  79.      *      values in the prototype are propagated to contributing statistics
  80.      *      objects and (once) into these aggregate statistics.
  81.      * @throws NullArgumentException if prototypeStatistics is null
  82.      * @see #createContributingStatistics()
  83.      */
  84.     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
  85.         this(prototypeStatistics,
  86.              prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
  87.     }

  88.     /**
  89.      * Initializes a new AggregateSummaryStatistics with the specified statistics
  90.      * object as a prototype for contributing statistics and for the internal
  91.      * aggregate statistics.  This provides for different statistics implementations
  92.      * to be used by contributing and aggregate statistics and for an initial
  93.      * state to be supplied for the aggregate statistics.
  94.      *
  95.      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
  96.      *      prototype both for the internal aggregate statistics and for
  97.      *      contributing statistics obtained via the
  98.      *      {@code createContributingStatistics()} method.  Being a prototype
  99.      *      means that other objects are initialized by copying this object's state.
  100.      *      If {@code null}, a new, default statistics object is used.  Any statistic
  101.      *      values in the prototype are propagated to contributing statistics
  102.      *      objects, but not into these aggregate statistics.
  103.      * @param initialStatistics a {@code SummaryStatistics} to serve as the
  104.      *      internal aggregate statistics object.  If {@code null}, a new, default
  105.      *      statistics object is used.
  106.      * @see #createContributingStatistics()
  107.      */
  108.     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
  109.                                       SummaryStatistics initialStatistics) {
  110.         this.statisticsPrototype =
  111.             (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
  112.         this.statistics =
  113.             (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
  114.     }

  115.     /**
  116.      * {@inheritDoc}.  This version returns the maximum over all the aggregated
  117.      * data.
  118.      *
  119.      * @see StatisticalSummary#getMax()
  120.      */
  121.     @Override
  122.     public double getMax() {
  123.         synchronized (statistics) {
  124.             return statistics.getMax();
  125.         }
  126.     }

  127.     /**
  128.      * {@inheritDoc}.  This version returns the mean of all the aggregated data.
  129.      *
  130.      * @see StatisticalSummary#getMean()
  131.      */
  132.     @Override
  133.     public double getMean() {
  134.         synchronized (statistics) {
  135.             return statistics.getMean();
  136.         }
  137.     }

  138.     /**
  139.      * {@inheritDoc}.  This version returns the minimum over all the aggregated
  140.      * data.
  141.      *
  142.      * @see StatisticalSummary#getMin()
  143.      */
  144.     @Override
  145.     public double getMin() {
  146.         synchronized (statistics) {
  147.             return statistics.getMin();
  148.         }
  149.     }

  150.     /**
  151.      * {@inheritDoc}.  This version returns a count of all the aggregated data.
  152.      *
  153.      * @see StatisticalSummary#getN()
  154.      */
  155.     @Override
  156.     public long getN() {
  157.         synchronized (statistics) {
  158.             return statistics.getN();
  159.         }
  160.     }

  161.     /**
  162.      * {@inheritDoc}.  This version returns the standard deviation of all the
  163.      * aggregated data.
  164.      *
  165.      * @see StatisticalSummary#getStandardDeviation()
  166.      */
  167.     @Override
  168.     public double getStandardDeviation() {
  169.         synchronized (statistics) {
  170.             return statistics.getStandardDeviation();
  171.         }
  172.     }

  173.     /**
  174.      * {@inheritDoc}.  This version returns a sum of all the aggregated data.
  175.      *
  176.      * @see StatisticalSummary#getSum()
  177.      */
  178.     @Override
  179.     public double getSum() {
  180.         synchronized (statistics) {
  181.             return statistics.getSum();
  182.         }
  183.     }

  184.     /**
  185.      * {@inheritDoc}.  This version returns the variance of all the aggregated
  186.      * data.
  187.      *
  188.      * @see StatisticalSummary#getVariance()
  189.      */
  190.     @Override
  191.     public double getVariance() {
  192.         synchronized (statistics) {
  193.             return statistics.getVariance();
  194.         }
  195.     }

  196.     /**
  197.      * Returns the sum of the logs of all the aggregated data.
  198.      *
  199.      * @return the sum of logs
  200.      * @see SummaryStatistics#getSumOfLogs()
  201.      */
  202.     public double getSumOfLogs() {
  203.         synchronized (statistics) {
  204.             return statistics.getSumOfLogs();
  205.         }
  206.     }

  207.     /**
  208.      * Returns the geometric mean of all the aggregated data.
  209.      *
  210.      * @return the geometric mean
  211.      * @see SummaryStatistics#getGeometricMean()
  212.      */
  213.     public double getGeometricMean() {
  214.         synchronized (statistics) {
  215.             return statistics.getGeometricMean();
  216.         }
  217.     }

  218.     /**
  219.      * Returns the sum of the squares of all the aggregated data.
  220.      *
  221.      * @return The sum of squares
  222.      * @see SummaryStatistics#getSumsq()
  223.      */
  224.     public double getSumsq() {
  225.         synchronized (statistics) {
  226.             return statistics.getSumsq();
  227.         }
  228.     }

  229.     /**
  230.      * Returns a statistic related to the Second Central Moment.  Specifically,
  231.      * what is returned is the sum of squared deviations from the sample mean
  232.      * among the all of the aggregated data.
  233.      *
  234.      * @return second central moment statistic
  235.      * @see SummaryStatistics#getSecondMoment()
  236.      */
  237.     public double getSecondMoment() {
  238.         synchronized (statistics) {
  239.             return statistics.getSecondMoment();
  240.         }
  241.     }

  242.     /**
  243.      * Return a {@link StatisticalSummaryValues} instance reporting current
  244.      * aggregate statistics.
  245.      *
  246.      * @return Current values of aggregate statistics
  247.      */
  248.     public StatisticalSummary getSummary() {
  249.         synchronized (statistics) {
  250.             return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
  251.                     getMax(), getMin(), getSum());
  252.         }
  253.     }

  254.     /**
  255.      * Creates and returns a {@code SummaryStatistics} whose data will be
  256.      * aggregated with those of this {@code AggregateSummaryStatistics}.
  257.      *
  258.      * @return a {@code SummaryStatistics} whose data will be aggregated with
  259.      *      those of this {@code AggregateSummaryStatistics}.  The initial state
  260.      *      is a copy of the configured prototype statistics.
  261.      */
  262.     public SummaryStatistics createContributingStatistics() {
  263.         SummaryStatistics contributingStatistics
  264.                 = new AggregatingSummaryStatistics(statistics);

  265.         // No try - catch or advertising NAE because neither argument will ever be null
  266.         SummaryStatistics.copy(statisticsPrototype, contributingStatistics);

  267.         return contributingStatistics;
  268.     }

  269.     /**
  270.      * Computes aggregate summary statistics. This method can be used to combine statistics
  271.      * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
  272.      * should contain the same values that would have been obtained by computing a single
  273.      * StatisticalSummary over the combined dataset.
  274.      * <p>
  275.      * Returns null if the collection is empty or null.
  276.      * </p>
  277.      *
  278.      * @param statistics collection of SummaryStatistics to aggregate
  279.      * @return summary statistics for the combined dataset
  280.      */
  281.     public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
  282.         if (statistics == null) {
  283.             return null;
  284.         }
  285.         Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
  286.         if (!iterator.hasNext()) {
  287.             return null;
  288.         }
  289.         StatisticalSummary current = iterator.next();
  290.         long n = current.getN();
  291.         double min = current.getMin();
  292.         double sum = current.getSum();
  293.         double max = current.getMax();
  294.         double var = current.getVariance();
  295.         double m2 = var * (n - 1d);
  296.         double mean = current.getMean();
  297.         while (iterator.hasNext()) {
  298.             current = iterator.next();
  299.             if (current.getMin() < min || Double.isNaN(min)) {
  300.                 min = current.getMin();
  301.             }
  302.             if (current.getMax() > max || Double.isNaN(max)) {
  303.                 max = current.getMax();
  304.             }
  305.             sum += current.getSum();
  306.             final double oldN = n;
  307.             final double curN = current.getN();
  308.             n += curN;
  309.             final double meanDiff = current.getMean() - mean;
  310.             mean = sum / n;
  311.             final double curM2 = current.getVariance() * (curN - 1d);
  312.             m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
  313.         }
  314.         final double variance;
  315.         if (n == 0) {
  316.             variance = Double.NaN;
  317.         } else if (n == 1) {
  318.             variance = 0d;
  319.         } else {
  320.             variance = m2 / (n - 1);
  321.         }
  322.         return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
  323.     }

  324.     /**
  325.      * A SummaryStatistics that also forwards all values added to it to a second
  326.      * {@code SummaryStatistics} for aggregation.
  327.      *
  328.      * @since 2.0
  329.      */
  330.     private static final class AggregatingSummaryStatistics extends SummaryStatistics {

  331.         /**
  332.          * The serialization version of this class.
  333.          */
  334.         private static final long serialVersionUID = 1L;

  335.         /**
  336.          * An additional SummaryStatistics into which values added to these.
  337.          * statistics (and possibly others) are aggregated
  338.          */
  339.         private final SummaryStatistics aggregateStatistics;

  340.         /**
  341.          * Initializes a new AggregatingSummaryStatistics with the specified.
  342.          * aggregate statistics object
  343.          *
  344.          * @param aggregateStatistics a {@code SummaryStatistics} into which
  345.          *      values added to this statistics object should be aggregated
  346.          */
  347.         AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
  348.             this.aggregateStatistics = aggregateStatistics;
  349.         }

  350.         /**
  351.          * {@inheritDoc}.  This version adds the provided value to the configured
  352.          * aggregate after adding it to these statistics.
  353.          *
  354.          * @see SummaryStatistics#addValue(double)
  355.          */
  356.         @Override
  357.         public void addValue(double value) {
  358.             super.addValue(value);
  359.             synchronized (aggregateStatistics) {
  360.                 aggregateStatistics.addValue(value);
  361.             }
  362.         }

  363.         /**
  364.          * Returns true iff <code>object</code> is a
  365.          * <code>SummaryStatistics</code> instance and all statistics have the
  366.          * same values as this.
  367.          * @param object the object to test equality against.
  368.          * @return true if object equals this
  369.          */
  370.         @Override
  371.         public boolean equals(Object object) {
  372.             if (object == this) {
  373.                 return true;
  374.             }
  375.             if (!(object instanceof AggregatingSummaryStatistics)) {
  376.                 return false;
  377.             }
  378.             AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
  379.             return super.equals(stat) &&
  380.                    aggregateStatistics.equals(stat.aggregateStatistics);
  381.         }

  382.         /**
  383.          * Returns hash code based on values of statistics.
  384.          * @return hash code
  385.          */
  386.         @Override
  387.         public int hashCode() {
  388.             return 123 + super.hashCode() + aggregateStatistics.hashCode();
  389.         }
  390.     }
  391. }