001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.stat.descriptive;
019
020import java.io.Serializable;
021import java.util.Collection;
022import java.util.Iterator;
023
024import org.apache.commons.math3.exception.NullArgumentException;
025
026/**
027 * <p>
028 * An aggregator for {@code SummaryStatistics} from several data sets or
029 * data set partitions.  In its simplest usage mode, the client creates an
030 * instance via the zero-argument constructor, then uses
031 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
032 * for each individual data set / partition.  The per-set statistics objects
033 * are used as normal, and at any time the aggregate statistics for all the
034 * contributors can be obtained from this object.
035 * </p><p>
036 * Clients with specialized requirements can use alternative constructors to
037 * control the statistics implementations and initial values used by the
038 * contributing and the internal aggregate {@code SummaryStatistics} objects.
039 * </p><p>
040 * A static {@link #aggregate(Collection)} method is also included that computes
041 * aggregate statistics directly from a Collection of SummaryStatistics instances.
042 * </p><p>
043 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
044 * instances to be aggregated concurrently, the created instances'
045 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
046 * instance maintained by this class.  In multithreaded environments, if the functionality
047 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
048 * to avoid unnecessary computation and synchronization delays.</p>
049 *
050 * @since 2.0
051 *
052 */
053public class AggregateSummaryStatistics implements StatisticalSummary,
054        Serializable {
055
056
057    /** Serializable version identifier */
058    private static final long serialVersionUID = -8207112444016386906L;
059
060    /**
061     * A SummaryStatistics serving as a prototype for creating SummaryStatistics
062     * contributing to this aggregate
063     */
064    private final SummaryStatistics statisticsPrototype;
065
066    /**
067     * The SummaryStatistics in which aggregate statistics are accumulated.
068     */
069    private final SummaryStatistics statistics;
070
071    /**
072     * Initializes a new AggregateSummaryStatistics with default statistics
073     * implementations.
074     *
075     */
076    public AggregateSummaryStatistics() {
077        // No try-catch or throws NAE because arg is guaranteed non-null
078        this(new SummaryStatistics());
079    }
080
081    /**
082     * Initializes a new AggregateSummaryStatistics with the specified statistics
083     * object as a prototype for contributing statistics and for the internal
084     * aggregate statistics.  This provides for customized statistics implementations
085     * to be used by contributing and aggregate statistics.
086     *
087     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
088     *      prototype both for the internal aggregate statistics and for
089     *      contributing statistics obtained via the
090     *      {@code createContributingStatistics()} method.  Being a prototype
091     *      means that other objects are initialized by copying this object's state.
092     *      If {@code null}, a new, default statistics object is used.  Any statistic
093     *      values in the prototype are propagated to contributing statistics
094     *      objects and (once) into these aggregate statistics.
095     * @throws NullArgumentException if prototypeStatistics is null
096     * @see #createContributingStatistics()
097     */
098    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
099        this(prototypeStatistics,
100             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
101    }
102
103    /**
104     * Initializes a new AggregateSummaryStatistics with the specified statistics
105     * object as a prototype for contributing statistics and for the internal
106     * aggregate statistics.  This provides for different statistics implementations
107     * to be used by contributing and aggregate statistics and for an initial
108     * state to be supplied for the aggregate statistics.
109     *
110     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
111     *      prototype both for the internal aggregate statistics and for
112     *      contributing statistics obtained via the
113     *      {@code createContributingStatistics()} method.  Being a prototype
114     *      means that other objects are initialized by copying this object's state.
115     *      If {@code null}, a new, default statistics object is used.  Any statistic
116     *      values in the prototype are propagated to contributing statistics
117     *      objects, but not into these aggregate statistics.
118     * @param initialStatistics a {@code SummaryStatistics} to serve as the
119     *      internal aggregate statistics object.  If {@code null}, a new, default
120     *      statistics object is used.
121     * @see #createContributingStatistics()
122     */
123    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
124                                      SummaryStatistics initialStatistics) {
125        this.statisticsPrototype =
126            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
127        this.statistics =
128            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
129    }
130
131    /**
132     * {@inheritDoc}.  This version returns the maximum over all the aggregated
133     * data.
134     *
135     * @see StatisticalSummary#getMax()
136     */
137    public double getMax() {
138        synchronized (statistics) {
139            return statistics.getMax();
140        }
141    }
142
143    /**
144     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
145     *
146     * @see StatisticalSummary#getMean()
147     */
148    public double getMean() {
149        synchronized (statistics) {
150            return statistics.getMean();
151        }
152    }
153
154    /**
155     * {@inheritDoc}.  This version returns the minimum over all the aggregated
156     * data.
157     *
158     * @see StatisticalSummary#getMin()
159     */
160    public double getMin() {
161        synchronized (statistics) {
162            return statistics.getMin();
163        }
164    }
165
166    /**
167     * {@inheritDoc}.  This version returns a count of all the aggregated data.
168     *
169     * @see StatisticalSummary#getN()
170     */
171    public long getN() {
172        synchronized (statistics) {
173            return statistics.getN();
174        }
175    }
176
177    /**
178     * {@inheritDoc}.  This version returns the standard deviation of all the
179     * aggregated data.
180     *
181     * @see StatisticalSummary#getStandardDeviation()
182     */
183    public double getStandardDeviation() {
184        synchronized (statistics) {
185            return statistics.getStandardDeviation();
186        }
187    }
188
189    /**
190     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
191     *
192     * @see StatisticalSummary#getSum()
193     */
194    public double getSum() {
195        synchronized (statistics) {
196            return statistics.getSum();
197        }
198    }
199
200    /**
201     * {@inheritDoc}.  This version returns the variance of all the aggregated
202     * data.
203     *
204     * @see StatisticalSummary#getVariance()
205     */
206    public double getVariance() {
207        synchronized (statistics) {
208            return statistics.getVariance();
209        }
210    }
211
212    /**
213     * Returns the sum of the logs of all the aggregated data.
214     *
215     * @return the sum of logs
216     * @see SummaryStatistics#getSumOfLogs()
217     */
218    public double getSumOfLogs() {
219        synchronized (statistics) {
220            return statistics.getSumOfLogs();
221        }
222    }
223
224    /**
225     * Returns the geometric mean of all the aggregated data.
226     *
227     * @return the geometric mean
228     * @see SummaryStatistics#getGeometricMean()
229     */
230    public double getGeometricMean() {
231        synchronized (statistics) {
232            return statistics.getGeometricMean();
233        }
234    }
235
236    /**
237     * Returns the sum of the squares of all the aggregated data.
238     *
239     * @return The sum of squares
240     * @see SummaryStatistics#getSumsq()
241     */
242    public double getSumsq() {
243        synchronized (statistics) {
244            return statistics.getSumsq();
245        }
246    }
247
248    /**
249     * Returns a statistic related to the Second Central Moment.  Specifically,
250     * what is returned is the sum of squared deviations from the sample mean
251     * among the all of the aggregated data.
252     *
253     * @return second central moment statistic
254     * @see SummaryStatistics#getSecondMoment()
255     */
256    public double getSecondMoment() {
257        synchronized (statistics) {
258            return statistics.getSecondMoment();
259        }
260    }
261
262    /**
263     * Return a {@link StatisticalSummaryValues} instance reporting current
264     * aggregate statistics.
265     *
266     * @return Current values of aggregate statistics
267     */
268    public StatisticalSummary getSummary() {
269        synchronized (statistics) {
270            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
271                    getMax(), getMin(), getSum());
272        }
273    }
274
275    /**
276     * Creates and returns a {@code SummaryStatistics} whose data will be
277     * aggregated with those of this {@code AggregateSummaryStatistics}.
278     *
279     * @return a {@code SummaryStatistics} whose data will be aggregated with
280     *      those of this {@code AggregateSummaryStatistics}.  The initial state
281     *      is a copy of the configured prototype statistics.
282     */
283    public SummaryStatistics createContributingStatistics() {
284        SummaryStatistics contributingStatistics
285                = new AggregatingSummaryStatistics(statistics);
286
287        // No try - catch or advertising NAE because neither argument will ever be null
288        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
289
290        return contributingStatistics;
291    }
292
293    /**
294     * Computes aggregate summary statistics. This method can be used to combine statistics
295     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
296     * should contain the same values that would have been obtained by computing a single
297     * StatisticalSummary over the combined dataset.
298     * <p>
299     * Returns null if the collection is empty or null.
300     * </p>
301     *
302     * @param statistics collection of SummaryStatistics to aggregate
303     * @return summary statistics for the combined dataset
304     */
305    public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
306        if (statistics == null) {
307            return null;
308        }
309        Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
310        if (!iterator.hasNext()) {
311            return null;
312        }
313        StatisticalSummary current = iterator.next();
314        long n = current.getN();
315        double min = current.getMin();
316        double sum = current.getSum();
317        double max = current.getMax();
318        double var = current.getVariance();
319        double m2 = var * (n - 1d);
320        double mean = current.getMean();
321        while (iterator.hasNext()) {
322            current = iterator.next();
323            if (current.getMin() < min || Double.isNaN(min)) {
324                min = current.getMin();
325            }
326            if (current.getMax() > max || Double.isNaN(max)) {
327                max = current.getMax();
328            }
329            sum += current.getSum();
330            final double oldN = n;
331            final double curN = current.getN();
332            n += curN;
333            final double meanDiff = current.getMean() - mean;
334            mean = sum / n;
335            final double curM2 = current.getVariance() * (curN - 1d);
336            m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
337        }
338        final double variance;
339        if (n == 0) {
340            variance = Double.NaN;
341        } else if (n == 1) {
342            variance = 0d;
343        } else {
344            variance = m2 / (n - 1);
345        }
346        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
347    }
348
349    /**
350     * A SummaryStatistics that also forwards all values added to it to a second
351     * {@code SummaryStatistics} for aggregation.
352     *
353     * @since 2.0
354     */
355    private static class AggregatingSummaryStatistics extends SummaryStatistics {
356
357        /**
358         * The serialization version of this class
359         */
360        private static final long serialVersionUID = 1L;
361
362        /**
363         * An additional SummaryStatistics into which values added to these
364         * statistics (and possibly others) are aggregated
365         */
366        private final SummaryStatistics aggregateStatistics;
367
368        /**
369         * Initializes a new AggregatingSummaryStatistics with the specified
370         * aggregate statistics object
371         *
372         * @param aggregateStatistics a {@code SummaryStatistics} into which
373         *      values added to this statistics object should be aggregated
374         */
375        AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
376            this.aggregateStatistics = aggregateStatistics;
377        }
378
379        /**
380         * {@inheritDoc}.  This version adds the provided value to the configured
381         * aggregate after adding it to these statistics.
382         *
383         * @see SummaryStatistics#addValue(double)
384         */
385        @Override
386        public void addValue(double value) {
387            super.addValue(value);
388            synchronized (aggregateStatistics) {
389                aggregateStatistics.addValue(value);
390            }
391        }
392
393        /**
394         * Returns true iff <code>object</code> is a
395         * <code>SummaryStatistics</code> instance and all statistics have the
396         * same values as this.
397         * @param object the object to test equality against.
398         * @return true if object equals this
399         */
400        @Override
401        public boolean equals(Object object) {
402            if (object == this) {
403                return true;
404            }
405            if (object instanceof AggregatingSummaryStatistics == false) {
406                return false;
407            }
408            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
409            return super.equals(stat) &&
410                   aggregateStatistics.equals(stat.aggregateStatistics);
411        }
412
413        /**
414         * Returns hash code based on values of statistics
415         * @return hash code
416         */
417        @Override
418        public int hashCode() {
419            return 123 + super.hashCode() + aggregateStatistics.hashCode();
420        }
421    }
422}