001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.stat.descriptive;
019
020import java.io.Serializable;
021import java.util.Collection;
022import java.util.Iterator;
023
024import org.apache.commons.math3.exception.NullArgumentException;
025
026/**
027 * <p>
028 * An aggregator for {@code SummaryStatistics} from several data sets or
029 * data set partitions.  In its simplest usage mode, the client creates an
030 * instance via the zero-argument constructor, then uses
031 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
032 * for each individual data set / partition.  The per-set statistics objects
033 * are used as normal, and at any time the aggregate statistics for all the
034 * contributors can be obtained from this object.
035 * </p><p>
036 * Clients with specialized requirements can use alternative constructors to
037 * control the statistics implementations and initial values used by the
038 * contributing and the internal aggregate {@code SummaryStatistics} objects.
039 * </p><p>
040 * A static {@link #aggregate(Collection)} method is also included that computes
041 * aggregate statistics directly from a Collection of SummaryStatistics instances.
042 * </p><p>
043 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
044 * instances to be aggregated concurrently, the created instances'
045 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
046 * instance maintained by this class.  In multithreaded environments, if the functionality
047 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
048 * to avoid unnecessary computation and synchronization delays.</p>
049 *
050 * @since 2.0
051 * @version $Id: AggregateSummaryStatistics.java 1416643 2012-12-03 19:37:14Z tn $
052 *
053 */
054public class AggregateSummaryStatistics implements StatisticalSummary,
055        Serializable {
056
057
058    /** Serializable version identifier */
059    private static final long serialVersionUID = -8207112444016386906L;
060
061    /**
062     * A SummaryStatistics serving as a prototype for creating SummaryStatistics
063     * contributing to this aggregate
064     */
065    private final SummaryStatistics statisticsPrototype;
066
067    /**
068     * The SummaryStatistics in which aggregate statistics are accumulated.
069     */
070    private final SummaryStatistics statistics;
071
072    /**
073     * Initializes a new AggregateSummaryStatistics with default statistics
074     * implementations.
075     *
076     */
077    public AggregateSummaryStatistics() {
078        // No try-catch or throws NAE because arg is guaranteed non-null
079        this(new SummaryStatistics());
080    }
081
082    /**
083     * Initializes a new AggregateSummaryStatistics with the specified statistics
084     * object as a prototype for contributing statistics and for the internal
085     * aggregate statistics.  This provides for customized statistics implementations
086     * to be used by contributing and aggregate statistics.
087     *
088     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
089     *      prototype both for the internal aggregate statistics and for
090     *      contributing statistics obtained via the
091     *      {@code createContributingStatistics()} method.  Being a prototype
092     *      means that other objects are initialized by copying this object's state.
093     *      If {@code null}, a new, default statistics object is used.  Any statistic
094     *      values in the prototype are propagated to contributing statistics
095     *      objects and (once) into these aggregate statistics.
096     * @throws NullArgumentException if prototypeStatistics is null
097     * @see #createContributingStatistics()
098     */
099    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
100        this(prototypeStatistics,
101             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
102    }
103
104    /**
105     * Initializes a new AggregateSummaryStatistics with the specified statistics
106     * object as a prototype for contributing statistics and for the internal
107     * aggregate statistics.  This provides for different statistics implementations
108     * to be used by contributing and aggregate statistics and for an initial
109     * state to be supplied for the aggregate statistics.
110     *
111     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
112     *      prototype both for the internal aggregate statistics and for
113     *      contributing statistics obtained via the
114     *      {@code createContributingStatistics()} method.  Being a prototype
115     *      means that other objects are initialized by copying this object's state.
116     *      If {@code null}, a new, default statistics object is used.  Any statistic
117     *      values in the prototype are propagated to contributing statistics
118     *      objects, but not into these aggregate statistics.
119     * @param initialStatistics a {@code SummaryStatistics} to serve as the
120     *      internal aggregate statistics object.  If {@code null}, a new, default
121     *      statistics object is used.
122     * @see #createContributingStatistics()
123     */
124    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
125                                      SummaryStatistics initialStatistics) {
126        this.statisticsPrototype =
127            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
128        this.statistics =
129            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
130    }
131
132    /**
133     * {@inheritDoc}.  This version returns the maximum over all the aggregated
134     * data.
135     *
136     * @see StatisticalSummary#getMax()
137     */
138    public double getMax() {
139        synchronized (statistics) {
140            return statistics.getMax();
141        }
142    }
143
144    /**
145     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
146     *
147     * @see StatisticalSummary#getMean()
148     */
149    public double getMean() {
150        synchronized (statistics) {
151            return statistics.getMean();
152        }
153    }
154
155    /**
156     * {@inheritDoc}.  This version returns the minimum over all the aggregated
157     * data.
158     *
159     * @see StatisticalSummary#getMin()
160     */
161    public double getMin() {
162        synchronized (statistics) {
163            return statistics.getMin();
164        }
165    }
166
167    /**
168     * {@inheritDoc}.  This version returns a count of all the aggregated data.
169     *
170     * @see StatisticalSummary#getN()
171     */
172    public long getN() {
173        synchronized (statistics) {
174            return statistics.getN();
175        }
176    }
177
178    /**
179     * {@inheritDoc}.  This version returns the standard deviation of all the
180     * aggregated data.
181     *
182     * @see StatisticalSummary#getStandardDeviation()
183     */
184    public double getStandardDeviation() {
185        synchronized (statistics) {
186            return statistics.getStandardDeviation();
187        }
188    }
189
190    /**
191     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
192     *
193     * @see StatisticalSummary#getSum()
194     */
195    public double getSum() {
196        synchronized (statistics) {
197            return statistics.getSum();
198        }
199    }
200
201    /**
202     * {@inheritDoc}.  This version returns the variance of all the aggregated
203     * data.
204     *
205     * @see StatisticalSummary#getVariance()
206     */
207    public double getVariance() {
208        synchronized (statistics) {
209            return statistics.getVariance();
210        }
211    }
212
213    /**
214     * Returns the sum of the logs of all the aggregated data.
215     *
216     * @return the sum of logs
217     * @see SummaryStatistics#getSumOfLogs()
218     */
219    public double getSumOfLogs() {
220        synchronized (statistics) {
221            return statistics.getSumOfLogs();
222        }
223    }
224
225    /**
226     * Returns the geometric mean of all the aggregated data.
227     *
228     * @return the geometric mean
229     * @see SummaryStatistics#getGeometricMean()
230     */
231    public double getGeometricMean() {
232        synchronized (statistics) {
233            return statistics.getGeometricMean();
234        }
235    }
236
237    /**
238     * Returns the sum of the squares of all the aggregated data.
239     *
240     * @return The sum of squares
241     * @see SummaryStatistics#getSumsq()
242     */
243    public double getSumsq() {
244        synchronized (statistics) {
245            return statistics.getSumsq();
246        }
247    }
248
249    /**
250     * Returns a statistic related to the Second Central Moment.  Specifically,
251     * what is returned is the sum of squared deviations from the sample mean
252     * among the all of the aggregated data.
253     *
254     * @return second central moment statistic
255     * @see SummaryStatistics#getSecondMoment()
256     */
257    public double getSecondMoment() {
258        synchronized (statistics) {
259            return statistics.getSecondMoment();
260        }
261    }
262
263    /**
264     * Return a {@link StatisticalSummaryValues} instance reporting current
265     * aggregate statistics.
266     *
267     * @return Current values of aggregate statistics
268     */
269    public StatisticalSummary getSummary() {
270        synchronized (statistics) {
271            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
272                    getMax(), getMin(), getSum());
273        }
274    }
275
276    /**
277     * Creates and returns a {@code SummaryStatistics} whose data will be
278     * aggregated with those of this {@code AggregateSummaryStatistics}.
279     *
280     * @return a {@code SummaryStatistics} whose data will be aggregated with
281     *      those of this {@code AggregateSummaryStatistics}.  The initial state
282     *      is a copy of the configured prototype statistics.
283     */
284    public SummaryStatistics createContributingStatistics() {
285        SummaryStatistics contributingStatistics
286                = new AggregatingSummaryStatistics(statistics);
287
288        // No try - catch or advertising NAE because neither argument will ever be null
289        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
290
291        return contributingStatistics;
292    }
293
294    /**
295     * Computes aggregate summary statistics. This method can be used to combine statistics
296     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
297     * should contain the same values that would have been obtained by computing a single
298     * StatisticalSummary over the combined dataset.
299     * <p>
300     * Returns null if the collection is empty or null.
301     * </p>
302     *
303     * @param statistics collection of SummaryStatistics to aggregate
304     * @return summary statistics for the combined dataset
305     */
306    public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
307        if (statistics == null) {
308            return null;
309        }
310        Iterator<SummaryStatistics> iterator = statistics.iterator();
311        if (!iterator.hasNext()) {
312            return null;
313        }
314        SummaryStatistics current = iterator.next();
315        long n = current.getN();
316        double min = current.getMin();
317        double sum = current.getSum();
318        double max = current.getMax();
319        double m2 = current.getSecondMoment();
320        double mean = current.getMean();
321        while (iterator.hasNext()) {
322            current = iterator.next();
323            if (current.getMin() < min || Double.isNaN(min)) {
324                min = current.getMin();
325            }
326            if (current.getMax() > max || Double.isNaN(max)) {
327                max = current.getMax();
328            }
329            sum += current.getSum();
330            final double oldN = n;
331            final double curN = current.getN();
332            n += curN;
333            final double meanDiff = current.getMean() - mean;
334            mean = sum / n;
335            m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
336        }
337        final double variance;
338        if (n == 0) {
339            variance = Double.NaN;
340        } else if (n == 1) {
341            variance = 0d;
342        } else {
343            variance = m2 / (n - 1);
344        }
345        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
346    }
347
348    /**
349     * A SummaryStatistics that also forwards all values added to it to a second
350     * {@code SummaryStatistics} for aggregation.
351     *
352     * @since 2.0
353     */
354    private static class AggregatingSummaryStatistics extends SummaryStatistics {
355
356        /**
357         * The serialization version of this class
358         */
359        private static final long serialVersionUID = 1L;
360
361        /**
362         * An additional SummaryStatistics into which values added to these
363         * statistics (and possibly others) are aggregated
364         */
365        private final SummaryStatistics aggregateStatistics;
366
367        /**
368         * Initializes a new AggregatingSummaryStatistics with the specified
369         * aggregate statistics object
370         *
371         * @param aggregateStatistics a {@code SummaryStatistics} into which
372         *      values added to this statistics object should be aggregated
373         */
374        public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
375            this.aggregateStatistics = aggregateStatistics;
376        }
377
378        /**
379         * {@inheritDoc}.  This version adds the provided value to the configured
380         * aggregate after adding it to these statistics.
381         *
382         * @see SummaryStatistics#addValue(double)
383         */
384        @Override
385        public void addValue(double value) {
386            super.addValue(value);
387            synchronized (aggregateStatistics) {
388                aggregateStatistics.addValue(value);
389            }
390        }
391
392        /**
393         * Returns true iff <code>object</code> is a
394         * <code>SummaryStatistics</code> instance and all statistics have the
395         * same values as this.
396         * @param object the object to test equality against.
397         * @return true if object equals this
398         */
399        @Override
400        public boolean equals(Object object) {
401            if (object == this) {
402                return true;
403            }
404            if (object instanceof AggregatingSummaryStatistics == false) {
405                return false;
406            }
407            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
408            return super.equals(stat) &&
409                   aggregateStatistics.equals(stat.aggregateStatistics);
410        }
411
412        /**
413         * Returns hash code based on values of statistics
414         * @return hash code
415         */
416        @Override
417        public int hashCode() {
418            return 123 + super.hashCode() + aggregateStatistics.hashCode();
419        }
420    }
421}