001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.stat.descriptive;
019
020import java.util.Collection;
021import java.util.Iterator;
022
023import org.apache.commons.math4.legacy.exception.NullArgumentException;
024
025/**
026 * <p>
027 * An aggregator for {@code SummaryStatistics} from several data sets or
028 * data set partitions.  In its simplest usage mode, the client creates an
029 * instance via the zero-argument constructor, then uses
030 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
031 * for each individual data set / partition.  The per-set statistics objects
032 * are used as normal, and at any time the aggregate statistics for all the
033 * contributors can be obtained from this object.
034 * </p><p>
035 * Clients with specialized requirements can use alternative constructors to
036 * control the statistics implementations and initial values used by the
037 * contributing and the internal aggregate {@code SummaryStatistics} objects.
038 * </p><p>
039 * A static {@link #aggregate(Collection)} method is also included that computes
040 * aggregate statistics directly from a Collection of SummaryStatistics instances.
041 * </p><p>
042 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
043 * instances to be aggregated concurrently, the created instances'
044 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
045 * instance maintained by this class.  In multithreaded environments, if the functionality
046 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
047 * to avoid unnecessary computation and synchronization delays.</p>
048 *
049 * @since 2.0
050 *
051 */
052public class AggregateSummaryStatistics implements StatisticalSummary {
053    /**
054     * A SummaryStatistics serving as a prototype for creating SummaryStatistics.
055     * contributing to this aggregate
056     */
057    private final SummaryStatistics statisticsPrototype;
058
059    /**
060     * The SummaryStatistics in which aggregate statistics are accumulated.
061     */
062    private final SummaryStatistics statistics;
063
064    /**
065     * Initializes a new AggregateSummaryStatistics with default statistics
066     * implementations.
067     *
068     */
069    public AggregateSummaryStatistics() {
070        // No try-catch or throws NAE because arg is guaranteed non-null
071        this(new SummaryStatistics());
072    }
073
074    /**
075     * Initializes a new AggregateSummaryStatistics with the specified statistics
076     * object as a prototype for contributing statistics and for the internal
077     * aggregate statistics.  This provides for customized statistics implementations
078     * to be used by contributing and aggregate statistics.
079     *
080     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
081     *      prototype both for the internal aggregate statistics and for
082     *      contributing statistics obtained via the
083     *      {@code createContributingStatistics()} method.  Being a prototype
084     *      means that other objects are initialized by copying this object's state.
085     *      If {@code null}, a new, default statistics object is used.  Any statistic
086     *      values in the prototype are propagated to contributing statistics
087     *      objects and (once) into these aggregate statistics.
088     * @throws NullArgumentException if prototypeStatistics is null
089     * @see #createContributingStatistics()
090     */
091    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
092        this(prototypeStatistics,
093             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
094    }
095
096    /**
097     * Initializes a new AggregateSummaryStatistics with the specified statistics
098     * object as a prototype for contributing statistics and for the internal
099     * aggregate statistics.  This provides for different statistics implementations
100     * to be used by contributing and aggregate statistics and for an initial
101     * state to be supplied for the aggregate statistics.
102     *
103     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
104     *      prototype both for the internal aggregate statistics and for
105     *      contributing statistics obtained via the
106     *      {@code createContributingStatistics()} method.  Being a prototype
107     *      means that other objects are initialized by copying this object's state.
108     *      If {@code null}, a new, default statistics object is used.  Any statistic
109     *      values in the prototype are propagated to contributing statistics
110     *      objects, but not into these aggregate statistics.
111     * @param initialStatistics a {@code SummaryStatistics} to serve as the
112     *      internal aggregate statistics object.  If {@code null}, a new, default
113     *      statistics object is used.
114     * @see #createContributingStatistics()
115     */
116    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
117                                      SummaryStatistics initialStatistics) {
118        this.statisticsPrototype =
119            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
120        this.statistics =
121            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
122    }
123
124    /**
125     * {@inheritDoc}.  This version returns the maximum over all the aggregated
126     * data.
127     *
128     * @see StatisticalSummary#getMax()
129     */
130    @Override
131    public double getMax() {
132        synchronized (statistics) {
133            return statistics.getMax();
134        }
135    }
136
137    /**
138     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
139     *
140     * @see StatisticalSummary#getMean()
141     */
142    @Override
143    public double getMean() {
144        synchronized (statistics) {
145            return statistics.getMean();
146        }
147    }
148
149    /**
150     * {@inheritDoc}.  This version returns the minimum over all the aggregated
151     * data.
152     *
153     * @see StatisticalSummary#getMin()
154     */
155    @Override
156    public double getMin() {
157        synchronized (statistics) {
158            return statistics.getMin();
159        }
160    }
161
162    /**
163     * {@inheritDoc}.  This version returns a count of all the aggregated data.
164     *
165     * @see StatisticalSummary#getN()
166     */
167    @Override
168    public long getN() {
169        synchronized (statistics) {
170            return statistics.getN();
171        }
172    }
173
174    /**
175     * {@inheritDoc}.  This version returns the standard deviation of all the
176     * aggregated data.
177     *
178     * @see StatisticalSummary#getStandardDeviation()
179     */
180    @Override
181    public double getStandardDeviation() {
182        synchronized (statistics) {
183            return statistics.getStandardDeviation();
184        }
185    }
186
187    /**
188     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
189     *
190     * @see StatisticalSummary#getSum()
191     */
192    @Override
193    public double getSum() {
194        synchronized (statistics) {
195            return statistics.getSum();
196        }
197    }
198
199    /**
200     * {@inheritDoc}.  This version returns the variance of all the aggregated
201     * data.
202     *
203     * @see StatisticalSummary#getVariance()
204     */
205    @Override
206    public double getVariance() {
207        synchronized (statistics) {
208            return statistics.getVariance();
209        }
210    }
211
212    /**
213     * Returns the sum of the logs of all the aggregated data.
214     *
215     * @return the sum of logs
216     * @see SummaryStatistics#getSumOfLogs()
217     */
218    public double getSumOfLogs() {
219        synchronized (statistics) {
220            return statistics.getSumOfLogs();
221        }
222    }
223
224    /**
225     * Returns the geometric mean of all the aggregated data.
226     *
227     * @return the geometric mean
228     * @see SummaryStatistics#getGeometricMean()
229     */
230    public double getGeometricMean() {
231        synchronized (statistics) {
232            return statistics.getGeometricMean();
233        }
234    }
235
236    /**
237     * Returns the sum of the squares of all the aggregated data.
238     *
239     * @return The sum of squares
240     * @see SummaryStatistics#getSumsq()
241     */
242    public double getSumsq() {
243        synchronized (statistics) {
244            return statistics.getSumsq();
245        }
246    }
247
248    /**
249     * Returns a statistic related to the Second Central Moment.  Specifically,
250     * what is returned is the sum of squared deviations from the sample mean
251     * among the all of the aggregated data.
252     *
253     * @return second central moment statistic
254     * @see SummaryStatistics#getSecondMoment()
255     */
256    public double getSecondMoment() {
257        synchronized (statistics) {
258            return statistics.getSecondMoment();
259        }
260    }
261
262    /**
263     * Return a {@link StatisticalSummaryValues} instance reporting current
264     * aggregate statistics.
265     *
266     * @return Current values of aggregate statistics
267     */
268    public StatisticalSummary getSummary() {
269        synchronized (statistics) {
270            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
271                    getMax(), getMin(), getSum());
272        }
273    }
274
275    /**
276     * Creates and returns a {@code SummaryStatistics} whose data will be
277     * aggregated with those of this {@code AggregateSummaryStatistics}.
278     *
279     * @return a {@code SummaryStatistics} whose data will be aggregated with
280     *      those of this {@code AggregateSummaryStatistics}.  The initial state
281     *      is a copy of the configured prototype statistics.
282     */
283    public SummaryStatistics createContributingStatistics() {
284        SummaryStatistics contributingStatistics
285                = new AggregatingSummaryStatistics(statistics);
286
287        // No try - catch or advertising NAE because neither argument will ever be null
288        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
289
290        return contributingStatistics;
291    }
292
293    /**
294     * Computes aggregate summary statistics. This method can be used to combine statistics
295     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
296     * should contain the same values that would have been obtained by computing a single
297     * StatisticalSummary over the combined dataset.
298     * <p>
299     * Returns null if the collection is empty or null.
300     * </p>
301     *
302     * @param statistics collection of SummaryStatistics to aggregate
303     * @return summary statistics for the combined dataset
304     */
305    public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
306        if (statistics == null) {
307            return null;
308        }
309        Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
310        if (!iterator.hasNext()) {
311            return null;
312        }
313        StatisticalSummary current = iterator.next();
314        long n = current.getN();
315        double min = current.getMin();
316        double sum = current.getSum();
317        double max = current.getMax();
318        double var = current.getVariance();
319        double m2 = var * (n - 1d);
320        double mean = current.getMean();
321        while (iterator.hasNext()) {
322            current = iterator.next();
323            if (current.getMin() < min || Double.isNaN(min)) {
324                min = current.getMin();
325            }
326            if (current.getMax() > max || Double.isNaN(max)) {
327                max = current.getMax();
328            }
329            sum += current.getSum();
330            final double oldN = n;
331            final double curN = current.getN();
332            n += curN;
333            final double meanDiff = current.getMean() - mean;
334            mean = sum / n;
335            final double curM2 = current.getVariance() * (curN - 1d);
336            m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
337        }
338        final double variance;
339        if (n == 0) {
340            variance = Double.NaN;
341        } else if (n == 1) {
342            variance = 0d;
343        } else {
344            variance = m2 / (n - 1);
345        }
346        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
347    }
348
349    /**
350     * A SummaryStatistics that also forwards all values added to it to a second
351     * {@code SummaryStatistics} for aggregation.
352     *
353     * @since 2.0
354     */
355    private static class AggregatingSummaryStatistics extends SummaryStatistics {
356
357        /**
358         * The serialization version of this class.
359         */
360        private static final long serialVersionUID = 1L;
361
362        /**
363         * An additional SummaryStatistics into which values added to these.
364         * statistics (and possibly others) are aggregated
365         */
366        private final SummaryStatistics aggregateStatistics;
367
368        /**
369         * Initializes a new AggregatingSummaryStatistics with the specified.
370         * aggregate statistics object
371         *
372         * @param aggregateStatistics a {@code SummaryStatistics} into which
373         *      values added to this statistics object should be aggregated
374         */
375        AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
376            this.aggregateStatistics = aggregateStatistics;
377        }
378
379        /**
380         * {@inheritDoc}.  This version adds the provided value to the configured
381         * aggregate after adding it to these statistics.
382         *
383         * @see SummaryStatistics#addValue(double)
384         */
385        @Override
386        public void addValue(double value) {
387            super.addValue(value);
388            synchronized (aggregateStatistics) {
389                aggregateStatistics.addValue(value);
390            }
391        }
392
393        /**
394         * Returns true iff <code>object</code> is a
395         * <code>SummaryStatistics</code> instance and all statistics have the
396         * same values as this.
397         * @param object the object to test equality against.
398         * @return true if object equals this
399         */
400        @Override
401        public boolean equals(Object object) {
402            if (object == this) {
403                return true;
404            }
405            if (!(object instanceof AggregatingSummaryStatistics)) {
406                return false;
407            }
408            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
409            return super.equals(stat) &&
410                   aggregateStatistics.equals(stat.aggregateStatistics);
411        }
412
413        /**
414         * Returns hash code based on values of statistics.
415         * @return hash code
416         */
417        @Override
418        public int hashCode() {
419            return 123 + super.hashCode() + aggregateStatistics.hashCode();
420        }
421    }
422}