View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math4.legacy.stat.descriptive;
19  
20  import java.util.Collection;
21  import java.util.Iterator;
22  
23  import org.apache.commons.math4.legacy.exception.NullArgumentException;
24  
25  /**
26   * <p>
27   * An aggregator for {@code SummaryStatistics} from several data sets or
28   * data set partitions.  In its simplest usage mode, the client creates an
29   * instance via the zero-argument constructor, then uses
30   * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
31   * for each individual data set / partition.  The per-set statistics objects
32   * are used as normal, and at any time the aggregate statistics for all the
33   * contributors can be obtained from this object.
34   * </p><p>
35   * Clients with specialized requirements can use alternative constructors to
36   * control the statistics implementations and initial values used by the
37   * contributing and the internal aggregate {@code SummaryStatistics} objects.
38   * </p><p>
39   * A static {@link #aggregate(Collection)} method is also included that computes
40   * aggregate statistics directly from a Collection of SummaryStatistics instances.
41   * </p><p>
42   * When {@link #createContributingStatistics()} is used to create SummaryStatistics
43   * instances to be aggregated concurrently, the created instances'
44   * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
45   * instance maintained by this class.  In multithreaded environments, if the functionality
46   * provided by {@link #aggregate(Collection)} is adequate, that method should be used
47   * to avoid unnecessary computation and synchronization delays.</p>
48   *
49   * @since 2.0
50   *
51   */
52  public class AggregateSummaryStatistics implements StatisticalSummary {
53      /**
54       * A SummaryStatistics serving as a prototype for creating SummaryStatistics.
55       * contributing to this aggregate
56       */
57      private final SummaryStatistics statisticsPrototype;
58  
59      /**
60       * The SummaryStatistics in which aggregate statistics are accumulated.
61       */
62      private final SummaryStatistics statistics;
63  
64      /**
65       * Initializes a new AggregateSummaryStatistics with default statistics
66       * implementations.
67       *
68       */
69      public AggregateSummaryStatistics() {
70          // No try-catch or throws NAE because arg is guaranteed non-null
71          this(new SummaryStatistics());
72      }
73  
74      /**
75       * Initializes a new AggregateSummaryStatistics with the specified statistics
76       * object as a prototype for contributing statistics and for the internal
77       * aggregate statistics.  This provides for customized statistics implementations
78       * to be used by contributing and aggregate statistics.
79       *
80       * @param prototypeStatistics a {@code SummaryStatistics} serving as a
81       *      prototype both for the internal aggregate statistics and for
82       *      contributing statistics obtained via the
83       *      {@code createContributingStatistics()} method.  Being a prototype
84       *      means that other objects are initialized by copying this object's state.
85       *      If {@code null}, a new, default statistics object is used.  Any statistic
86       *      values in the prototype are propagated to contributing statistics
87       *      objects and (once) into these aggregate statistics.
88       * @throws NullArgumentException if prototypeStatistics is null
89       * @see #createContributingStatistics()
90       */
91      public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
92          this(prototypeStatistics,
93               prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
94      }
95  
96      /**
97       * Initializes a new AggregateSummaryStatistics with the specified statistics
98       * object as a prototype for contributing statistics and for the internal
99       * aggregate statistics.  This provides for different statistics implementations
100      * to be used by contributing and aggregate statistics and for an initial
101      * state to be supplied for the aggregate statistics.
102      *
103      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
104      *      prototype both for the internal aggregate statistics and for
105      *      contributing statistics obtained via the
106      *      {@code createContributingStatistics()} method.  Being a prototype
107      *      means that other objects are initialized by copying this object's state.
108      *      If {@code null}, a new, default statistics object is used.  Any statistic
109      *      values in the prototype are propagated to contributing statistics
110      *      objects, but not into these aggregate statistics.
111      * @param initialStatistics a {@code SummaryStatistics} to serve as the
112      *      internal aggregate statistics object.  If {@code null}, a new, default
113      *      statistics object is used.
114      * @see #createContributingStatistics()
115      */
116     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
117                                       SummaryStatistics initialStatistics) {
118         this.statisticsPrototype =
119             (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
120         this.statistics =
121             (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
122     }
123 
124     /**
125      * {@inheritDoc}.  This version returns the maximum over all the aggregated
126      * data.
127      *
128      * @see StatisticalSummary#getMax()
129      */
130     @Override
131     public double getMax() {
132         synchronized (statistics) {
133             return statistics.getMax();
134         }
135     }
136 
137     /**
138      * {@inheritDoc}.  This version returns the mean of all the aggregated data.
139      *
140      * @see StatisticalSummary#getMean()
141      */
142     @Override
143     public double getMean() {
144         synchronized (statistics) {
145             return statistics.getMean();
146         }
147     }
148 
149     /**
150      * {@inheritDoc}.  This version returns the minimum over all the aggregated
151      * data.
152      *
153      * @see StatisticalSummary#getMin()
154      */
155     @Override
156     public double getMin() {
157         synchronized (statistics) {
158             return statistics.getMin();
159         }
160     }
161 
162     /**
163      * {@inheritDoc}.  This version returns a count of all the aggregated data.
164      *
165      * @see StatisticalSummary#getN()
166      */
167     @Override
168     public long getN() {
169         synchronized (statistics) {
170             return statistics.getN();
171         }
172     }
173 
174     /**
175      * {@inheritDoc}.  This version returns the standard deviation of all the
176      * aggregated data.
177      *
178      * @see StatisticalSummary#getStandardDeviation()
179      */
180     @Override
181     public double getStandardDeviation() {
182         synchronized (statistics) {
183             return statistics.getStandardDeviation();
184         }
185     }
186 
187     /**
188      * {@inheritDoc}.  This version returns a sum of all the aggregated data.
189      *
190      * @see StatisticalSummary#getSum()
191      */
192     @Override
193     public double getSum() {
194         synchronized (statistics) {
195             return statistics.getSum();
196         }
197     }
198 
199     /**
200      * {@inheritDoc}.  This version returns the variance of all the aggregated
201      * data.
202      *
203      * @see StatisticalSummary#getVariance()
204      */
205     @Override
206     public double getVariance() {
207         synchronized (statistics) {
208             return statistics.getVariance();
209         }
210     }
211 
212     /**
213      * Returns the sum of the logs of all the aggregated data.
214      *
215      * @return the sum of logs
216      * @see SummaryStatistics#getSumOfLogs()
217      */
218     public double getSumOfLogs() {
219         synchronized (statistics) {
220             return statistics.getSumOfLogs();
221         }
222     }
223 
224     /**
225      * Returns the geometric mean of all the aggregated data.
226      *
227      * @return the geometric mean
228      * @see SummaryStatistics#getGeometricMean()
229      */
230     public double getGeometricMean() {
231         synchronized (statistics) {
232             return statistics.getGeometricMean();
233         }
234     }
235 
236     /**
237      * Returns the sum of the squares of all the aggregated data.
238      *
239      * @return The sum of squares
240      * @see SummaryStatistics#getSumsq()
241      */
242     public double getSumsq() {
243         synchronized (statistics) {
244             return statistics.getSumsq();
245         }
246     }
247 
248     /**
249      * Returns a statistic related to the Second Central Moment.  Specifically,
250      * what is returned is the sum of squared deviations from the sample mean
251      * among the all of the aggregated data.
252      *
253      * @return second central moment statistic
254      * @see SummaryStatistics#getSecondMoment()
255      */
256     public double getSecondMoment() {
257         synchronized (statistics) {
258             return statistics.getSecondMoment();
259         }
260     }
261 
262     /**
263      * Return a {@link StatisticalSummaryValues} instance reporting current
264      * aggregate statistics.
265      *
266      * @return Current values of aggregate statistics
267      */
268     public StatisticalSummary getSummary() {
269         synchronized (statistics) {
270             return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
271                     getMax(), getMin(), getSum());
272         }
273     }
274 
275     /**
276      * Creates and returns a {@code SummaryStatistics} whose data will be
277      * aggregated with those of this {@code AggregateSummaryStatistics}.
278      *
279      * @return a {@code SummaryStatistics} whose data will be aggregated with
280      *      those of this {@code AggregateSummaryStatistics}.  The initial state
281      *      is a copy of the configured prototype statistics.
282      */
283     public SummaryStatistics createContributingStatistics() {
284         SummaryStatistics contributingStatistics
285                 = new AggregatingSummaryStatistics(statistics);
286 
287         // No try - catch or advertising NAE because neither argument will ever be null
288         SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
289 
290         return contributingStatistics;
291     }
292 
293     /**
294      * Computes aggregate summary statistics. This method can be used to combine statistics
295      * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
296      * should contain the same values that would have been obtained by computing a single
297      * StatisticalSummary over the combined dataset.
298      * <p>
299      * Returns null if the collection is empty or null.
300      * </p>
301      *
302      * @param statistics collection of SummaryStatistics to aggregate
303      * @return summary statistics for the combined dataset
304      */
305     public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
306         if (statistics == null) {
307             return null;
308         }
309         Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
310         if (!iterator.hasNext()) {
311             return null;
312         }
313         StatisticalSummary current = iterator.next();
314         long n = current.getN();
315         double min = current.getMin();
316         double sum = current.getSum();
317         double max = current.getMax();
318         double var = current.getVariance();
319         double m2 = var * (n - 1d);
320         double mean = current.getMean();
321         while (iterator.hasNext()) {
322             current = iterator.next();
323             if (current.getMin() < min || Double.isNaN(min)) {
324                 min = current.getMin();
325             }
326             if (current.getMax() > max || Double.isNaN(max)) {
327                 max = current.getMax();
328             }
329             sum += current.getSum();
330             final double oldN = n;
331             final double curN = current.getN();
332             n += curN;
333             final double meanDiff = current.getMean() - mean;
334             mean = sum / n;
335             final double curM2 = current.getVariance() * (curN - 1d);
336             m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
337         }
338         final double variance;
339         if (n == 0) {
340             variance = Double.NaN;
341         } else if (n == 1) {
342             variance = 0d;
343         } else {
344             variance = m2 / (n - 1);
345         }
346         return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
347     }
348 
349     /**
350      * A SummaryStatistics that also forwards all values added to it to a second
351      * {@code SummaryStatistics} for aggregation.
352      *
353      * @since 2.0
354      */
355     private static final class AggregatingSummaryStatistics extends SummaryStatistics {
356 
357         /**
358          * The serialization version of this class.
359          */
360         private static final long serialVersionUID = 1L;
361 
362         /**
363          * An additional SummaryStatistics into which values added to these.
364          * statistics (and possibly others) are aggregated
365          */
366         private final SummaryStatistics aggregateStatistics;
367 
368         /**
369          * Initializes a new AggregatingSummaryStatistics with the specified.
370          * aggregate statistics object
371          *
372          * @param aggregateStatistics a {@code SummaryStatistics} into which
373          *      values added to this statistics object should be aggregated
374          */
375         AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
376             this.aggregateStatistics = aggregateStatistics;
377         }
378 
379         /**
380          * {@inheritDoc}.  This version adds the provided value to the configured
381          * aggregate after adding it to these statistics.
382          *
383          * @see SummaryStatistics#addValue(double)
384          */
385         @Override
386         public void addValue(double value) {
387             super.addValue(value);
388             synchronized (aggregateStatistics) {
389                 aggregateStatistics.addValue(value);
390             }
391         }
392 
393         /**
394          * Returns true iff <code>object</code> is a
395          * <code>SummaryStatistics</code> instance and all statistics have the
396          * same values as this.
397          * @param object the object to test equality against.
398          * @return true if object equals this
399          */
400         @Override
401         public boolean equals(Object object) {
402             if (object == this) {
403                 return true;
404             }
405             if (!(object instanceof AggregatingSummaryStatistics)) {
406                 return false;
407             }
408             AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
409             return super.equals(stat) &&
410                    aggregateStatistics.equals(stat.aggregateStatistics);
411         }
412 
413         /**
414          * Returns hash code based on values of statistics.
415          * @return hash code
416          */
417         @Override
418         public int hashCode() {
419             return 123 + super.hashCode() + aggregateStatistics.hashCode();
420         }
421     }
422 }