1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.math4.legacy.stat.descriptive;
19
20 import java.util.Collection;
21 import java.util.Iterator;
22
23 import org.apache.commons.math4.legacy.exception.NullArgumentException;
24
25 /**
26 * <p>
27 * An aggregator for {@code SummaryStatistics} from several data sets or
28 * data set partitions. In its simplest usage mode, the client creates an
29 * instance via the zero-argument constructor, then uses
30 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
31 * for each individual data set / partition. The per-set statistics objects
32 * are used as normal, and at any time the aggregate statistics for all the
33 * contributors can be obtained from this object.
34 * </p><p>
35 * Clients with specialized requirements can use alternative constructors to
36 * control the statistics implementations and initial values used by the
37 * contributing and the internal aggregate {@code SummaryStatistics} objects.
38 * </p><p>
39 * A static {@link #aggregate(Collection)} method is also included that computes
40 * aggregate statistics directly from a Collection of SummaryStatistics instances.
41 * </p><p>
42 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
43 * instances to be aggregated concurrently, the created instances'
44 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
45 * instance maintained by this class. In multithreaded environments, if the functionality
46 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
47 * to avoid unnecessary computation and synchronization delays.</p>
48 *
49 * @since 2.0
50 *
51 */
52 public class AggregateSummaryStatistics implements StatisticalSummary {
53 /**
54 * A SummaryStatistics serving as a prototype for creating SummaryStatistics.
55 * contributing to this aggregate
56 */
57 private final SummaryStatistics statisticsPrototype;
58
59 /**
60 * The SummaryStatistics in which aggregate statistics are accumulated.
61 */
62 private final SummaryStatistics statistics;
63
64 /**
65 * Initializes a new AggregateSummaryStatistics with default statistics
66 * implementations.
67 *
68 */
69 public AggregateSummaryStatistics() {
70 // No try-catch or throws NAE because arg is guaranteed non-null
71 this(new SummaryStatistics());
72 }
73
74 /**
75 * Initializes a new AggregateSummaryStatistics with the specified statistics
76 * object as a prototype for contributing statistics and for the internal
77 * aggregate statistics. This provides for customized statistics implementations
78 * to be used by contributing and aggregate statistics.
79 *
80 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
81 * prototype both for the internal aggregate statistics and for
82 * contributing statistics obtained via the
83 * {@code createContributingStatistics()} method. Being a prototype
84 * means that other objects are initialized by copying this object's state.
85 * If {@code null}, a new, default statistics object is used. Any statistic
86 * values in the prototype are propagated to contributing statistics
87 * objects and (once) into these aggregate statistics.
88 * @throws NullArgumentException if prototypeStatistics is null
89 * @see #createContributingStatistics()
90 */
91 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
92 this(prototypeStatistics,
93 prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
94 }
95
96 /**
97 * Initializes a new AggregateSummaryStatistics with the specified statistics
98 * object as a prototype for contributing statistics and for the internal
99 * aggregate statistics. This provides for different statistics implementations
100 * to be used by contributing and aggregate statistics and for an initial
101 * state to be supplied for the aggregate statistics.
102 *
103 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
104 * prototype both for the internal aggregate statistics and for
105 * contributing statistics obtained via the
106 * {@code createContributingStatistics()} method. Being a prototype
107 * means that other objects are initialized by copying this object's state.
108 * If {@code null}, a new, default statistics object is used. Any statistic
109 * values in the prototype are propagated to contributing statistics
110 * objects, but not into these aggregate statistics.
111 * @param initialStatistics a {@code SummaryStatistics} to serve as the
112 * internal aggregate statistics object. If {@code null}, a new, default
113 * statistics object is used.
114 * @see #createContributingStatistics()
115 */
116 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
117 SummaryStatistics initialStatistics) {
118 this.statisticsPrototype =
119 (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
120 this.statistics =
121 (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
122 }
123
124 /**
125 * {@inheritDoc}. This version returns the maximum over all the aggregated
126 * data.
127 *
128 * @see StatisticalSummary#getMax()
129 */
130 @Override
131 public double getMax() {
132 synchronized (statistics) {
133 return statistics.getMax();
134 }
135 }
136
137 /**
138 * {@inheritDoc}. This version returns the mean of all the aggregated data.
139 *
140 * @see StatisticalSummary#getMean()
141 */
142 @Override
143 public double getMean() {
144 synchronized (statistics) {
145 return statistics.getMean();
146 }
147 }
148
149 /**
150 * {@inheritDoc}. This version returns the minimum over all the aggregated
151 * data.
152 *
153 * @see StatisticalSummary#getMin()
154 */
155 @Override
156 public double getMin() {
157 synchronized (statistics) {
158 return statistics.getMin();
159 }
160 }
161
162 /**
163 * {@inheritDoc}. This version returns a count of all the aggregated data.
164 *
165 * @see StatisticalSummary#getN()
166 */
167 @Override
168 public long getN() {
169 synchronized (statistics) {
170 return statistics.getN();
171 }
172 }
173
174 /**
175 * {@inheritDoc}. This version returns the standard deviation of all the
176 * aggregated data.
177 *
178 * @see StatisticalSummary#getStandardDeviation()
179 */
180 @Override
181 public double getStandardDeviation() {
182 synchronized (statistics) {
183 return statistics.getStandardDeviation();
184 }
185 }
186
187 /**
188 * {@inheritDoc}. This version returns a sum of all the aggregated data.
189 *
190 * @see StatisticalSummary#getSum()
191 */
192 @Override
193 public double getSum() {
194 synchronized (statistics) {
195 return statistics.getSum();
196 }
197 }
198
199 /**
200 * {@inheritDoc}. This version returns the variance of all the aggregated
201 * data.
202 *
203 * @see StatisticalSummary#getVariance()
204 */
205 @Override
206 public double getVariance() {
207 synchronized (statistics) {
208 return statistics.getVariance();
209 }
210 }
211
212 /**
213 * Returns the sum of the logs of all the aggregated data.
214 *
215 * @return the sum of logs
216 * @see SummaryStatistics#getSumOfLogs()
217 */
218 public double getSumOfLogs() {
219 synchronized (statistics) {
220 return statistics.getSumOfLogs();
221 }
222 }
223
224 /**
225 * Returns the geometric mean of all the aggregated data.
226 *
227 * @return the geometric mean
228 * @see SummaryStatistics#getGeometricMean()
229 */
230 public double getGeometricMean() {
231 synchronized (statistics) {
232 return statistics.getGeometricMean();
233 }
234 }
235
236 /**
237 * Returns the sum of the squares of all the aggregated data.
238 *
239 * @return The sum of squares
240 * @see SummaryStatistics#getSumsq()
241 */
242 public double getSumsq() {
243 synchronized (statistics) {
244 return statistics.getSumsq();
245 }
246 }
247
248 /**
249 * Returns a statistic related to the Second Central Moment. Specifically,
250 * what is returned is the sum of squared deviations from the sample mean
251 * among the all of the aggregated data.
252 *
253 * @return second central moment statistic
254 * @see SummaryStatistics#getSecondMoment()
255 */
256 public double getSecondMoment() {
257 synchronized (statistics) {
258 return statistics.getSecondMoment();
259 }
260 }
261
262 /**
263 * Return a {@link StatisticalSummaryValues} instance reporting current
264 * aggregate statistics.
265 *
266 * @return Current values of aggregate statistics
267 */
268 public StatisticalSummary getSummary() {
269 synchronized (statistics) {
270 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
271 getMax(), getMin(), getSum());
272 }
273 }
274
275 /**
276 * Creates and returns a {@code SummaryStatistics} whose data will be
277 * aggregated with those of this {@code AggregateSummaryStatistics}.
278 *
279 * @return a {@code SummaryStatistics} whose data will be aggregated with
280 * those of this {@code AggregateSummaryStatistics}. The initial state
281 * is a copy of the configured prototype statistics.
282 */
283 public SummaryStatistics createContributingStatistics() {
284 SummaryStatistics contributingStatistics
285 = new AggregatingSummaryStatistics(statistics);
286
287 // No try - catch or advertising NAE because neither argument will ever be null
288 SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
289
290 return contributingStatistics;
291 }
292
293 /**
294 * Computes aggregate summary statistics. This method can be used to combine statistics
295 * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
296 * should contain the same values that would have been obtained by computing a single
297 * StatisticalSummary over the combined dataset.
298 * <p>
299 * Returns null if the collection is empty or null.
300 * </p>
301 *
302 * @param statistics collection of SummaryStatistics to aggregate
303 * @return summary statistics for the combined dataset
304 */
305 public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
306 if (statistics == null) {
307 return null;
308 }
309 Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
310 if (!iterator.hasNext()) {
311 return null;
312 }
313 StatisticalSummary current = iterator.next();
314 long n = current.getN();
315 double min = current.getMin();
316 double sum = current.getSum();
317 double max = current.getMax();
318 double var = current.getVariance();
319 double m2 = var * (n - 1d);
320 double mean = current.getMean();
321 while (iterator.hasNext()) {
322 current = iterator.next();
323 if (current.getMin() < min || Double.isNaN(min)) {
324 min = current.getMin();
325 }
326 if (current.getMax() > max || Double.isNaN(max)) {
327 max = current.getMax();
328 }
329 sum += current.getSum();
330 final double oldN = n;
331 final double curN = current.getN();
332 n += curN;
333 final double meanDiff = current.getMean() - mean;
334 mean = sum / n;
335 final double curM2 = current.getVariance() * (curN - 1d);
336 m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
337 }
338 final double variance;
339 if (n == 0) {
340 variance = Double.NaN;
341 } else if (n == 1) {
342 variance = 0d;
343 } else {
344 variance = m2 / (n - 1);
345 }
346 return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
347 }
348
349 /**
350 * A SummaryStatistics that also forwards all values added to it to a second
351 * {@code SummaryStatistics} for aggregation.
352 *
353 * @since 2.0
354 */
355 private static final class AggregatingSummaryStatistics extends SummaryStatistics {
356
357 /**
358 * The serialization version of this class.
359 */
360 private static final long serialVersionUID = 1L;
361
362 /**
363 * An additional SummaryStatistics into which values added to these.
364 * statistics (and possibly others) are aggregated
365 */
366 private final SummaryStatistics aggregateStatistics;
367
368 /**
369 * Initializes a new AggregatingSummaryStatistics with the specified.
370 * aggregate statistics object
371 *
372 * @param aggregateStatistics a {@code SummaryStatistics} into which
373 * values added to this statistics object should be aggregated
374 */
375 AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
376 this.aggregateStatistics = aggregateStatistics;
377 }
378
379 /**
380 * {@inheritDoc}. This version adds the provided value to the configured
381 * aggregate after adding it to these statistics.
382 *
383 * @see SummaryStatistics#addValue(double)
384 */
385 @Override
386 public void addValue(double value) {
387 super.addValue(value);
388 synchronized (aggregateStatistics) {
389 aggregateStatistics.addValue(value);
390 }
391 }
392
393 /**
394 * Returns true iff <code>object</code> is a
395 * <code>SummaryStatistics</code> instance and all statistics have the
396 * same values as this.
397 * @param object the object to test equality against.
398 * @return true if object equals this
399 */
400 @Override
401 public boolean equals(Object object) {
402 if (object == this) {
403 return true;
404 }
405 if (!(object instanceof AggregatingSummaryStatistics)) {
406 return false;
407 }
408 AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
409 return super.equals(stat) &&
410 aggregateStatistics.equals(stat.aggregateStatistics);
411 }
412
413 /**
414 * Returns hash code based on values of statistics.
415 * @return hash code
416 */
417 @Override
418 public int hashCode() {
419 return 123 + super.hashCode() + aggregateStatistics.hashCode();
420 }
421 }
422 }