/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.stat.descriptive;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.math3.exception.NullArgumentException;

/**
 * <p>
 * An aggregator for {@code SummaryStatistics} from several data sets or
 * data set partitions.  In its simplest usage mode, the client creates an
 * instance via the zero-argument constructor, then uses
 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
 * for each individual data set / partition.  The per-set statistics objects
 * are used as normal, and at any time the aggregate statistics for all the
 * contributors can be obtained from this object.
 * </p><p>
 * Clients with specialized requirements can use alternative constructors to
 * control the statistics implementations and initial values used by the
 * contributing and the internal aggregate {@code SummaryStatistics} objects.
 * </p><p>
 * A static {@link #aggregate(Collection)} method is also included that computes
 * aggregate statistics directly from a Collection of SummaryStatistics instances.
 * </p><p>
 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
 * instances to be aggregated concurrently, the created instances'
 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
 * instance maintained by this class.  In multithreaded environments, if the functionality
 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
 * to avoid unnecessary computation and synchronization delays.</p>
 *
 * @since 2.0
 *
 */
public class AggregateSummaryStatistics implements StatisticalSummary,
        Serializable {

    /** Serializable version identifier */
    private static final long serialVersionUID = -8207112444016386906L;

    /**
     * A SummaryStatistics serving as a prototype for creating SummaryStatistics
     * contributing to this aggregate
     */
    private final SummaryStatistics statisticsPrototype;

    /**
     * The SummaryStatistics in which aggregate statistics are accumulated.
     * All reads and writes performed by this class go through a
     * {@code synchronized} block on this object.
     */
    private final SummaryStatistics statistics;

    /**
     * Initializes a new AggregateSummaryStatistics with default statistics
     * implementations.
     *
     */
    public AggregateSummaryStatistics() {
        // No try-catch or throws NAE because arg is guaranteed non-null
        this(new SummaryStatistics());
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and for the internal
     * aggregate statistics.  This provides for customized statistics implementations
     * to be used by contributing and aggregate statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype both for the internal aggregate statistics and for
     *      contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects and (once) into these aggregate statistics.
     * @throws NullArgumentException declared for compatibility only; a
     *      {@code null} prototype is not rejected by this constructor but is
     *      replaced by default statistics objects, as described above
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
        // The copy constructor is only invoked for a non-null prototype; a null
        // prototype is forwarded as (null, null) so that defaults are created.
        this(prototypeStatistics,
             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and a second statistics
     * object as the internal aggregate.  This provides for different statistics
     * implementations to be used by contributing and aggregate statistics and for
     * an initial state to be supplied for the aggregate statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype for contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects, but not into these aggregate statistics.
     * @param initialStatistics a {@code SummaryStatistics} to serve as the
     *      internal aggregate statistics object.  The instance is used directly
     *      (it is not copied).  If {@code null}, a new, default statistics
     *      object is used.
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
                                      SummaryStatistics initialStatistics) {
        this.statisticsPrototype =
            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
        this.statistics =
            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
    }

    /**
     * {@inheritDoc}.  This version returns the maximum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMax()
     */
    public double getMax() {
        synchronized (statistics) {
            return statistics.getMax();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
     *
     * @see StatisticalSummary#getMean()
     */
    public double getMean() {
        synchronized (statistics) {
            return statistics.getMean();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the minimum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMin()
     */
    public double getMin() {
        synchronized (statistics) {
            return statistics.getMin();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a count of all the aggregated data.
     *
     * @see StatisticalSummary#getN()
     */
    public long getN() {
        synchronized (statistics) {
            return statistics.getN();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the standard deviation of all the
     * aggregated data.
     *
     * @see StatisticalSummary#getStandardDeviation()
     */
    public double getStandardDeviation() {
        synchronized (statistics) {
            return statistics.getStandardDeviation();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
     *
     * @see StatisticalSummary#getSum()
     */
    public double getSum() {
        synchronized (statistics) {
            return statistics.getSum();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the variance of all the aggregated
     * data.
     *
     * @see StatisticalSummary#getVariance()
     */
    public double getVariance() {
        synchronized (statistics) {
            return statistics.getVariance();
        }
    }

    /**
     * Returns the sum of the logs of all the aggregated data.
     *
     * @return the sum of logs
     * @see SummaryStatistics#getSumOfLogs()
     */
    public double getSumOfLogs() {
        synchronized (statistics) {
            return statistics.getSumOfLogs();
        }
    }

    /**
     * Returns the geometric mean of all the aggregated data.
     *
     * @return the geometric mean
     * @see SummaryStatistics#getGeometricMean()
     */
    public double getGeometricMean() {
        synchronized (statistics) {
            return statistics.getGeometricMean();
        }
    }

    /**
     * Returns the sum of the squares of all the aggregated data.
     *
     * @return The sum of squares
     * @see SummaryStatistics#getSumsq()
     */
    public double getSumsq() {
        synchronized (statistics) {
            return statistics.getSumsq();
        }
    }

    /**
     * Returns a statistic related to the Second Central Moment.  Specifically,
     * what is returned is the sum of squared deviations from the sample mean
     * among all of the aggregated data.
     *
     * @return second central moment statistic
     * @see SummaryStatistics#getSecondMoment()
     */
    public double getSecondMoment() {
        synchronized (statistics) {
            return statistics.getSecondMoment();
        }
    }

    /**
     * Return a {@link StatisticalSummaryValues} instance reporting current
     * aggregate statistics.  The aggregate is locked for the duration of the
     * call so that all reported values are read under the same lock.
     *
     * @return Current values of aggregate statistics
     */
    public StatisticalSummary getSummary() {
        synchronized (statistics) {
            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
                    getMax(), getMin(), getSum());
        }
    }

    /**
     * Creates and returns a {@code SummaryStatistics} whose data will be
     * aggregated with those of this {@code AggregateSummaryStatistics}.
     *
     * @return a {@code SummaryStatistics} whose data will be aggregated with
     *      those of this {@code AggregateSummaryStatistics}.  The initial state
     *      is a copy of the configured prototype statistics.
     */
    public SummaryStatistics createContributingStatistics() {
        SummaryStatistics contributingStatistics
                = new AggregatingSummaryStatistics(statistics);

        // No try - catch or advertising NAE because neither argument will ever be null
        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);

        return contributingStatistics;
    }

    /**
     * Computes aggregate summary statistics. This method can be used to combine statistics
     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
     * should contain the same values that would have been obtained by computing a single
     * StatisticalSummary over the combined dataset.
     * <p>
     * Summaries reporting {@code getN() == 0} after the first element contribute
     * no data and are skipped, so that their undefined (NaN) mean and variance do
     * not contaminate the statistics of the non-empty partitions.  If every
     * summary is empty, the returned values have {@code n == 0}, a NaN variance,
     * and the mean, min, max and sum reported by the first summary.
     * </p>
     * <p>
     * Returns null if the collection is empty or null.
     * </p>
     *
     * @param statistics collection of SummaryStatistics to aggregate
     * @return summary statistics for the combined dataset
     */
    public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
        if (statistics == null) {
            return null;
        }
        Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
        if (!iterator.hasNext()) {
            return null;
        }

        // Seed the accumulators with the first summary.
        StatisticalSummary current = iterator.next();
        long n = current.getN();
        double min = current.getMin();
        double sum = current.getSum();
        double max = current.getMax();
        double mean = current.getMean();
        // m2 is the sum of squared deviations from the mean.  An empty summary has
        // no deviations; using its variance here would yield NaN * (-1) = NaN.
        double m2 = (n == 0) ? 0d : current.getVariance() * (n - 1d);

        while (iterator.hasNext()) {
            current = iterator.next();
            final long curN = current.getN();
            if (curN == 0) {
                // An empty partition adds no observations.
                continue;
            }
            if (n == 0) {
                // Nothing accumulated so far: adopt this summary as the new seed.
                n = curN;
                min = current.getMin();
                max = current.getMax();
                sum = current.getSum();
                mean = current.getMean();
                m2 = current.getVariance() * (curN - 1d);
                continue;
            }

            if (current.getMin() < min || Double.isNaN(min)) {
                min = current.getMin();
            }
            if (current.getMax() > max || Double.isNaN(max)) {
                max = current.getMax();
            }
            sum += current.getSum();
            final double oldN = n;
            n += curN;
            final double meanDiff = current.getMean() - mean;
            mean = sum / n;
            final double curM2 = current.getVariance() * (curN - 1d);
            // Combine the two sums of squared deviations, correcting for the
            // difference between the two partition means.
            m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
        }

        final double variance;
        if (n == 0) {
            variance = Double.NaN;
        } else if (n == 1) {
            variance = 0d;
        } else {
            variance = m2 / (n - 1);
        }
        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
    }

    /**
     * A SummaryStatistics that also forwards all values added to it to a second
     * {@code SummaryStatistics} for aggregation.
     *
     * @since 2.0
     */
    private static class AggregatingSummaryStatistics extends SummaryStatistics {

        /**
         * The serialization version of this class
         */
        private static final long serialVersionUID = 1L;

        /**
         * An additional SummaryStatistics into which values added to these
         * statistics (and possibly others) are aggregated
         */
        private final SummaryStatistics aggregateStatistics;

        /**
         * Initializes a new AggregatingSummaryStatistics with the specified
         * aggregate statistics object
         *
         * @param aggregateStatistics a {@code SummaryStatistics} into which
         *      values added to this statistics object should be aggregated
         */
        AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
            this.aggregateStatistics = aggregateStatistics;
        }

        /**
         * {@inheritDoc}.  This version adds the provided value to the configured
         * aggregate after adding it to these statistics.  Only the update of the
         * aggregate is performed while holding the aggregate's monitor.
         *
         * @see SummaryStatistics#addValue(double)
         */
        @Override
        public void addValue(double value) {
            super.addValue(value);
            synchronized (aggregateStatistics) {
                aggregateStatistics.addValue(value);
            }
        }

        /**
         * Returns true iff <code>object</code> is an
         * <code>AggregatingSummaryStatistics</code> instance, the inherited
         * equality test succeeds, and both instances have equal aggregate
         * statistics.
         * @param object the object to test equality against.
         * @return true if object equals this
         */
        @Override
        public boolean equals(Object object) {
            if (object == this) {
                return true;
            }
            if (!(object instanceof AggregatingSummaryStatistics)) {
                return false;
            }
            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics) object;
            return super.equals(stat) &&
                   aggregateStatistics.equals(stat.aggregateStatistics);
        }

        /**
         * Returns hash code based on values of statistics
         * @return hash code
         */
        @Override
        public int hashCode() {
            return 123 + super.hashCode() + aggregateStatistics.hashCode();
        }
    }
}