/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math4.legacy.stat.descriptive;

import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.math4.legacy.exception.NullArgumentException;

/**
 * <p>
 * An aggregator for {@code SummaryStatistics} from several data sets or
 * data set partitions.  In its simplest usage mode, the client creates an
 * instance via the zero-argument constructor, then uses
 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
 * for each individual data set / partition.  The per-set statistics objects
 * are used as normal, and at any time the aggregate statistics for all the
 * contributors can be obtained from this object.
 * </p><p>
 * Clients with specialized requirements can use alternative constructors to
 * control the statistics implementations and initial values used by the
 * contributing and the internal aggregate {@code SummaryStatistics} objects.
 * </p><p>
 * A static {@link #aggregate(Collection)} method is also included that computes
 * aggregate statistics directly from a Collection of SummaryStatistics instances.
 * </p><p>
 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
 * instances to be aggregated concurrently, the created instances'
 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
 * instance maintained by this class.  In multithreaded environments, if the functionality
 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
 * to avoid unnecessary computation and synchronization delays.</p>
 *
 * @since 2.0
 *
 */
public class AggregateSummaryStatistics implements StatisticalSummary {
    /**
     * A SummaryStatistics serving as a prototype for creating the
     * SummaryStatistics contributing to this aggregate.
     */
    private final SummaryStatistics statisticsPrototype;

    /**
     * The SummaryStatistics in which aggregate statistics are accumulated.
     * It also serves as the lock guarding every read and update of the aggregate.
     */
    private final SummaryStatistics statistics;

    /**
     * Initializes a new AggregateSummaryStatistics with default statistics
     * implementations.
     *
     */
    public AggregateSummaryStatistics() {
        // No try-catch or throws NAE because arg is guaranteed non-null
        this(new SummaryStatistics());
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and for the internal
     * aggregate statistics.  This provides for customized statistics implementations
     * to be used by contributing and aggregate statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype both for the internal aggregate statistics and for
     *      contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects and (once) into these aggregate statistics.
     * @throws NullArgumentException never for a {@code null} prototype, which is
     *      replaced by a default instance; the declaration is kept so that the
     *      constructor signature is unchanged.
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
        // The copy constructor is only reached with a non-null prototype; a null
        // prototype is forwarded as (null, null) and defaulted by the delegate.
        this(prototypeStatistics,
             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
    }

    /**
     * Initializes a new AggregateSummaryStatistics with the specified statistics
     * object as a prototype for contributing statistics and a separate statistics
     * object as the internal aggregate.  This provides for different statistics
     * implementations to be used by contributing and aggregate statistics and for
     * an initial state to be supplied for the aggregate statistics.
     *
     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     *      prototype for contributing statistics obtained via the
     *      {@code createContributingStatistics()} method.  Being a prototype
     *      means that other objects are initialized by copying this object's state.
     *      If {@code null}, a new, default statistics object is used.  Any statistic
     *      values in the prototype are propagated to contributing statistics
     *      objects, but not into these aggregate statistics.
     * @param initialStatistics a {@code SummaryStatistics} to serve as the
     *      internal aggregate statistics object.  The instance is used directly
     *      (it is not copied).  If {@code null}, a new, default statistics object
     *      is used.
     * @see #createContributingStatistics()
     */
    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
                                      SummaryStatistics initialStatistics) {
        this.statisticsPrototype =
            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
        this.statistics =
            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
    }

    /**
     * {@inheritDoc}.  This version returns the maximum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMax()
     */
    @Override
    public double getMax() {
        synchronized (statistics) {
            return statistics.getMax();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
     *
     * @see StatisticalSummary#getMean()
     */
    @Override
    public double getMean() {
        synchronized (statistics) {
            return statistics.getMean();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the minimum over all the aggregated
     * data.
     *
     * @see StatisticalSummary#getMin()
     */
    @Override
    public double getMin() {
        synchronized (statistics) {
            return statistics.getMin();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a count of all the aggregated data.
     *
     * @see StatisticalSummary#getN()
     */
    @Override
    public long getN() {
        synchronized (statistics) {
            return statistics.getN();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the standard deviation of all the
     * aggregated data.
     *
     * @see StatisticalSummary#getStandardDeviation()
     */
    @Override
    public double getStandardDeviation() {
        synchronized (statistics) {
            return statistics.getStandardDeviation();
        }
    }

    /**
     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
     *
     * @see StatisticalSummary#getSum()
     */
    @Override
    public double getSum() {
        synchronized (statistics) {
            return statistics.getSum();
        }
    }

    /**
     * {@inheritDoc}.  This version returns the variance of all the aggregated
     * data.
     *
     * @see StatisticalSummary#getVariance()
     */
    @Override
    public double getVariance() {
        synchronized (statistics) {
            return statistics.getVariance();
        }
    }

    /**
     * Returns the sum of the logs of all the aggregated data.
     *
     * @return the sum of logs
     * @see SummaryStatistics#getSumOfLogs()
     */
    public double getSumOfLogs() {
        synchronized (statistics) {
            return statistics.getSumOfLogs();
        }
    }

    /**
     * Returns the geometric mean of all the aggregated data.
     *
     * @return the geometric mean
     * @see SummaryStatistics#getGeometricMean()
     */
    public double getGeometricMean() {
        synchronized (statistics) {
            return statistics.getGeometricMean();
        }
    }

    /**
     * Returns the sum of the squares of all the aggregated data.
     *
     * @return The sum of squares
     * @see SummaryStatistics#getSumsq()
     */
    public double getSumsq() {
        synchronized (statistics) {
            return statistics.getSumsq();
        }
    }

    /**
     * Returns a statistic related to the Second Central Moment.  Specifically,
     * what is returned is the sum of squared deviations from the sample mean
     * among all of the aggregated data.
     *
     * @return second central moment statistic
     * @see SummaryStatistics#getSecondMoment()
     */
    public double getSecondMoment() {
        synchronized (statistics) {
            return statistics.getSecondMoment();
        }
    }

    /**
     * Return a {@link StatisticalSummaryValues} instance reporting current
     * aggregate statistics.
     *
     * @return Current values of aggregate statistics
     */
    public StatisticalSummary getSummary() {
        // Holding the lock across all six getters yields one consistent snapshot;
        // the getters re-acquire the same (reentrant) monitor.
        synchronized (statistics) {
            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
                    getMax(), getMin(), getSum());
        }
    }

    /**
     * Creates and returns a {@code SummaryStatistics} whose data will be
     * aggregated with those of this {@code AggregateSummaryStatistics}.
     *
     * @return a {@code SummaryStatistics} whose data will be aggregated with
     *      those of this {@code AggregateSummaryStatistics}.  The initial state
     *      is a copy of the configured prototype statistics.
     */
    public SummaryStatistics createContributingStatistics() {
        SummaryStatistics contributingStatistics
                = new AggregatingSummaryStatistics(statistics);

        // No try - catch or advertising NAE because neither argument will ever be null
        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);

        return contributingStatistics;
    }

    /**
     * Computes aggregate summary statistics. This method can be used to combine statistics
     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
     * should contain the same values that would have been obtained by computing a single
     * StatisticalSummary over the combined dataset.
     * <p>
     * Returns null if the collection is empty or null.
     * </p>
     * <p>
     * Summaries reporting {@code getN() == 0} contribute no observations: their mean and
     * variance are not used, so an empty partition does not turn the aggregate variance
     * into {@code NaN}.
     * </p>
     *
     * @param statistics collection of SummaryStatistics to aggregate
     * @return summary statistics for the combined dataset
     */
    public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
        if (statistics == null) {
            return null;
        }
        Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
        if (!iterator.hasNext()) {
            return null;
        }
        StatisticalSummary current = iterator.next();
        long n = current.getN();
        double min = current.getMin();
        double sum = current.getSum();
        double max = current.getMax();
        // m2 is the running sum of squared deviations from the mean.  An empty summary
        // holds no deviations; its reported variance may be undefined (e.g. NaN) and
        // must not seed the accumulator.
        double m2 = n > 0 ? current.getVariance() * (n - 1d) : 0d;
        double mean = current.getMean();
        while (iterator.hasNext()) {
            current = iterator.next();
            if (current.getMin() < min || Double.isNaN(min)) {
                min = current.getMin();
            }
            if (current.getMax() > max || Double.isNaN(max)) {
                max = current.getMax();
            }
            sum += current.getSum();
            final long curCount = current.getN();
            if (curCount == 0) {
                // Nothing to merge: skipping avoids propagating an undefined mean/variance.
                continue;
            }
            final double oldN = n;
            final double curN = curCount;
            n += curCount;
            final double curM2 = current.getVariance() * (curN - 1d);
            if (oldN == 0) {
                // Everything seen so far was empty, so the running mean is undefined;
                // the first non-empty summary provides the initial deviations as is.
                m2 = curM2;
            } else {
                // Pairwise merge of two partitions' squared deviations.
                final double meanDiff = current.getMean() - mean;
                m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
            }
            mean = sum / n;
        }
        final double variance;
        if (n == 0) {
            variance = Double.NaN;
        } else if (n == 1) {
            variance = 0d;
        } else {
            variance = m2 / (n - 1);
        }
        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
    }

    /**
     * A SummaryStatistics that also forwards all values added to it to a second
     * {@code SummaryStatistics} for aggregation.
     *
     * @since 2.0
     */
    private static class AggregatingSummaryStatistics extends SummaryStatistics {

        /**
         * The serialization version of this class.
         */
        private static final long serialVersionUID = 1L;

        /**
         * An additional SummaryStatistics into which values added to these
         * statistics (and possibly others) are aggregated.
         */
        private final SummaryStatistics aggregateStatistics;

        /**
         * Initializes a new AggregatingSummaryStatistics with the specified
         * aggregate statistics object.
         *
         * @param aggregateStatistics a {@code SummaryStatistics} into which
         *      values added to this statistics object should be aggregated
         */
        AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
            this.aggregateStatistics = aggregateStatistics;
        }

        /**
         * {@inheritDoc}.  This version adds the provided value to the configured
         * aggregate after adding it to these statistics.
         *
         * @see SummaryStatistics#addValue(double)
         */
        @Override
        public void addValue(double value) {
            super.addValue(value);
            // The aggregate may be shared by several contributors, so its update
            // is guarded by the aggregate's own monitor.
            synchronized (aggregateStatistics) {
                aggregateStatistics.addValue(value);
            }
        }

        /**
         * Returns true iff <code>object</code> is an
         * <code>AggregatingSummaryStatistics</code> instance, all statistics have the
         * same values as this, and both forward to equal aggregate statistics.
         * @param object the object to test equality against.
         * @return true if object equals this
         */
        @Override
        public boolean equals(Object object) {
            if (object == this) {
                return true;
            }
            if (!(object instanceof AggregatingSummaryStatistics)) {
                return false;
            }
            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
            return super.equals(stat) &&
                   aggregateStatistics.equals(stat.aggregateStatistics);
        }

        /**
         * Returns hash code based on values of statistics.
         * @return hash code
         */
        @Override
        public int hashCode() {
            return 123 + super.hashCode() + aggregateStatistics.hashCode();
        }
    }
}