001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.exception.NullArgumentException;
023 import org.apache.commons.math.exception.util.LocalizedFormats;
024 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025 import org.apache.commons.math.stat.descriptive.moment.Mean;
026 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
027 import org.apache.commons.math.stat.descriptive.moment.Variance;
028 import org.apache.commons.math.stat.descriptive.rank.Max;
029 import org.apache.commons.math.stat.descriptive.rank.Min;
030 import org.apache.commons.math.stat.descriptive.summary.Sum;
031 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
032 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
033 import org.apache.commons.math.util.MathUtils;
034 import org.apache.commons.math.util.Precision;
035 import org.apache.commons.math.util.FastMath;
036
037 /**
038 * <p>
039 * Computes summary statistics for a stream of data values added using the
040 * {@link #addValue(double) addValue} method. The data values are not stored in
041 * memory, so this class can be used to compute statistics for very large data
042 * streams.
043 * </p>
044 * <p>
045 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
046 * state and compute statistics are configurable via setters. For example, the
047 * default implementation for the variance can be overridden by calling
048 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
049 * these methods must implement the {@link StorelessUnivariateStatistic}
050 * interface and configuration must be completed before <code>addValue</code>
051 * is called. No configuration is necessary to use the default, commons-math
052 * provided implementations.
053 * </p>
054 * <p>
055 * Note: This class is not thread-safe. Use
056 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
057 * threads is required.
058 * </p>
059 * @version $Id: SummaryStatistics.java 1181282 2011-10-10 22:35:54Z erans $
060 */
061 public class SummaryStatistics implements StatisticalSummary, Serializable {
062
063 /** Serialization UID */
064 private static final long serialVersionUID = -2021321786743555871L;
065
066 /** count of values that have been added */
067 protected long n = 0;
068
069 /** SecondMoment is used to compute the mean and variance */
070 protected SecondMoment secondMoment = new SecondMoment();
071
072 /** sum of values that have been added */
073 protected Sum sum = new Sum();
074
075 /** sum of the square of each value that has been added */
076 protected SumOfSquares sumsq = new SumOfSquares();
077
078 /** min of values that have been added */
079 protected Min min = new Min();
080
081 /** max of values that have been added */
082 protected Max max = new Max();
083
084 /** sumLog of values that have been added */
085 protected SumOfLogs sumLog = new SumOfLogs();
086
087 /** geoMean of values that have been added */
088 protected GeometricMean geoMean = new GeometricMean(sumLog);
089
090 /** mean of values that have been added */
091 protected Mean mean = new Mean();
092
093 /** variance of values that have been added */
094 protected Variance variance = new Variance();
095
096 /** Sum statistic implementation - can be reset by setter. */
097 private StorelessUnivariateStatistic sumImpl = sum;
098
099 /** Sum of squares statistic implementation - can be reset by setter. */
100 private StorelessUnivariateStatistic sumsqImpl = sumsq;
101
102 /** Minimum statistic implementation - can be reset by setter. */
103 private StorelessUnivariateStatistic minImpl = min;
104
105 /** Maximum statistic implementation - can be reset by setter. */
106 private StorelessUnivariateStatistic maxImpl = max;
107
108 /** Sum of log statistic implementation - can be reset by setter. */
109 private StorelessUnivariateStatistic sumLogImpl = sumLog;
110
111 /** Geometric mean statistic implementation - can be reset by setter. */
112 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
113
114 /** Mean statistic implementation - can be reset by setter. */
115 private StorelessUnivariateStatistic meanImpl = mean;
116
117 /** Variance statistic implementation - can be reset by setter. */
118 private StorelessUnivariateStatistic varianceImpl = variance;
119
120 /**
121 * Construct a SummaryStatistics instance
122 */
123 public SummaryStatistics() {
124 }
125
126 /**
127 * A copy constructor. Creates a deep-copy of the {@code original}.
128 *
129 * @param original the {@code SummaryStatistics} instance to copy
130 */
131 public SummaryStatistics(SummaryStatistics original) {
132 copy(original, this);
133 }
134
135 /**
136 * Return a {@link StatisticalSummaryValues} instance reporting current
137 * statistics.
138 * @return Current values of statistics
139 */
140 public StatisticalSummary getSummary() {
141 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
142 getMax(), getMin(), getSum());
143 }
144
145 /**
146 * Add a value to the data
147 * @param value the value to add
148 */
149 public void addValue(double value) {
150 sumImpl.increment(value);
151 sumsqImpl.increment(value);
152 minImpl.increment(value);
153 maxImpl.increment(value);
154 sumLogImpl.increment(value);
155 secondMoment.increment(value);
156 // If mean, variance or geomean have been overridden,
157 // need to increment these
158 if (!(meanImpl instanceof Mean)) {
159 meanImpl.increment(value);
160 }
161 if (!(varianceImpl instanceof Variance)) {
162 varianceImpl.increment(value);
163 }
164 if (!(geoMeanImpl instanceof GeometricMean)) {
165 geoMeanImpl.increment(value);
166 }
167 n++;
168 }
169
170 /**
171 * Returns the number of available values
172 * @return The number of available values
173 */
174 public long getN() {
175 return n;
176 }
177
178 /**
179 * Returns the sum of the values that have been added
180 * @return The sum or <code>Double.NaN</code> if no values have been added
181 */
182 public double getSum() {
183 return sumImpl.getResult();
184 }
185
186 /**
187 * Returns the sum of the squares of the values that have been added.
188 * <p>
189 * Double.NaN is returned if no values have been added.
190 * </p>
191 * @return The sum of squares
192 */
193 public double getSumsq() {
194 return sumsqImpl.getResult();
195 }
196
197 /**
198 * Returns the mean of the values that have been added.
199 * <p>
200 * Double.NaN is returned if no values have been added.
201 * </p>
202 * @return the mean
203 */
204 public double getMean() {
205 if (mean == meanImpl) {
206 return new Mean(secondMoment).getResult();
207 } else {
208 return meanImpl.getResult();
209 }
210 }
211
212 /**
213 * Returns the standard deviation of the values that have been added.
214 * <p>
215 * Double.NaN is returned if no values have been added.
216 * </p>
217 * @return the standard deviation
218 */
219 public double getStandardDeviation() {
220 double stdDev = Double.NaN;
221 if (getN() > 0) {
222 if (getN() > 1) {
223 stdDev = FastMath.sqrt(getVariance());
224 } else {
225 stdDev = 0.0;
226 }
227 }
228 return stdDev;
229 }
230
231 /**
232 * Returns the variance of the values that have been added.
233 * <p>
234 * Double.NaN is returned if no values have been added.
235 * </p>
236 * @return the variance
237 */
238 public double getVariance() {
239 if (varianceImpl == variance) {
240 return new Variance(secondMoment).getResult();
241 } else {
242 return varianceImpl.getResult();
243 }
244 }
245
246 /**
247 * Returns the maximum of the values that have been added.
248 * <p>
249 * Double.NaN is returned if no values have been added.
250 * </p>
251 * @return the maximum
252 */
253 public double getMax() {
254 return maxImpl.getResult();
255 }
256
257 /**
258 * Returns the minimum of the values that have been added.
259 * <p>
260 * Double.NaN is returned if no values have been added.
261 * </p>
262 * @return the minimum
263 */
264 public double getMin() {
265 return minImpl.getResult();
266 }
267
268 /**
269 * Returns the geometric mean of the values that have been added.
270 * <p>
271 * Double.NaN is returned if no values have been added.
272 * </p>
273 * @return the geometric mean
274 */
275 public double getGeometricMean() {
276 return geoMeanImpl.getResult();
277 }
278
279 /**
280 * Returns the sum of the logs of the values that have been added.
281 * <p>
282 * Double.NaN is returned if no values have been added.
283 * </p>
284 * @return the sum of logs
285 * @since 1.2
286 */
287 public double getSumOfLogs() {
288 return sumLogImpl.getResult();
289 }
290
291 /**
292 * Returns a statistic related to the Second Central Moment. Specifically,
293 * what is returned is the sum of squared deviations from the sample mean
294 * among the values that have been added.
295 * <p>
296 * Returns <code>Double.NaN</code> if no data values have been added and
297 * returns <code>0</code> if there is just one value in the data set.</p>
298 * <p>
299 * @return second central moment statistic
300 * @since 2.0
301 */
302 public double getSecondMoment() {
303 return secondMoment.getResult();
304 }
305
306 /**
307 * Generates a text report displaying summary statistics from values that
308 * have been added.
309 * @return String with line feeds displaying statistics
310 * @since 1.2
311 */
312 @Override
313 public String toString() {
314 StringBuilder outBuffer = new StringBuilder();
315 String endl = "\n";
316 outBuffer.append("SummaryStatistics:").append(endl);
317 outBuffer.append("n: ").append(getN()).append(endl);
318 outBuffer.append("min: ").append(getMin()).append(endl);
319 outBuffer.append("max: ").append(getMax()).append(endl);
320 outBuffer.append("mean: ").append(getMean()).append(endl);
321 outBuffer.append("geometric mean: ").append(getGeometricMean())
322 .append(endl);
323 outBuffer.append("variance: ").append(getVariance()).append(endl);
324 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
325 outBuffer.append("standard deviation: ").append(getStandardDeviation())
326 .append(endl);
327 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
328 return outBuffer.toString();
329 }
330
331 /**
332 * Resets all statistics and storage
333 */
334 public void clear() {
335 this.n = 0;
336 minImpl.clear();
337 maxImpl.clear();
338 sumImpl.clear();
339 sumLogImpl.clear();
340 sumsqImpl.clear();
341 geoMeanImpl.clear();
342 secondMoment.clear();
343 if (meanImpl != mean) {
344 meanImpl.clear();
345 }
346 if (varianceImpl != variance) {
347 varianceImpl.clear();
348 }
349 }
350
351 /**
352 * Returns true iff <code>object</code> is a
353 * <code>SummaryStatistics</code> instance and all statistics have the
354 * same values as this.
355 * @param object the object to test equality against.
356 * @return true if object equals this
357 */
358 @Override
359 public boolean equals(Object object) {
360 if (object == this) {
361 return true;
362 }
363 if (object instanceof SummaryStatistics == false) {
364 return false;
365 }
366 SummaryStatistics stat = (SummaryStatistics)object;
367 return Precision.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
368 Precision.equalsIncludingNaN(stat.getMax(), getMax()) &&
369 Precision.equalsIncludingNaN(stat.getMean(), getMean()) &&
370 Precision.equalsIncludingNaN(stat.getMin(), getMin()) &&
371 Precision.equalsIncludingNaN(stat.getN(), getN()) &&
372 Precision.equalsIncludingNaN(stat.getSum(), getSum()) &&
373 Precision.equalsIncludingNaN(stat.getSumsq(), getSumsq()) &&
374 Precision.equalsIncludingNaN(stat.getVariance(), getVariance());
375 }
376
377 /**
378 * Returns hash code based on values of statistics
379 * @return hash code
380 */
381 @Override
382 public int hashCode() {
383 int result = 31 + MathUtils.hash(getGeometricMean());
384 result = result * 31 + MathUtils.hash(getGeometricMean());
385 result = result * 31 + MathUtils.hash(getMax());
386 result = result * 31 + MathUtils.hash(getMean());
387 result = result * 31 + MathUtils.hash(getMin());
388 result = result * 31 + MathUtils.hash(getN());
389 result = result * 31 + MathUtils.hash(getSum());
390 result = result * 31 + MathUtils.hash(getSumsq());
391 result = result * 31 + MathUtils.hash(getVariance());
392 return result;
393 }
394
395 // Getters and setters for statistics implementations
396 /**
397 * Returns the currently configured Sum implementation
398 * @return the StorelessUnivariateStatistic implementing the sum
399 * @since 1.2
400 */
401 public StorelessUnivariateStatistic getSumImpl() {
402 return sumImpl;
403 }
404
405 /**
406 * <p>
407 * Sets the implementation for the Sum.
408 * </p>
409 * <p>
410 * This method must be activated before any data has been added - i.e.,
411 * before {@link #addValue(double) addValue} has been used to add data;
412 * otherwise an IllegalStateException will be thrown.
413 * </p>
414 * @param sumImpl the StorelessUnivariateStatistic instance to use for
415 * computing the Sum
416 * @throws IllegalStateException if data has already been added (i.e if n >
417 * 0)
418 * @since 1.2
419 */
420 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
421 checkEmpty();
422 this.sumImpl = sumImpl;
423 }
424
425 /**
426 * Returns the currently configured sum of squares implementation
427 * @return the StorelessUnivariateStatistic implementing the sum of squares
428 * @since 1.2
429 */
430 public StorelessUnivariateStatistic getSumsqImpl() {
431 return sumsqImpl;
432 }
433
434 /**
435 * <p>
436 * Sets the implementation for the sum of squares.
437 * </p>
438 * <p>
439 * This method must be activated before any data has been added - i.e.,
440 * before {@link #addValue(double) addValue} has been used to add data;
441 * otherwise an IllegalStateException will be thrown.
442 * </p>
443 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
444 * computing the sum of squares
445 * @throws IllegalStateException if data has already been added (i.e if n >
446 * 0)
447 * @since 1.2
448 */
449 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
450 checkEmpty();
451 this.sumsqImpl = sumsqImpl;
452 }
453
454 /**
455 * Returns the currently configured minimum implementation
456 * @return the StorelessUnivariateStatistic implementing the minimum
457 * @since 1.2
458 */
459 public StorelessUnivariateStatistic getMinImpl() {
460 return minImpl;
461 }
462
463 /**
464 * <p>
465 * Sets the implementation for the minimum.
466 * </p>
467 * <p>
468 * This method must be activated before any data has been added - i.e.,
469 * before {@link #addValue(double) addValue} has been used to add data;
470 * otherwise an IllegalStateException will be thrown.
471 * </p>
472 * @param minImpl the StorelessUnivariateStatistic instance to use for
473 * computing the minimum
474 * @throws IllegalStateException if data has already been added (i.e if n >
475 * 0)
476 * @since 1.2
477 */
478 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
479 checkEmpty();
480 this.minImpl = minImpl;
481 }
482
483 /**
484 * Returns the currently configured maximum implementation
485 * @return the StorelessUnivariateStatistic implementing the maximum
486 * @since 1.2
487 */
488 public StorelessUnivariateStatistic getMaxImpl() {
489 return maxImpl;
490 }
491
492 /**
493 * <p>
494 * Sets the implementation for the maximum.
495 * </p>
496 * <p>
497 * This method must be activated before any data has been added - i.e.,
498 * before {@link #addValue(double) addValue} has been used to add data;
499 * otherwise an IllegalStateException will be thrown.
500 * </p>
501 * @param maxImpl the StorelessUnivariateStatistic instance to use for
502 * computing the maximum
503 * @throws IllegalStateException if data has already been added (i.e if n >
504 * 0)
505 * @since 1.2
506 */
507 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
508 checkEmpty();
509 this.maxImpl = maxImpl;
510 }
511
512 /**
513 * Returns the currently configured sum of logs implementation
514 * @return the StorelessUnivariateStatistic implementing the log sum
515 * @since 1.2
516 */
517 public StorelessUnivariateStatistic getSumLogImpl() {
518 return sumLogImpl;
519 }
520
521 /**
522 * <p>
523 * Sets the implementation for the sum of logs.
524 * </p>
525 * <p>
526 * This method must be activated before any data has been added - i.e.,
527 * before {@link #addValue(double) addValue} has been used to add data;
528 * otherwise an IllegalStateException will be thrown.
529 * </p>
530 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
531 * computing the log sum
532 * @throws IllegalStateException if data has already been added (i.e if n >
533 * 0)
534 * @since 1.2
535 */
536 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
537 checkEmpty();
538 this.sumLogImpl = sumLogImpl;
539 geoMean.setSumLogImpl(sumLogImpl);
540 }
541
542 /**
543 * Returns the currently configured geometric mean implementation
544 * @return the StorelessUnivariateStatistic implementing the geometric mean
545 * @since 1.2
546 */
547 public StorelessUnivariateStatistic getGeoMeanImpl() {
548 return geoMeanImpl;
549 }
550
551 /**
552 * <p>
553 * Sets the implementation for the geometric mean.
554 * </p>
555 * <p>
556 * This method must be activated before any data has been added - i.e.,
557 * before {@link #addValue(double) addValue} has been used to add data;
558 * otherwise an IllegalStateException will be thrown.
559 * </p>
560 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
561 * computing the geometric mean
562 * @throws IllegalStateException if data has already been added (i.e if n >
563 * 0)
564 * @since 1.2
565 */
566 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
567 checkEmpty();
568 this.geoMeanImpl = geoMeanImpl;
569 }
570
571 /**
572 * Returns the currently configured mean implementation
573 * @return the StorelessUnivariateStatistic implementing the mean
574 * @since 1.2
575 */
576 public StorelessUnivariateStatistic getMeanImpl() {
577 return meanImpl;
578 }
579
580 /**
581 * <p>
582 * Sets the implementation for the mean.
583 * </p>
584 * <p>
585 * This method must be activated before any data has been added - i.e.,
586 * before {@link #addValue(double) addValue} has been used to add data;
587 * otherwise an IllegalStateException will be thrown.
588 * </p>
589 * @param meanImpl the StorelessUnivariateStatistic instance to use for
590 * computing the mean
591 * @throws IllegalStateException if data has already been added (i.e if n >
592 * 0)
593 * @since 1.2
594 */
595 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
596 checkEmpty();
597 this.meanImpl = meanImpl;
598 }
599
600 /**
601 * Returns the currently configured variance implementation
602 * @return the StorelessUnivariateStatistic implementing the variance
603 * @since 1.2
604 */
605 public StorelessUnivariateStatistic getVarianceImpl() {
606 return varianceImpl;
607 }
608
609 /**
610 * <p>
611 * Sets the implementation for the variance.
612 * </p>
613 * <p>
614 * This method must be activated before any data has been added - i.e.,
615 * before {@link #addValue(double) addValue} has been used to add data;
616 * otherwise an IllegalStateException will be thrown.
617 * </p>
618 * @param varianceImpl the StorelessUnivariateStatistic instance to use for
619 * computing the variance
620 * @throws IllegalStateException if data has already been added (i.e if n >
621 * 0)
622 * @since 1.2
623 */
624 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
625 checkEmpty();
626 this.varianceImpl = varianceImpl;
627 }
628
629 /**
630 * Throws IllegalStateException if n > 0.
631 */
632 private void checkEmpty() {
633 if (n > 0) {
634 throw MathRuntimeException.createIllegalStateException(
635 LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
636 n);
637 }
638 }
639
640 /**
641 * Returns a copy of this SummaryStatistics instance with the same internal state.
642 *
643 * @return a copy of this
644 */
645 public SummaryStatistics copy() {
646 SummaryStatistics result = new SummaryStatistics();
647 copy(this, result);
648 return result;
649 }
650
651 /**
652 * Copies source to dest.
653 * <p>Neither source nor dest can be null.</p>
654 *
655 * @param source SummaryStatistics to copy
656 * @param dest SummaryStatistics to copy to
657 * @throws NullArgumentException if either source or dest is null
658 */
659 public static void copy(SummaryStatistics source, SummaryStatistics dest)
660 throws NullArgumentException {
661 MathUtils.checkNotNull(source);
662 MathUtils.checkNotNull(dest);
663 dest.maxImpl = source.maxImpl.copy();
664 dest.meanImpl = source.meanImpl.copy();
665 dest.minImpl = source.minImpl.copy();
666 dest.sumImpl = source.sumImpl.copy();
667 dest.varianceImpl = source.varianceImpl.copy();
668 dest.sumLogImpl = source.sumLogImpl.copy();
669 dest.sumsqImpl = source.sumsqImpl.copy();
670 if (source.getGeoMeanImpl() instanceof GeometricMean) {
671 // Keep geoMeanImpl, sumLogImpl in synch
672 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
673 } else {
674 dest.geoMeanImpl = source.geoMeanImpl.copy();
675 }
676 SecondMoment.copy(source.secondMoment, dest.secondMoment);
677 dest.n = source.n;
678
679 // Make sure that if stat == statImpl in source, same
680 // holds in dest; otherwise copy stat
681 if (source.geoMean == source.geoMeanImpl) {
682 dest.geoMean = (GeometricMean) dest.geoMeanImpl;
683 } else {
684 GeometricMean.copy(source.geoMean, dest.geoMean);
685 }
686 if (source.max == source.maxImpl) {
687 dest.max = (Max) dest.maxImpl;
688 } else {
689 Max.copy(source.max, dest.max);
690 }
691 if (source.mean == source.meanImpl) {
692 dest.mean = (Mean) dest.meanImpl;
693 } else {
694 Mean.copy(source.mean, dest.mean);
695 }
696 if (source.min == source.minImpl) {
697 dest.min = (Min) dest.minImpl;
698 } else {
699 Min.copy(source.min, dest.min);
700 }
701 if (source.sum == source.sumImpl) {
702 dest.sum = (Sum) dest.sumImpl;
703 } else {
704 Sum.copy(source.sum, dest.sum);
705 }
706 if (source.variance == source.varianceImpl) {
707 dest.variance = (Variance) dest.varianceImpl;
708 } else {
709 Variance.copy(source.variance, dest.variance);
710 }
711 if (source.sumLog == source.sumLogImpl) {
712 dest.sumLog = (SumOfLogs) dest.sumLogImpl;
713 } else {
714 SumOfLogs.copy(source.sumLog, dest.sumLog);
715 }
716 if (source.sumsq == source.sumsqImpl) {
717 dest.sumsq = (SumOfSquares) dest.sumsqImpl;
718 } else {
719 SumOfSquares.copy(source.sumsq, dest.sumsq);
720 }
721 }
722 }