1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.descriptive;
18
19 import java.io.Serializable;
20
21 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
22 import org.apache.commons.math.stat.descriptive.moment.Mean;
23 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
24 import org.apache.commons.math.stat.descriptive.moment.Variance;
25 import org.apache.commons.math.stat.descriptive.rank.Max;
26 import org.apache.commons.math.stat.descriptive.rank.Min;
27 import org.apache.commons.math.stat.descriptive.summary.Sum;
28 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
29 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
30 import org.apache.commons.math.util.MathUtils;
31
32 /**
33 * <p>
34 * Computes summary statistics for a stream of data values added using the
35 * {@link #addValue(double) addValue} method. The data values are not stored in
36 * memory, so this class can be used to compute statistics for very large data
37 * streams.
38 * </p>
39 * <p>
40 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
41 * state and compute statistics are configurable via setters. For example, the
42 * default implementation for the variance can be overridden by calling
43 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
44 * these methods must implement the {@link StorelessUnivariateStatistic}
45 * interface and configuration must be completed before <code>addValue</code>
46 * is called. No configuration is necessary to use the default, commons-math
47 * provided implementations.
48 * </p>
49 * <p>
50 * Note: This class is not thread-safe. Use
51 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
52 * threads is required.
53 * </p>
54 * @version $Revision: 670469 $ $Date: 2008-06-23 10:01:38 +0200 (lun, 23 jun 2008) $
55 */
56 public class SummaryStatistics implements StatisticalSummary, Serializable {
57
58 /** Serialization UID */
59 private static final long serialVersionUID = -2021321786743555871L;
60
61 /**
62 * Construct a SummaryStatistics instance
63 */
64 public SummaryStatistics() {
65 }
66
67 /** count of values that have been added */
68 protected long n = 0;
69
70 /** SecondMoment is used to compute the mean and variance */
71 protected SecondMoment secondMoment = new SecondMoment();
72
73 /** sum of values that have been added */
74 protected Sum sum = new Sum();
75
76 /** sum of the square of each value that has been added */
77 protected SumOfSquares sumsq = new SumOfSquares();
78
79 /** min of values that have been added */
80 protected Min min = new Min();
81
82 /** max of values that have been added */
83 protected Max max = new Max();
84
85 /** sumLog of values that have been added */
86 protected SumOfLogs sumLog = new SumOfLogs();
87
88 /** geoMean of values that have been added */
89 protected GeometricMean geoMean = new GeometricMean(sumLog);
90
91 /** mean of values that have been added */
92 protected Mean mean = new Mean();
93
94 /** variance of values that have been added */
95 protected Variance variance = new Variance();
96
97 /** Sum statistic implementation - can be reset by setter. */
98 private StorelessUnivariateStatistic sumImpl = sum;
99
100 /** Sum of squares statistic implementation - can be reset by setter. */
101 private StorelessUnivariateStatistic sumsqImpl = sumsq;
102
103 /** Minimum statistic implementation - can be reset by setter. */
104 private StorelessUnivariateStatistic minImpl = min;
105
106 /** Maximum statistic implementation - can be reset by setter. */
107 private StorelessUnivariateStatistic maxImpl = max;
108
109 /** Sum of log statistic implementation - can be reset by setter. */
110 private StorelessUnivariateStatistic sumLogImpl = sumLog;
111
112 /** Geometric mean statistic implementation - can be reset by setter. */
113 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
114
115 /** Mean statistic implementation - can be reset by setter. */
116 private StorelessUnivariateStatistic meanImpl = mean;
117
118 /** Variance statistic implementation - can be reset by setter. */
119 private StorelessUnivariateStatistic varianceImpl = variance;
120
121 /**
122 * Return a {@link StatisticalSummaryValues} instance reporting current
123 * statistics.
124 * @return Current values of statistics
125 */
126 public StatisticalSummary getSummary() {
127 return new StatisticalSummaryValues(getMean(), getVariance(), getN(), getMax(), getMin(), getSum());
128 }
129
130 /**
131 * Add a value to the data
132 * @param value the value to add
133 */
134 public void addValue(double value) {
135 sumImpl.increment(value);
136 sumsqImpl.increment(value);
137 minImpl.increment(value);
138 maxImpl.increment(value);
139 sumLogImpl.increment(value);
140 secondMoment.increment(value);
141 // If mean, variance or geomean have been overridden,
142 // need to increment these
143 if (!(meanImpl instanceof Mean)) {
144 meanImpl.increment(value);
145 }
146 if (!(varianceImpl instanceof Variance)) {
147 varianceImpl.increment(value);
148 }
149 if (!(geoMeanImpl instanceof GeometricMean)) {
150 geoMeanImpl.increment(value);
151 }
152 n++;
153 }
154
155 /**
156 * Returns the number of available values
157 * @return The number of available values
158 */
159 public long getN() {
160 return n;
161 }
162
163 /**
164 * Returns the sum of the values that have been added
165 * @return The sum or <code>Double.NaN</code> if no values have been added
166 */
167 public double getSum() {
168 return sumImpl.getResult();
169 }
170
171 /**
172 * Returns the sum of the squares of the values that have been added.
173 * <p>
174 * Double.NaN is returned if no values have been added.
175 * </p>
176 * @return The sum of squares
177 */
178 public double getSumsq() {
179 return sumsqImpl.getResult();
180 }
181
182 /**
183 * Returns the mean of the values that have been added.
184 * <p>
185 * Double.NaN is returned if no values have been added.
186 * </p>
187 * @return the mean
188 */
189 public double getMean() {
190 if (mean == meanImpl) {
191 return new Mean(secondMoment).getResult();
192 } else {
193 return meanImpl.getResult();
194 }
195 }
196
197 /**
198 * Returns the standard deviation of the values that have been added.
199 * <p>
200 * Double.NaN is returned if no values have been added.
201 * </p>
202 * @return the standard deviation
203 */
204 public double getStandardDeviation() {
205 double stdDev = Double.NaN;
206 if (getN() > 0) {
207 if (getN() > 1) {
208 stdDev = Math.sqrt(getVariance());
209 } else {
210 stdDev = 0.0;
211 }
212 }
213 return (stdDev);
214 }
215
216 /**
217 * Returns the variance of the values that have been added.
218 * <p>
219 * Double.NaN is returned if no values have been added.
220 * </p>
221 * @return the variance
222 */
223 public double getVariance() {
224 if (varianceImpl == variance) {
225 return new Variance(secondMoment).getResult();
226 } else {
227 return varianceImpl.getResult();
228 }
229 }
230
231 /**
232 * Returns the maximum of the values that have been added.
233 * <p>
234 * Double.NaN is returned if no values have been added.
235 * </p>
236 * @return the maximum
237 */
238 public double getMax() {
239 return maxImpl.getResult();
240 }
241
242 /**
243 * Returns the minimum of the values that have been added.
244 * <p>
245 * Double.NaN is returned if no values have been added.
246 * </p>
247 * @return the minimum
248 */
249 public double getMin() {
250 return minImpl.getResult();
251 }
252
253 /**
254 * Returns the geometric mean of the values that have been added.
255 * <p>
256 * Double.NaN is returned if no values have been added.
257 * </p>
258 * @return the geometric mean
259 */
260 public double getGeometricMean() {
261 return geoMeanImpl.getResult();
262 }
263
264 /**
265 * Returns the sum of the logs of the values that have been added.
266 * <p>
267 * Double.NaN is returned if no values have been added.
268 * </p>
269 * @return the sum of logs
270 * @since 1.2
271 */
272 public double getSumOfLogs() {
273 return sumLogImpl.getResult();
274 }
275
276 /**
277 * Generates a text report displaying summary statistics from values that
278 * have been added.
279 * @return String with line feeds displaying statistics
280 * @since 1.2
281 */
282 public String toString() {
283 StringBuffer outBuffer = new StringBuffer();
284 String endl = "\n";
285 outBuffer.append("SummaryStatistics:").append(endl);
286 outBuffer.append("n: ").append(getN()).append(endl);
287 outBuffer.append("min: ").append(getMin()).append(endl);
288 outBuffer.append("max: ").append(getMax()).append(endl);
289 outBuffer.append("mean: ").append(getMean()).append(endl);
290 outBuffer.append("geometric mean: ").append(getGeometricMean())
291 .append(endl);
292 outBuffer.append("variance: ").append(getVariance()).append(endl);
293 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
294 outBuffer.append("standard deviation: ").append(getStandardDeviation())
295 .append(endl);
296 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
297 return outBuffer.toString();
298 }
299
300 /**
301 * Resets all statistics and storage
302 */
303 public void clear() {
304 this.n = 0;
305 minImpl.clear();
306 maxImpl.clear();
307 sumImpl.clear();
308 sumLogImpl.clear();
309 sumsqImpl.clear();
310 geoMeanImpl.clear();
311 secondMoment.clear();
312 if (meanImpl != mean) {
313 meanImpl.clear();
314 }
315 if (varianceImpl != variance) {
316 varianceImpl.clear();
317 }
318 }
319
320 /**
321 * Returns true iff <code>object</code> is a
322 * <code>SummaryStatistics</code> instance and all statistics have the
323 * same values as this.
324 * @param object the object to test equality against.
325 * @return true if object equals this
326 */
327 public boolean equals(Object object) {
328 if (object == this) {
329 return true;
330 }
331 if (object instanceof SummaryStatistics == false) {
332 return false;
333 }
334 SummaryStatistics stat = (SummaryStatistics)object;
335 return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
336 MathUtils.equals(stat.getMax(), this.getMax()) &&
337 MathUtils.equals(stat.getMean(), this.getMean()) &&
338 MathUtils.equals(stat.getMin(), this.getMin()) &&
339 MathUtils.equals(stat.getN(), this.getN()) &&
340 MathUtils.equals(stat.getSum(), this.getSum()) &&
341 MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
342 MathUtils.equals(stat.getVariance(),
343 this.getVariance()));
344 }
345
346 /**
347 * Returns hash code based on values of statistics
348 * @return hash code
349 */
350 public int hashCode() {
351 int result = 31 + MathUtils.hash(getGeometricMean());
352 result = result * 31 + MathUtils.hash(getGeometricMean());
353 result = result * 31 + MathUtils.hash(getMax());
354 result = result * 31 + MathUtils.hash(getMean());
355 result = result * 31 + MathUtils.hash(getMin());
356 result = result * 31 + MathUtils.hash(getN());
357 result = result * 31 + MathUtils.hash(getSum());
358 result = result * 31 + MathUtils.hash(getSumsq());
359 result = result * 31 + MathUtils.hash(getVariance());
360 return result;
361 }
362
363 // Getters and setters for statistics implementations
364 /**
365 * Returns the currently configured Sum implementation
366 * @return the StorelessUnivariateStatistic implementing the sum
367 * @since 1.2
368 */
369 public StorelessUnivariateStatistic getSumImpl() {
370 return sumImpl;
371 }
372
373 /**
374 * <p>
375 * Sets the implementation for the Sum.
376 * </p>
377 * <p>
378 * This method must be activated before any data has been added - i.e.,
379 * before {@link #addValue(double) addValue} has been used to add data;
380 * otherwise an IllegalStateException will be thrown.
381 * </p>
382 * @param sumImpl the StorelessUnivariateStatistic instance to use for
383 * computing the Sum
384 * @throws IllegalStateException if data has already been added (i.e if n >
385 * 0)
386 * @since 1.2
387 */
388 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
389 checkEmpty();
390 this.sumImpl = sumImpl;
391 }
392
393 /**
394 * Returns the currently configured sum of squares implementation
395 * @return the StorelessUnivariateStatistic implementing the sum of squares
396 * @since 1.2
397 */
398 public StorelessUnivariateStatistic getSumsqImpl() {
399 return sumsqImpl;
400 }
401
402 /**
403 * <p>
404 * Sets the implementation for the sum of squares.
405 * </p>
406 * <p>
407 * This method must be activated before any data has been added - i.e.,
408 * before {@link #addValue(double) addValue} has been used to add data;
409 * otherwise an IllegalStateException will be thrown.
410 * </p>
411 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
412 * computing the sum of squares
413 * @throws IllegalStateException if data has already been added (i.e if n >
414 * 0)
415 * @since 1.2
416 */
417 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
418 checkEmpty();
419 this.sumsqImpl = sumsqImpl;
420 }
421
422 /**
423 * Returns the currently configured minimum implementation
424 * @return the StorelessUnivariateStatistic implementing the minimum
425 * @since 1.2
426 */
427 public StorelessUnivariateStatistic getMinImpl() {
428 return minImpl;
429 }
430
431 /**
432 * <p>
433 * Sets the implementation for the minimum.
434 * </p>
435 * <p>
436 * This method must be activated before any data has been added - i.e.,
437 * before {@link #addValue(double) addValue} has been used to add data;
438 * otherwise an IllegalStateException will be thrown.
439 * </p>
440 * @param minImpl the StorelessUnivariateStatistic instance to use for
441 * computing the minimum
442 * @throws IllegalStateException if data has already been added (i.e if n >
443 * 0)
444 * @since 1.2
445 */
446 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
447 checkEmpty();
448 this.minImpl = minImpl;
449 }
450
451 /**
452 * Returns the currently configured maximum implementation
453 * @return the StorelessUnivariateStatistic implementing the maximum
454 * @since 1.2
455 */
456 public StorelessUnivariateStatistic getMaxImpl() {
457 return maxImpl;
458 }
459
460 /**
461 * <p>
462 * Sets the implementation for the maximum.
463 * </p>
464 * <p>
465 * This method must be activated before any data has been added - i.e.,
466 * before {@link #addValue(double) addValue} has been used to add data;
467 * otherwise an IllegalStateException will be thrown.
468 * </p>
469 * @param maxImpl the StorelessUnivariateStatistic instance to use for
470 * computing the maximum
471 * @throws IllegalStateException if data has already been added (i.e if n >
472 * 0)
473 * @since 1.2
474 */
475 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
476 checkEmpty();
477 this.maxImpl = maxImpl;
478 }
479
480 /**
481 * Returns the currently configured sum of logs implementation
482 * @return the StorelessUnivariateStatistic implementing the log sum
483 * @since 1.2
484 */
485 public StorelessUnivariateStatistic getSumLogImpl() {
486 return sumLogImpl;
487 }
488
489 /**
490 * <p>
491 * Sets the implementation for the sum of logs.
492 * </p>
493 * <p>
494 * This method must be activated before any data has been added - i.e.,
495 * before {@link #addValue(double) addValue} has been used to add data;
496 * otherwise an IllegalStateException will be thrown.
497 * </p>
498 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
499 * computing the log sum
500 * @throws IllegalStateException if data has already been added (i.e if n >
501 * 0)
502 * @since 1.2
503 */
504 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
505 checkEmpty();
506 this.sumLogImpl = sumLogImpl;
507 geoMean.setSumLogImpl(sumLogImpl);
508 }
509
510 /**
511 * Returns the currently configured geometric mean implementation
512 * @return the StorelessUnivariateStatistic implementing the geometric mean
513 * @since 1.2
514 */
515 public StorelessUnivariateStatistic getGeoMeanImpl() {
516 return geoMeanImpl;
517 }
518
519 /**
520 * <p>
521 * Sets the implementation for the geometric mean.
522 * </p>
523 * <p>
524 * This method must be activated before any data has been added - i.e.,
525 * before {@link #addValue(double) addValue} has been used to add data;
526 * otherwise an IllegalStateException will be thrown.
527 * </p>
528 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
529 * computing the geometric mean
530 * @throws IllegalStateException if data has already been added (i.e if n >
531 * 0)
532 * @since 1.2
533 */
534 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
535 checkEmpty();
536 this.geoMeanImpl = geoMeanImpl;
537 }
538
539 /**
540 * Returns the currently configured mean implementation
541 * @return the StorelessUnivariateStatistic implementing the mean
542 * @since 1.2
543 */
544 public StorelessUnivariateStatistic getMeanImpl() {
545 return meanImpl;
546 }
547
548 /**
549 * <p>
550 * Sets the implementation for the mean.
551 * </p>
552 * <p>
553 * This method must be activated before any data has been added - i.e.,
554 * before {@link #addValue(double) addValue} has been used to add data;
555 * otherwise an IllegalStateException will be thrown.
556 * </p>
557 * @param meanImpl the StorelessUnivariateStatistic instance to use for
558 * computing the mean
559 * @throws IllegalStateException if data has already been added (i.e if n >
560 * 0)
561 * @since 1.2
562 */
563 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
564 checkEmpty();
565 this.meanImpl = meanImpl;
566 }
567
568 /**
569 * Returns the currently configured variance implementation
570 * @return the StorelessUnivariateStatistic implementing the variance
571 * @since 1.2
572 */
573 public StorelessUnivariateStatistic getVarianceImpl() {
574 return varianceImpl;
575 }
576
577 /**
578 * <p>
579 * Sets the implementation for the variance.
580 * </p>
581 * <p>
582 * This method must be activated before any data has been added - i.e.,
583 * before {@link #addValue(double) addValue} has been used to add data;
584 * otherwise an IllegalStateException will be thrown.
585 * </p>
586 * @param varianceImpl the StorelessUnivariateStatistic instance to use for
587 * computing the variance
588 * @throws IllegalStateException if data has already been added (i.e if n >
589 * 0)
590 * @since 1.2
591 */
592 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
593 checkEmpty();
594 this.varianceImpl = varianceImpl;
595 }
596
597 /**
598 * Throws IllegalStateException if n > 0.
599 */
600 private void checkEmpty() {
601 if (n > 0) {
602 throw new IllegalStateException("Implementations must be configured before values are added.");
603 }
604 }
605
606 }