View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
22  import org.apache.commons.math.stat.descriptive.moment.Mean;
23  import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
24  import org.apache.commons.math.stat.descriptive.moment.Variance;
25  import org.apache.commons.math.stat.descriptive.rank.Max;
26  import org.apache.commons.math.stat.descriptive.rank.Min;
27  import org.apache.commons.math.stat.descriptive.summary.Sum;
28  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
29  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
30  import org.apache.commons.math.util.MathUtils;
31  
32  /**
33   * <p>
34   * Computes summary statistics for a stream of data values added using the
35   * {@link #addValue(double) addValue} method. The data values are not stored in
36   * memory, so this class can be used to compute statistics for very large data
37   * streams.
38   * </p>
39   * <p>
40   * The {@link StorelessUnivariateStatistic} instances used to maintain summary
41   * state and compute statistics are configurable via setters. For example, the
42   * default implementation for the variance can be overridden by calling
43   * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
44   * these methods must implement the {@link StorelessUnivariateStatistic}
45   * interface and configuration must be completed before <code>addValue</code>
46   * is called. No configuration is necessary to use the default, commons-math
47   * provided implementations.
48   * </p>
49   * <p>
50   * Note: This class is not thread-safe. Use
51   * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
52   * threads is required.
53   * </p>
54   * @version $Revision: 670469 $ $Date: 2008-06-23 10:01:38 +0200 (lun, 23 jun 2008) $
55   */
56  public class SummaryStatistics implements StatisticalSummary, Serializable {
57  
58      /** Serialization UID */
59      private static final long serialVersionUID = -2021321786743555871L;
60  
61      /**
62       * Construct a SummaryStatistics instance
63       */
64      public SummaryStatistics() {
65      }
66  
67      /** count of values that have been added */
68      protected long n = 0;
69  
70      /** SecondMoment is used to compute the mean and variance */
71      protected SecondMoment secondMoment = new SecondMoment();
72  
73      /** sum of values that have been added */
74      protected Sum sum = new Sum();
75  
76      /** sum of the square of each value that has been added */
77      protected SumOfSquares sumsq = new SumOfSquares();
78  
79      /** min of values that have been added */
80      protected Min min = new Min();
81  
82      /** max of values that have been added */
83      protected Max max = new Max();
84  
85      /** sumLog of values that have been added */
86      protected SumOfLogs sumLog = new SumOfLogs();
87  
88      /** geoMean of values that have been added */
89      protected GeometricMean geoMean = new GeometricMean(sumLog);
90  
91      /** mean of values that have been added */
92      protected Mean mean = new Mean();
93  
94      /** variance of values that have been added */
95      protected Variance variance = new Variance();
96  
97      /** Sum statistic implementation - can be reset by setter. */
98      private StorelessUnivariateStatistic sumImpl = sum;
99  
100     /** Sum of squares statistic implementation - can be reset by setter. */
101     private StorelessUnivariateStatistic sumsqImpl = sumsq;
102 
103     /** Minimum statistic implementation - can be reset by setter. */
104     private StorelessUnivariateStatistic minImpl = min;
105 
106     /** Maximum statistic implementation - can be reset by setter. */
107     private StorelessUnivariateStatistic maxImpl = max;
108 
109     /** Sum of log statistic implementation - can be reset by setter. */
110     private StorelessUnivariateStatistic sumLogImpl = sumLog;
111 
112     /** Geometric mean statistic implementation - can be reset by setter. */
113     private StorelessUnivariateStatistic geoMeanImpl = geoMean;
114 
115     /** Mean statistic implementation - can be reset by setter. */
116     private StorelessUnivariateStatistic meanImpl = mean;
117 
118     /** Variance statistic implementation - can be reset by setter. */
119     private StorelessUnivariateStatistic varianceImpl = variance;
120 
121     /**
122      * Return a {@link StatisticalSummaryValues} instance reporting current
123      * statistics.
124      * @return Current values of statistics
125      */
126     public StatisticalSummary getSummary() {
127         return new StatisticalSummaryValues(getMean(), getVariance(), getN(), getMax(), getMin(), getSum());
128     }
129 
130     /**
131      * Add a value to the data
132      * @param value the value to add
133      */
134     public void addValue(double value) {
135         sumImpl.increment(value);
136         sumsqImpl.increment(value);
137         minImpl.increment(value);
138         maxImpl.increment(value);
139         sumLogImpl.increment(value);
140         secondMoment.increment(value);
141         // If mean, variance or geomean have been overridden,
142         // need to increment these
143         if (!(meanImpl instanceof Mean)) {
144             meanImpl.increment(value);
145         }
146         if (!(varianceImpl instanceof Variance)) {
147             varianceImpl.increment(value);
148         }
149         if (!(geoMeanImpl instanceof GeometricMean)) {
150             geoMeanImpl.increment(value);
151         }
152         n++;
153     }
154 
155     /**
156      * Returns the number of available values
157      * @return The number of available values
158      */
159     public long getN() {
160         return n;
161     }
162 
163     /**
164      * Returns the sum of the values that have been added
165      * @return The sum or <code>Double.NaN</code> if no values have been added
166      */
167     public double getSum() {
168         return sumImpl.getResult();
169     }
170 
171     /**
172      * Returns the sum of the squares of the values that have been added.
173      * <p>
174      * Double.NaN is returned if no values have been added.
175      * </p>
176      * @return The sum of squares
177      */
178     public double getSumsq() {
179         return sumsqImpl.getResult();
180     }
181 
182     /**
183      * Returns the mean of the values that have been added.
184      * <p>
185      * Double.NaN is returned if no values have been added.
186      * </p>
187      * @return the mean
188      */
189     public double getMean() {
190         if (mean == meanImpl) {
191             return new Mean(secondMoment).getResult();
192         } else {
193             return meanImpl.getResult();
194         }
195     }
196 
197     /**
198      * Returns the standard deviation of the values that have been added.
199      * <p>
200      * Double.NaN is returned if no values have been added.
201      * </p>
202      * @return the standard deviation
203      */
204     public double getStandardDeviation() {
205         double stdDev = Double.NaN;
206         if (getN() > 0) {
207             if (getN() > 1) {
208                 stdDev = Math.sqrt(getVariance());
209             } else {
210                 stdDev = 0.0;
211             }
212         }
213         return (stdDev);
214     }
215 
216     /**
217      * Returns the variance of the values that have been added.
218      * <p>
219      * Double.NaN is returned if no values have been added.
220      * </p>
221      * @return the variance
222      */
223     public double getVariance() {
224         if (varianceImpl == variance) {
225             return new Variance(secondMoment).getResult();
226         } else {
227             return varianceImpl.getResult();
228         }
229     }
230 
231     /**
232      * Returns the maximum of the values that have been added.
233      * <p>
234      * Double.NaN is returned if no values have been added.
235      * </p>
236      * @return the maximum
237      */
238     public double getMax() {
239         return maxImpl.getResult();
240     }
241 
242     /**
243      * Returns the minimum of the values that have been added.
244      * <p>
245      * Double.NaN is returned if no values have been added.
246      * </p>
247      * @return the minimum
248      */
249     public double getMin() {
250         return minImpl.getResult();
251     }
252 
253     /**
254      * Returns the geometric mean of the values that have been added.
255      * <p>
256      * Double.NaN is returned if no values have been added.
257      * </p>
258      * @return the geometric mean
259      */
260     public double getGeometricMean() {
261         return geoMeanImpl.getResult();
262     }
263 
264     /**
265      * Returns the sum of the logs of the values that have been added.
266      * <p>
267      * Double.NaN is returned if no values have been added.
268      * </p>
269      * @return the sum of logs
270      * @since 1.2
271      */
272     public double getSumOfLogs() {
273         return sumLogImpl.getResult();
274     }
275 
276     /**
277      * Generates a text report displaying summary statistics from values that
278      * have been added.
279      * @return String with line feeds displaying statistics
280      * @since 1.2
281      */
282     public String toString() {
283         StringBuffer outBuffer = new StringBuffer();
284         String endl = "\n";
285         outBuffer.append("SummaryStatistics:").append(endl);
286         outBuffer.append("n: ").append(getN()).append(endl);
287         outBuffer.append("min: ").append(getMin()).append(endl);
288         outBuffer.append("max: ").append(getMax()).append(endl);
289         outBuffer.append("mean: ").append(getMean()).append(endl);
290         outBuffer.append("geometric mean: ").append(getGeometricMean())
291             .append(endl);
292         outBuffer.append("variance: ").append(getVariance()).append(endl);
293         outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
294         outBuffer.append("standard deviation: ").append(getStandardDeviation())
295             .append(endl);
296         outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
297         return outBuffer.toString();
298     }
299 
300     /**
301      * Resets all statistics and storage
302      */
303     public void clear() {
304         this.n = 0;
305         minImpl.clear();
306         maxImpl.clear();
307         sumImpl.clear();
308         sumLogImpl.clear();
309         sumsqImpl.clear();
310         geoMeanImpl.clear();
311         secondMoment.clear();
312         if (meanImpl != mean) {
313             meanImpl.clear();
314         }
315         if (varianceImpl != variance) {
316             varianceImpl.clear();
317         }
318     }
319 
320     /**
321      * Returns true iff <code>object</code> is a
322      * <code>SummaryStatistics</code> instance and all statistics have the
323      * same values as this.
324      * @param object the object to test equality against.
325      * @return true if object equals this
326      */
327     public boolean equals(Object object) {
328         if (object == this) {
329             return true;
330         }
331         if (object instanceof SummaryStatistics == false) {
332             return false;
333         }
334         SummaryStatistics stat = (SummaryStatistics)object;
335         return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
336                 MathUtils.equals(stat.getMax(), this.getMax()) &&
337                 MathUtils.equals(stat.getMean(), this.getMean()) &&
338                 MathUtils.equals(stat.getMin(), this.getMin()) &&
339                 MathUtils.equals(stat.getN(), this.getN()) &&
340                 MathUtils.equals(stat.getSum(), this.getSum()) &&
341                 MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
342                 MathUtils.equals(stat.getVariance(),
343             this.getVariance()));
344     }
345 
346     /**
347      * Returns hash code based on values of statistics
348      * @return hash code
349      */
350     public int hashCode() {
351         int result = 31 + MathUtils.hash(getGeometricMean());
352         result = result * 31 + MathUtils.hash(getGeometricMean());
353         result = result * 31 + MathUtils.hash(getMax());
354         result = result * 31 + MathUtils.hash(getMean());
355         result = result * 31 + MathUtils.hash(getMin());
356         result = result * 31 + MathUtils.hash(getN());
357         result = result * 31 + MathUtils.hash(getSum());
358         result = result * 31 + MathUtils.hash(getSumsq());
359         result = result * 31 + MathUtils.hash(getVariance());
360         return result;
361     }
362 
363     // Getters and setters for statistics implementations
364     /**
365      * Returns the currently configured Sum implementation
366      * @return the StorelessUnivariateStatistic implementing the sum
367      * @since 1.2
368      */
369     public StorelessUnivariateStatistic getSumImpl() {
370         return sumImpl;
371     }
372 
373     /**
374      * <p>
375      * Sets the implementation for the Sum.
376      * </p>
377      * <p>
378      * This method must be activated before any data has been added - i.e.,
379      * before {@link #addValue(double) addValue} has been used to add data;
380      * otherwise an IllegalStateException will be thrown.
381      * </p>
382      * @param sumImpl the StorelessUnivariateStatistic instance to use for
383      *        computing the Sum
384      * @throws IllegalStateException if data has already been added (i.e if n >
385      *         0)
386      * @since 1.2
387      */
388     public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
389         checkEmpty();
390         this.sumImpl = sumImpl;
391     }
392 
393     /**
394      * Returns the currently configured sum of squares implementation
395      * @return the StorelessUnivariateStatistic implementing the sum of squares
396      * @since 1.2
397      */
398     public StorelessUnivariateStatistic getSumsqImpl() {
399         return sumsqImpl;
400     }
401 
402     /**
403      * <p>
404      * Sets the implementation for the sum of squares.
405      * </p>
406      * <p>
407      * This method must be activated before any data has been added - i.e.,
408      * before {@link #addValue(double) addValue} has been used to add data;
409      * otherwise an IllegalStateException will be thrown.
410      * </p>
411      * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
412      *        computing the sum of squares
413      * @throws IllegalStateException if data has already been added (i.e if n >
414      *         0)
415      * @since 1.2
416      */
417     public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
418         checkEmpty();
419         this.sumsqImpl = sumsqImpl;
420     }
421 
422     /**
423      * Returns the currently configured minimum implementation
424      * @return the StorelessUnivariateStatistic implementing the minimum
425      * @since 1.2
426      */
427     public StorelessUnivariateStatistic getMinImpl() {
428         return minImpl;
429     }
430 
431     /**
432      * <p>
433      * Sets the implementation for the minimum.
434      * </p>
435      * <p>
436      * This method must be activated before any data has been added - i.e.,
437      * before {@link #addValue(double) addValue} has been used to add data;
438      * otherwise an IllegalStateException will be thrown.
439      * </p>
440      * @param minImpl the StorelessUnivariateStatistic instance to use for
441      *        computing the minimum
442      * @throws IllegalStateException if data has already been added (i.e if n >
443      *         0)
444      * @since 1.2
445      */
446     public void setMinImpl(StorelessUnivariateStatistic minImpl) {
447         checkEmpty();
448         this.minImpl = minImpl;
449     }
450 
451     /**
452      * Returns the currently configured maximum implementation
453      * @return the StorelessUnivariateStatistic implementing the maximum
454      * @since 1.2
455      */
456     public StorelessUnivariateStatistic getMaxImpl() {
457         return maxImpl;
458     }
459 
460     /**
461      * <p>
462      * Sets the implementation for the maximum.
463      * </p>
464      * <p>
465      * This method must be activated before any data has been added - i.e.,
466      * before {@link #addValue(double) addValue} has been used to add data;
467      * otherwise an IllegalStateException will be thrown.
468      * </p>
469      * @param maxImpl the StorelessUnivariateStatistic instance to use for
470      *        computing the maximum
471      * @throws IllegalStateException if data has already been added (i.e if n >
472      *         0)
473      * @since 1.2
474      */
475     public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
476         checkEmpty();
477         this.maxImpl = maxImpl;
478     }
479 
480     /**
481      * Returns the currently configured sum of logs implementation
482      * @return the StorelessUnivariateStatistic implementing the log sum
483      * @since 1.2
484      */
485     public StorelessUnivariateStatistic getSumLogImpl() {
486         return sumLogImpl;
487     }
488 
489     /**
490      * <p>
491      * Sets the implementation for the sum of logs.
492      * </p>
493      * <p>
494      * This method must be activated before any data has been added - i.e.,
495      * before {@link #addValue(double) addValue} has been used to add data;
496      * otherwise an IllegalStateException will be thrown.
497      * </p>
498      * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
499      *        computing the log sum
500      * @throws IllegalStateException if data has already been added (i.e if n >
501      *         0)
502      * @since 1.2
503      */
504     public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
505         checkEmpty();
506         this.sumLogImpl = sumLogImpl;
507         geoMean.setSumLogImpl(sumLogImpl);
508     }
509 
510     /**
511      * Returns the currently configured geometric mean implementation
512      * @return the StorelessUnivariateStatistic implementing the geometric mean
513      * @since 1.2
514      */
515     public StorelessUnivariateStatistic getGeoMeanImpl() {
516         return geoMeanImpl;
517     }
518 
519     /**
520      * <p>
521      * Sets the implementation for the geometric mean.
522      * </p>
523      * <p>
524      * This method must be activated before any data has been added - i.e.,
525      * before {@link #addValue(double) addValue} has been used to add data;
526      * otherwise an IllegalStateException will be thrown.
527      * </p>
528      * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
529      *        computing the geometric mean
530      * @throws IllegalStateException if data has already been added (i.e if n >
531      *         0)
532      * @since 1.2
533      */
534     public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
535         checkEmpty();
536         this.geoMeanImpl = geoMeanImpl;
537     }
538 
539     /**
540      * Returns the currently configured mean implementation
541      * @return the StorelessUnivariateStatistic implementing the mean
542      * @since 1.2
543      */
544     public StorelessUnivariateStatistic getMeanImpl() {
545         return meanImpl;
546     }
547 
548     /**
549      * <p>
550      * Sets the implementation for the mean.
551      * </p>
552      * <p>
553      * This method must be activated before any data has been added - i.e.,
554      * before {@link #addValue(double) addValue} has been used to add data;
555      * otherwise an IllegalStateException will be thrown.
556      * </p>
557      * @param meanImpl the StorelessUnivariateStatistic instance to use for
558      *        computing the mean
559      * @throws IllegalStateException if data has already been added (i.e if n >
560      *         0)
561      * @since 1.2
562      */
563     public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
564         checkEmpty();
565         this.meanImpl = meanImpl;
566     }
567 
568     /**
569      * Returns the currently configured variance implementation
570      * @return the StorelessUnivariateStatistic implementing the variance
571      * @since 1.2
572      */
573     public StorelessUnivariateStatistic getVarianceImpl() {
574         return varianceImpl;
575     }
576 
577     /**
578      * <p>
579      * Sets the implementation for the variance.
580      * </p>
581      * <p>
582      * This method must be activated before any data has been added - i.e.,
583      * before {@link #addValue(double) addValue} has been used to add data;
584      * otherwise an IllegalStateException will be thrown.
585      * </p>
586      * @param varianceImpl the StorelessUnivariateStatistic instance to use for
587      *        computing the variance
588      * @throws IllegalStateException if data has already been added (i.e if n >
589      *         0)
590      * @since 1.2
591      */
592     public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
593         checkEmpty();
594         this.varianceImpl = varianceImpl;
595     }
596 
597     /**
598      * Throws IllegalStateException if n > 0.
599      */
600     private void checkEmpty() {
601         if (n > 0) {
602             throw new IllegalStateException("Implementations must be configured before values are added.");
603         }
604     }
605 
606 }