001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathRuntimeException;
022    import org.apache.commons.math.exception.NullArgumentException;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025    import org.apache.commons.math.stat.descriptive.moment.Mean;
026    import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
027    import org.apache.commons.math.stat.descriptive.moment.Variance;
028    import org.apache.commons.math.stat.descriptive.rank.Max;
029    import org.apache.commons.math.stat.descriptive.rank.Min;
030    import org.apache.commons.math.stat.descriptive.summary.Sum;
031    import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
032    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
033    import org.apache.commons.math.util.MathUtils;
034    import org.apache.commons.math.util.Precision;
035    import org.apache.commons.math.util.FastMath;
036    
037    /**
038     * <p>
039     * Computes summary statistics for a stream of data values added using the
040     * {@link #addValue(double) addValue} method. The data values are not stored in
041     * memory, so this class can be used to compute statistics for very large data
042     * streams.
043     * </p>
044     * <p>
045     * The {@link StorelessUnivariateStatistic} instances used to maintain summary
046     * state and compute statistics are configurable via setters. For example, the
047     * default implementation for the variance can be overridden by calling
048     * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
049     * these methods must implement the {@link StorelessUnivariateStatistic}
050     * interface and configuration must be completed before <code>addValue</code>
051     * is called. No configuration is necessary to use the default, commons-math
052     * provided implementations.
053     * </p>
054     * <p>
055     * Note: This class is not thread-safe. Use
056     * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
057     * threads is required.
058     * </p>
059     * @version $Id: SummaryStatistics.java 1181282 2011-10-10 22:35:54Z erans $
060     */
061    public class SummaryStatistics implements StatisticalSummary, Serializable {
062    
063        /** Serialization UID */
064        private static final long serialVersionUID = -2021321786743555871L;
065    
066        /** count of values that have been added */
067        protected long n = 0;
068    
069        /** SecondMoment is used to compute the mean and variance */
070        protected SecondMoment secondMoment = new SecondMoment();
071    
072        /** sum of values that have been added */
073        protected Sum sum = new Sum();
074    
075        /** sum of the square of each value that has been added */
076        protected SumOfSquares sumsq = new SumOfSquares();
077    
078        /** min of values that have been added */
079        protected Min min = new Min();
080    
081        /** max of values that have been added */
082        protected Max max = new Max();
083    
084        /** sumLog of values that have been added */
085        protected SumOfLogs sumLog = new SumOfLogs();
086    
087        /** geoMean of values that have been added */
088        protected GeometricMean geoMean = new GeometricMean(sumLog);
089    
090        /** mean of values that have been added */
091        protected Mean mean = new Mean();
092    
093        /** variance of values that have been added */
094        protected Variance variance = new Variance();
095    
096        /** Sum statistic implementation - can be reset by setter. */
097        private StorelessUnivariateStatistic sumImpl = sum;
098    
099        /** Sum of squares statistic implementation - can be reset by setter. */
100        private StorelessUnivariateStatistic sumsqImpl = sumsq;
101    
102        /** Minimum statistic implementation - can be reset by setter. */
103        private StorelessUnivariateStatistic minImpl = min;
104    
105        /** Maximum statistic implementation - can be reset by setter. */
106        private StorelessUnivariateStatistic maxImpl = max;
107    
108        /** Sum of log statistic implementation - can be reset by setter. */
109        private StorelessUnivariateStatistic sumLogImpl = sumLog;
110    
111        /** Geometric mean statistic implementation - can be reset by setter. */
112        private StorelessUnivariateStatistic geoMeanImpl = geoMean;
113    
114        /** Mean statistic implementation - can be reset by setter. */
115        private StorelessUnivariateStatistic meanImpl = mean;
116    
117        /** Variance statistic implementation - can be reset by setter. */
118        private StorelessUnivariateStatistic varianceImpl = variance;
119    
120        /**
121         * Construct a SummaryStatistics instance
122         */
123        public SummaryStatistics() {
124        }
125    
126        /**
127         * A copy constructor. Creates a deep-copy of the {@code original}.
128         *
129         * @param original the {@code SummaryStatistics} instance to copy
130         */
131        public SummaryStatistics(SummaryStatistics original) {
132            copy(original, this);
133        }
134    
135        /**
136         * Return a {@link StatisticalSummaryValues} instance reporting current
137         * statistics.
138         * @return Current values of statistics
139         */
140        public StatisticalSummary getSummary() {
141            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
142                    getMax(), getMin(), getSum());
143        }
144    
145        /**
146         * Add a value to the data
147         * @param value the value to add
148         */
149        public void addValue(double value) {
150            sumImpl.increment(value);
151            sumsqImpl.increment(value);
152            minImpl.increment(value);
153            maxImpl.increment(value);
154            sumLogImpl.increment(value);
155            secondMoment.increment(value);
156            // If mean, variance or geomean have been overridden,
157            // need to increment these
158            if (!(meanImpl instanceof Mean)) {
159                meanImpl.increment(value);
160            }
161            if (!(varianceImpl instanceof Variance)) {
162                varianceImpl.increment(value);
163            }
164            if (!(geoMeanImpl instanceof GeometricMean)) {
165                geoMeanImpl.increment(value);
166            }
167            n++;
168        }
169    
170        /**
171         * Returns the number of available values
172         * @return The number of available values
173         */
174        public long getN() {
175            return n;
176        }
177    
178        /**
179         * Returns the sum of the values that have been added
180         * @return The sum or <code>Double.NaN</code> if no values have been added
181         */
182        public double getSum() {
183            return sumImpl.getResult();
184        }
185    
186        /**
187         * Returns the sum of the squares of the values that have been added.
188         * <p>
189         * Double.NaN is returned if no values have been added.
190         * </p>
191         * @return The sum of squares
192         */
193        public double getSumsq() {
194            return sumsqImpl.getResult();
195        }
196    
197        /**
198         * Returns the mean of the values that have been added.
199         * <p>
200         * Double.NaN is returned if no values have been added.
201         * </p>
202         * @return the mean
203         */
204        public double getMean() {
205            if (mean == meanImpl) {
206                return new Mean(secondMoment).getResult();
207            } else {
208                return meanImpl.getResult();
209            }
210        }
211    
212        /**
213         * Returns the standard deviation of the values that have been added.
214         * <p>
215         * Double.NaN is returned if no values have been added.
216         * </p>
217         * @return the standard deviation
218         */
219        public double getStandardDeviation() {
220            double stdDev = Double.NaN;
221            if (getN() > 0) {
222                if (getN() > 1) {
223                    stdDev = FastMath.sqrt(getVariance());
224                } else {
225                    stdDev = 0.0;
226                }
227            }
228            return stdDev;
229        }
230    
231        /**
232         * Returns the variance of the values that have been added.
233         * <p>
234         * Double.NaN is returned if no values have been added.
235         * </p>
236         * @return the variance
237         */
238        public double getVariance() {
239            if (varianceImpl == variance) {
240                return new Variance(secondMoment).getResult();
241            } else {
242                return varianceImpl.getResult();
243            }
244        }
245    
246        /**
247         * Returns the maximum of the values that have been added.
248         * <p>
249         * Double.NaN is returned if no values have been added.
250         * </p>
251         * @return the maximum
252         */
253        public double getMax() {
254            return maxImpl.getResult();
255        }
256    
257        /**
258         * Returns the minimum of the values that have been added.
259         * <p>
260         * Double.NaN is returned if no values have been added.
261         * </p>
262         * @return the minimum
263         */
264        public double getMin() {
265            return minImpl.getResult();
266        }
267    
268        /**
269         * Returns the geometric mean of the values that have been added.
270         * <p>
271         * Double.NaN is returned if no values have been added.
272         * </p>
273         * @return the geometric mean
274         */
275        public double getGeometricMean() {
276            return geoMeanImpl.getResult();
277        }
278    
279        /**
280         * Returns the sum of the logs of the values that have been added.
281         * <p>
282         * Double.NaN is returned if no values have been added.
283         * </p>
284         * @return the sum of logs
285         * @since 1.2
286         */
287        public double getSumOfLogs() {
288            return sumLogImpl.getResult();
289        }
290    
291        /**
292         * Returns a statistic related to the Second Central Moment.  Specifically,
293         * what is returned is the sum of squared deviations from the sample mean
294         * among the values that have been added.
295         * <p>
296         * Returns <code>Double.NaN</code> if no data values have been added and
297         * returns <code>0</code> if there is just one value in the data set.</p>
298         * <p>
299         * @return second central moment statistic
300         * @since 2.0
301         */
302        public double getSecondMoment() {
303            return secondMoment.getResult();
304        }
305    
306        /**
307         * Generates a text report displaying summary statistics from values that
308         * have been added.
309         * @return String with line feeds displaying statistics
310         * @since 1.2
311         */
312        @Override
313        public String toString() {
314            StringBuilder outBuffer = new StringBuilder();
315            String endl = "\n";
316            outBuffer.append("SummaryStatistics:").append(endl);
317            outBuffer.append("n: ").append(getN()).append(endl);
318            outBuffer.append("min: ").append(getMin()).append(endl);
319            outBuffer.append("max: ").append(getMax()).append(endl);
320            outBuffer.append("mean: ").append(getMean()).append(endl);
321            outBuffer.append("geometric mean: ").append(getGeometricMean())
322                .append(endl);
323            outBuffer.append("variance: ").append(getVariance()).append(endl);
324            outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
325            outBuffer.append("standard deviation: ").append(getStandardDeviation())
326                .append(endl);
327            outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
328            return outBuffer.toString();
329        }
330    
331        /**
332         * Resets all statistics and storage
333         */
334        public void clear() {
335            this.n = 0;
336            minImpl.clear();
337            maxImpl.clear();
338            sumImpl.clear();
339            sumLogImpl.clear();
340            sumsqImpl.clear();
341            geoMeanImpl.clear();
342            secondMoment.clear();
343            if (meanImpl != mean) {
344                meanImpl.clear();
345            }
346            if (varianceImpl != variance) {
347                varianceImpl.clear();
348            }
349        }
350    
351        /**
352         * Returns true iff <code>object</code> is a
353         * <code>SummaryStatistics</code> instance and all statistics have the
354         * same values as this.
355         * @param object the object to test equality against.
356         * @return true if object equals this
357         */
358        @Override
359        public boolean equals(Object object) {
360            if (object == this) {
361                return true;
362            }
363            if (object instanceof SummaryStatistics == false) {
364                return false;
365            }
366            SummaryStatistics stat = (SummaryStatistics)object;
367            return Precision.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
368                   Precision.equalsIncludingNaN(stat.getMax(),           getMax())           &&
369                   Precision.equalsIncludingNaN(stat.getMean(),          getMean())          &&
370                   Precision.equalsIncludingNaN(stat.getMin(),           getMin())           &&
371                   Precision.equalsIncludingNaN(stat.getN(),             getN())             &&
372                   Precision.equalsIncludingNaN(stat.getSum(),           getSum())           &&
373                   Precision.equalsIncludingNaN(stat.getSumsq(),         getSumsq())         &&
374                   Precision.equalsIncludingNaN(stat.getVariance(),      getVariance());
375        }
376    
377        /**
378         * Returns hash code based on values of statistics
379         * @return hash code
380         */
381        @Override
382        public int hashCode() {
383            int result = 31 + MathUtils.hash(getGeometricMean());
384            result = result * 31 + MathUtils.hash(getGeometricMean());
385            result = result * 31 + MathUtils.hash(getMax());
386            result = result * 31 + MathUtils.hash(getMean());
387            result = result * 31 + MathUtils.hash(getMin());
388            result = result * 31 + MathUtils.hash(getN());
389            result = result * 31 + MathUtils.hash(getSum());
390            result = result * 31 + MathUtils.hash(getSumsq());
391            result = result * 31 + MathUtils.hash(getVariance());
392            return result;
393        }
394    
395        // Getters and setters for statistics implementations
396        /**
397         * Returns the currently configured Sum implementation
398         * @return the StorelessUnivariateStatistic implementing the sum
399         * @since 1.2
400         */
401        public StorelessUnivariateStatistic getSumImpl() {
402            return sumImpl;
403        }
404    
405        /**
406         * <p>
407         * Sets the implementation for the Sum.
408         * </p>
409         * <p>
410         * This method must be activated before any data has been added - i.e.,
411         * before {@link #addValue(double) addValue} has been used to add data;
412         * otherwise an IllegalStateException will be thrown.
413         * </p>
414         * @param sumImpl the StorelessUnivariateStatistic instance to use for
415         *        computing the Sum
416         * @throws IllegalStateException if data has already been added (i.e if n >
417         *         0)
418         * @since 1.2
419         */
420        public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
421            checkEmpty();
422            this.sumImpl = sumImpl;
423        }
424    
425        /**
426         * Returns the currently configured sum of squares implementation
427         * @return the StorelessUnivariateStatistic implementing the sum of squares
428         * @since 1.2
429         */
430        public StorelessUnivariateStatistic getSumsqImpl() {
431            return sumsqImpl;
432        }
433    
434        /**
435         * <p>
436         * Sets the implementation for the sum of squares.
437         * </p>
438         * <p>
439         * This method must be activated before any data has been added - i.e.,
440         * before {@link #addValue(double) addValue} has been used to add data;
441         * otherwise an IllegalStateException will be thrown.
442         * </p>
443         * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
444         *        computing the sum of squares
445         * @throws IllegalStateException if data has already been added (i.e if n >
446         *         0)
447         * @since 1.2
448         */
449        public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
450            checkEmpty();
451            this.sumsqImpl = sumsqImpl;
452        }
453    
454        /**
455         * Returns the currently configured minimum implementation
456         * @return the StorelessUnivariateStatistic implementing the minimum
457         * @since 1.2
458         */
459        public StorelessUnivariateStatistic getMinImpl() {
460            return minImpl;
461        }
462    
463        /**
464         * <p>
465         * Sets the implementation for the minimum.
466         * </p>
467         * <p>
468         * This method must be activated before any data has been added - i.e.,
469         * before {@link #addValue(double) addValue} has been used to add data;
470         * otherwise an IllegalStateException will be thrown.
471         * </p>
472         * @param minImpl the StorelessUnivariateStatistic instance to use for
473         *        computing the minimum
474         * @throws IllegalStateException if data has already been added (i.e if n >
475         *         0)
476         * @since 1.2
477         */
478        public void setMinImpl(StorelessUnivariateStatistic minImpl) {
479            checkEmpty();
480            this.minImpl = minImpl;
481        }
482    
483        /**
484         * Returns the currently configured maximum implementation
485         * @return the StorelessUnivariateStatistic implementing the maximum
486         * @since 1.2
487         */
488        public StorelessUnivariateStatistic getMaxImpl() {
489            return maxImpl;
490        }
491    
492        /**
493         * <p>
494         * Sets the implementation for the maximum.
495         * </p>
496         * <p>
497         * This method must be activated before any data has been added - i.e.,
498         * before {@link #addValue(double) addValue} has been used to add data;
499         * otherwise an IllegalStateException will be thrown.
500         * </p>
501         * @param maxImpl the StorelessUnivariateStatistic instance to use for
502         *        computing the maximum
503         * @throws IllegalStateException if data has already been added (i.e if n >
504         *         0)
505         * @since 1.2
506         */
507        public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
508            checkEmpty();
509            this.maxImpl = maxImpl;
510        }
511    
512        /**
513         * Returns the currently configured sum of logs implementation
514         * @return the StorelessUnivariateStatistic implementing the log sum
515         * @since 1.2
516         */
517        public StorelessUnivariateStatistic getSumLogImpl() {
518            return sumLogImpl;
519        }
520    
521        /**
522         * <p>
523         * Sets the implementation for the sum of logs.
524         * </p>
525         * <p>
526         * This method must be activated before any data has been added - i.e.,
527         * before {@link #addValue(double) addValue} has been used to add data;
528         * otherwise an IllegalStateException will be thrown.
529         * </p>
530         * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
531         *        computing the log sum
532         * @throws IllegalStateException if data has already been added (i.e if n >
533         *         0)
534         * @since 1.2
535         */
536        public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
537            checkEmpty();
538            this.sumLogImpl = sumLogImpl;
539            geoMean.setSumLogImpl(sumLogImpl);
540        }
541    
542        /**
543         * Returns the currently configured geometric mean implementation
544         * @return the StorelessUnivariateStatistic implementing the geometric mean
545         * @since 1.2
546         */
547        public StorelessUnivariateStatistic getGeoMeanImpl() {
548            return geoMeanImpl;
549        }
550    
551        /**
552         * <p>
553         * Sets the implementation for the geometric mean.
554         * </p>
555         * <p>
556         * This method must be activated before any data has been added - i.e.,
557         * before {@link #addValue(double) addValue} has been used to add data;
558         * otherwise an IllegalStateException will be thrown.
559         * </p>
560         * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
561         *        computing the geometric mean
562         * @throws IllegalStateException if data has already been added (i.e if n >
563         *         0)
564         * @since 1.2
565         */
566        public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
567            checkEmpty();
568            this.geoMeanImpl = geoMeanImpl;
569        }
570    
571        /**
572         * Returns the currently configured mean implementation
573         * @return the StorelessUnivariateStatistic implementing the mean
574         * @since 1.2
575         */
576        public StorelessUnivariateStatistic getMeanImpl() {
577            return meanImpl;
578        }
579    
580        /**
581         * <p>
582         * Sets the implementation for the mean.
583         * </p>
584         * <p>
585         * This method must be activated before any data has been added - i.e.,
586         * before {@link #addValue(double) addValue} has been used to add data;
587         * otherwise an IllegalStateException will be thrown.
588         * </p>
589         * @param meanImpl the StorelessUnivariateStatistic instance to use for
590         *        computing the mean
591         * @throws IllegalStateException if data has already been added (i.e if n >
592         *         0)
593         * @since 1.2
594         */
595        public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
596            checkEmpty();
597            this.meanImpl = meanImpl;
598        }
599    
600        /**
601         * Returns the currently configured variance implementation
602         * @return the StorelessUnivariateStatistic implementing the variance
603         * @since 1.2
604         */
605        public StorelessUnivariateStatistic getVarianceImpl() {
606            return varianceImpl;
607        }
608    
609        /**
610         * <p>
611         * Sets the implementation for the variance.
612         * </p>
613         * <p>
614         * This method must be activated before any data has been added - i.e.,
615         * before {@link #addValue(double) addValue} has been used to add data;
616         * otherwise an IllegalStateException will be thrown.
617         * </p>
618         * @param varianceImpl the StorelessUnivariateStatistic instance to use for
619         *        computing the variance
620         * @throws IllegalStateException if data has already been added (i.e if n >
621         *         0)
622         * @since 1.2
623         */
624        public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
625            checkEmpty();
626            this.varianceImpl = varianceImpl;
627        }
628    
629        /**
630         * Throws IllegalStateException if n > 0.
631         */
632        private void checkEmpty() {
633            if (n > 0) {
634                throw MathRuntimeException.createIllegalStateException(
635                        LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
636                        n);
637            }
638        }
639    
640        /**
641         * Returns a copy of this SummaryStatistics instance with the same internal state.
642         *
643         * @return a copy of this
644         */
645        public SummaryStatistics copy() {
646            SummaryStatistics result = new SummaryStatistics();
647            copy(this, result);
648            return result;
649        }
650    
651        /**
652         * Copies source to dest.
653         * <p>Neither source nor dest can be null.</p>
654         *
655         * @param source SummaryStatistics to copy
656         * @param dest SummaryStatistics to copy to
657         * @throws NullArgumentException if either source or dest is null
658         */
659        public static void copy(SummaryStatistics source, SummaryStatistics dest)
660            throws NullArgumentException {
661            MathUtils.checkNotNull(source);
662            MathUtils.checkNotNull(dest);
663            dest.maxImpl = source.maxImpl.copy();
664            dest.meanImpl = source.meanImpl.copy();
665            dest.minImpl = source.minImpl.copy();
666            dest.sumImpl = source.sumImpl.copy();
667            dest.varianceImpl = source.varianceImpl.copy();
668            dest.sumLogImpl = source.sumLogImpl.copy();
669            dest.sumsqImpl = source.sumsqImpl.copy();
670            if (source.getGeoMeanImpl() instanceof GeometricMean) {
671                // Keep geoMeanImpl, sumLogImpl in synch
672                dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
673            } else {
674                dest.geoMeanImpl = source.geoMeanImpl.copy();
675            }
676            SecondMoment.copy(source.secondMoment, dest.secondMoment);
677            dest.n = source.n;
678    
679            // Make sure that if stat == statImpl in source, same
680            // holds in dest; otherwise copy stat
681            if (source.geoMean == source.geoMeanImpl) {
682                dest.geoMean = (GeometricMean) dest.geoMeanImpl;
683            } else {
684                GeometricMean.copy(source.geoMean, dest.geoMean);
685            }
686            if (source.max == source.maxImpl) {
687                dest.max = (Max) dest.maxImpl;
688            } else {
689                Max.copy(source.max, dest.max);
690            }
691            if (source.mean == source.meanImpl) {
692                dest.mean = (Mean) dest.meanImpl;
693            } else {
694                Mean.copy(source.mean, dest.mean);
695            }
696            if (source.min == source.minImpl) {
697                dest.min = (Min) dest.minImpl;
698            } else {
699                Min.copy(source.min, dest.min);
700            }
701            if (source.sum == source.sumImpl) {
702                dest.sum = (Sum) dest.sumImpl;
703            } else {
704                Sum.copy(source.sum, dest.sum);
705            }
706            if (source.variance == source.varianceImpl) {
707                dest.variance = (Variance) dest.varianceImpl;
708            } else {
709                Variance.copy(source.variance, dest.variance);
710            }
711            if (source.sumLog == source.sumLogImpl) {
712                dest.sumLog = (SumOfLogs) dest.sumLogImpl;
713            } else {
714                SumOfLogs.copy(source.sumLog, dest.sumLog);
715            }
716            if (source.sumsq == source.sumsqImpl) {
717                dest.sumsq = (SumOfSquares) dest.sumsqImpl;
718            } else {
719                SumOfSquares.copy(source.sumsq, dest.sumsq);
720            }
721        }
722    }