View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.descriptive;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
22  import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
23  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
24  import org.apache.commons.math4.legacy.linear.RealMatrix;
25  import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
26  import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
27  import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialCovariance;
28  import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
29  import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
30  import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
31  import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
32  import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
33  import org.apache.commons.math4.core.jdkmath.JdkMath;
34  import org.apache.commons.math4.legacy.core.MathArrays;
35  import org.apache.commons.numbers.core.Precision;
36  
37  /**
38   * <p>Computes summary statistics for a stream of n-tuples added using the
39   * {@link #addValue(double[]) addValue} method. The data values are not stored
40   * in memory, so this class can be used to compute statistics for very large
41   * n-tuple streams.</p>
42   *
43   * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
44   * summary state and compute statistics are configurable via setters.
45   * For example, the default implementation for the mean can be overridden by
46   * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
47   * parameters to these methods must implement the
48   * {@link StorelessUnivariateStatistic} interface and configuration must be
49   * completed before <code>addValue</code> is called. No configuration is
50   * necessary to use the default, commons-math provided implementations.</p>
51   *
52   * <p>To compute statistics for a stream of n-tuples, construct a
53   * MultivariateStatistics instance with dimension n and then use
54   * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
55   * methods where Xxx is a statistic return an array of <code>double</code>
56   * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
57   * value of the given statistic for data range consisting of the i<sup>th</sup> element of
58   * each of the input n-tuples.  For example, if <code>addValue</code> is called
59   * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
60   * <code>getSum</code> will return a three-element array with values
61   * {0+3+6, 1+4+7, 2+5+8}</p>
62   *
63   * <p>Note: This class is not thread-safe. Use
64   * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
65   * threads is required.</p>
66   *
67   * @since 1.2
68   */
69  public class MultivariateSummaryStatistics
70      implements StatisticalMultivariateSummary {
71      /** Dimension of the data. */
72      private final int k;
73  
74      /** Count of values that have been added. */
75      private long n;
76  
77      /** Sum statistic implementation - can be reset by setter. */
78      private final StorelessUnivariateStatistic[] sumImpl;
79  
80      /** Sum of squares statistic implementation - can be reset by setter. */
81      private final StorelessUnivariateStatistic[] sumSqImpl;
82  
83      /** Minimum statistic implementation - can be reset by setter. */
84      private final StorelessUnivariateStatistic[] minImpl;
85  
86      /** Maximum statistic implementation - can be reset by setter. */
87      private final StorelessUnivariateStatistic[] maxImpl;
88  
89      /** Sum of log statistic implementation - can be reset by setter. */
90      private final StorelessUnivariateStatistic[] sumLogImpl;
91  
92      /** Geometric mean statistic implementation - can be reset by setter. */
93      private final StorelessUnivariateStatistic[] geoMeanImpl;
94  
95      /** Mean statistic implementation - can be reset by setter. */
96      private final StorelessUnivariateStatistic[] meanImpl;
97  
98      /** Covariance statistic implementation - cannot be reset. */
99      private final VectorialCovariance covarianceImpl;
100 
101     /**
102      * Construct a MultivariateSummaryStatistics instance.
103      * @param k dimension of the data
104      * @param isCovarianceBiasCorrected if true, the unbiased sample
105      * covariance is computed, otherwise the biased population covariance
106      * is computed
107      */
108     public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
109         this.k = k;
110 
111         sumImpl     = new StorelessUnivariateStatistic[k];
112         sumSqImpl   = new StorelessUnivariateStatistic[k];
113         minImpl     = new StorelessUnivariateStatistic[k];
114         maxImpl     = new StorelessUnivariateStatistic[k];
115         sumLogImpl  = new StorelessUnivariateStatistic[k];
116         geoMeanImpl = new StorelessUnivariateStatistic[k];
117         meanImpl    = new StorelessUnivariateStatistic[k];
118 
119         for (int i = 0; i < k; ++i) {
120             sumImpl[i]     = new Sum();
121             sumSqImpl[i]   = new SumOfSquares();
122             minImpl[i]     = new Min();
123             maxImpl[i]     = new Max();
124             sumLogImpl[i]  = new SumOfLogs();
125             geoMeanImpl[i] = new GeometricMean();
126             meanImpl[i]    = new Mean();
127         }
128 
129         covarianceImpl =
130             new VectorialCovariance(k, isCovarianceBiasCorrected);
131     }
132 
133     /**
134      * Add an n-tuple to the data.
135      *
136      * @param value  the n-tuple to add
137      * @throws DimensionMismatchException if the length of the array
138      * does not match the one used at construction
139      */
140     public void addValue(double[] value) throws DimensionMismatchException {
141         checkDimension(value.length);
142         for (int i = 0; i < k; ++i) {
143             double v = value[i];
144             sumImpl[i].increment(v);
145             sumSqImpl[i].increment(v);
146             minImpl[i].increment(v);
147             maxImpl[i].increment(v);
148             sumLogImpl[i].increment(v);
149             geoMeanImpl[i].increment(v);
150             meanImpl[i].increment(v);
151         }
152         covarianceImpl.increment(value);
153         n++;
154     }
155 
156     /**
157      * Returns the dimension of the data.
158      * @return The dimension of the data
159      */
160     @Override
161     public int getDimension() {
162         return k;
163     }
164 
165     /**
166      * Returns the number of available values.
167      * @return The number of available values
168      */
169     @Override
170     public long getN() {
171         return n;
172     }
173 
174     /**
175      * Returns an array of the results of a statistic.
176      * @param stats univariate statistic array
177      * @return results array
178      */
179     private double[] getResults(StorelessUnivariateStatistic[] stats) {
180         double[] results = new double[stats.length];
181         for (int i = 0; i < results.length; ++i) {
182             results[i] = stats[i].getResult();
183         }
184         return results;
185     }
186 
187     /**
188      * Returns an array whose i<sup>th</sup> entry is the sum of the.
189      * i<sup>th</sup> entries of the arrays that have been added using
190      * {@link #addValue(double[])}
191      *
192      * @return the array of component sums
193      */
194     @Override
195     public double[] getSum() {
196         return getResults(sumImpl);
197     }
198 
199     /**
200      * Returns an array whose i<sup>th</sup> entry is the sum of squares of the.
201      * i<sup>th</sup> entries of the arrays that have been added using
202      * {@link #addValue(double[])}
203      *
204      * @return the array of component sums of squares
205      */
206     @Override
207     public double[] getSumSq() {
208         return getResults(sumSqImpl);
209     }
210 
211     /**
212      * Returns an array whose i<sup>th</sup> entry is the sum of logs of the.
213      * i<sup>th</sup> entries of the arrays that have been added using
214      * {@link #addValue(double[])}
215      *
216      * @return the array of component log sums
217      */
218     @Override
219     public double[] getSumLog() {
220         return getResults(sumLogImpl);
221     }
222 
223     /**
224      * Returns an array whose i<sup>th</sup> entry is the mean of the.
225      * i<sup>th</sup> entries of the arrays that have been added using
226      * {@link #addValue(double[])}
227      *
228      * @return the array of component means
229      */
230     @Override
231     public double[] getMean() {
232         return getResults(meanImpl);
233     }
234 
235     /**
236      * Returns an array whose i<sup>th</sup> entry is the standard deviation of the.
237      * i<sup>th</sup> entries of the arrays that have been added using
238      * {@link #addValue(double[])}
239      *
240      * @return the array of component standard deviations
241      */
242     @Override
243     public double[] getStandardDeviation() {
244         double[] stdDev = new double[k];
245         if (getN() < 1) {
246             Arrays.fill(stdDev, Double.NaN);
247         } else if (getN() < 2) {
248             Arrays.fill(stdDev, 0.0);
249         } else {
250             RealMatrix matrix = covarianceImpl.getResult();
251             for (int i = 0; i < k; ++i) {
252                 stdDev[i] = JdkMath.sqrt(matrix.getEntry(i, i));
253             }
254         }
255         return stdDev;
256     }
257 
258     /**
259      * Returns the covariance matrix of the values that have been added.
260      *
261      * @return the covariance matrix
262      */
263     @Override
264     public RealMatrix getCovariance() {
265         return covarianceImpl.getResult();
266     }
267 
268     /**
269      * Returns an array whose i<sup>th</sup> entry is the maximum of the.
270      * i<sup>th</sup> entries of the arrays that have been added using
271      * {@link #addValue(double[])}
272      *
273      * @return the array of component maxima
274      */
275     @Override
276     public double[] getMax() {
277         return getResults(maxImpl);
278     }
279 
280     /**
281      * Returns an array whose i<sup>th</sup> entry is the minimum of the.
282      * i<sup>th</sup> entries of the arrays that have been added using
283      * {@link #addValue(double[])}
284      *
285      * @return the array of component minima
286      */
287     @Override
288     public double[] getMin() {
289         return getResults(minImpl);
290     }
291 
292     /**
293      * Returns an array whose i<sup>th</sup> entry is the geometric mean of the.
294      * i<sup>th</sup> entries of the arrays that have been added using
295      * {@link #addValue(double[])}
296      *
297      * @return the array of component geometric means
298      */
299     @Override
300     public double[] getGeometricMean() {
301         return getResults(geoMeanImpl);
302     }
303 
304     /**
305      * Generates a text report displaying
306      * summary statistics from values that
307      * have been added.
308      * @return String with line feeds displaying statistics
309      */
310     @Override
311     public String toString() {
312         final String separator = ", ";
313         final String suffix = System.getProperty("line.separator");
314         StringBuilder outBuffer = new StringBuilder();
315         outBuffer.append("MultivariateSummaryStatistics:").append(suffix);
316         outBuffer.append("n: ").append(getN()).append(suffix);
317         append(outBuffer, getMin(), "min: ", separator, suffix);
318         append(outBuffer, getMax(), "max: ", separator, suffix);
319         append(outBuffer, getMean(), "mean: ", separator, suffix);
320         append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
321         append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
322         append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
323         append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
324         outBuffer.append("covariance: ").append(getCovariance()).append(suffix);
325         return outBuffer.toString();
326     }
327 
328     /**
329      * Append a text representation of an array to a buffer.
330      * @param buffer buffer to fill
331      * @param data data array
332      * @param prefix text prefix
333      * @param separator elements separator
334      * @param suffix text suffix
335      */
336     private void append(StringBuilder buffer, double[] data,
337                         String prefix, String separator, String suffix) {
338         buffer.append(prefix);
339         for (int i = 0; i < data.length; ++i) {
340             if (i > 0) {
341                 buffer.append(separator);
342             }
343             buffer.append(data[i]);
344         }
345         buffer.append(suffix);
346     }
347 
348     /**
349      * Resets all statistics and storage.
350      */
351     public void clear() {
352         this.n = 0;
353         for (int i = 0; i < k; ++i) {
354             minImpl[i].clear();
355             maxImpl[i].clear();
356             sumImpl[i].clear();
357             sumLogImpl[i].clear();
358             sumSqImpl[i].clear();
359             geoMeanImpl[i].clear();
360             meanImpl[i].clear();
361         }
362         covarianceImpl.clear();
363     }
364 
365     /**
366      * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
367      * instance and all statistics have the same values as this.
368      * @param object the object to test equality against.
369      * @return true if object equals this
370      */
371     @Override
372     public boolean equals(Object object) {
373         if (object == this ) {
374             return true;
375         }
376         if (!(object instanceof MultivariateSummaryStatistics)) {
377             return false;
378         }
379         MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
380         return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
381                MathArrays.equalsIncludingNaN(stat.getMax(),           getMax())           &&
382                MathArrays.equalsIncludingNaN(stat.getMean(),          getMean())          &&
383                MathArrays.equalsIncludingNaN(stat.getMin(),           getMin())           &&
384                Precision.equalsIncludingNaN(stat.getN(),             getN())             &&
385                MathArrays.equalsIncludingNaN(stat.getSum(),           getSum())           &&
386                MathArrays.equalsIncludingNaN(stat.getSumSq(),         getSumSq())         &&
387                MathArrays.equalsIncludingNaN(stat.getSumLog(),        getSumLog())        &&
388                stat.getCovariance().equals( getCovariance());
389     }
390 
391     /**
392      * Returns hash code based on values of statistics.
393      *
394      * @return hash code
395      */
396     @Override
397     public int hashCode() {
398         int result = 31 + Arrays.hashCode(getGeometricMean());
399         result = result * 31 + Arrays.hashCode(getGeometricMean());
400         result = result * 31 + Arrays.hashCode(getMax());
401         result = result * 31 + Arrays.hashCode(getMean());
402         result = result * 31 + Arrays.hashCode(getMin());
403         result = result * 31 + Double.hashCode(getN());
404         result = result * 31 + Arrays.hashCode(getSum());
405         result = result * 31 + Arrays.hashCode(getSumSq());
406         result = result * 31 + Arrays.hashCode(getSumLog());
407         result = result * 31 + getCovariance().hashCode();
408         return result;
409     }
410 
411     // Getters and setters for statistics implementations
412     /**
413      * Sets statistics implementations.
414      * @param newImpl new implementations for statistics
415      * @param oldImpl old implementations for statistics
416      * @throws DimensionMismatchException if the array dimension
417      * does not match the one used at construction
418      * @throws MathIllegalStateException if data has already been added
419      * (i.e. if n > 0)
420      */
421     private void setImpl(StorelessUnivariateStatistic[] newImpl,
422                          StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
423                          DimensionMismatchException {
424         checkEmpty();
425         checkDimension(newImpl.length);
426         System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
427     }
428 
429     /**
430      * Returns the currently configured Sum implementation.
431      *
432      * @return the StorelessUnivariateStatistic implementing the sum
433      */
434     public StorelessUnivariateStatistic[] getSumImpl() {
435         return sumImpl.clone();
436     }
437 
438     /**
439      * <p>Sets the implementation for the Sum.</p>
440      * <p>This method must be activated before any data has been added - i.e.,
441      * before {@link #addValue(double[]) addValue} has been used to add data;
442      * otherwise an IllegalStateException will be thrown.</p>
443      *
444      * @param sumImpl the StorelessUnivariateStatistic instance to use
445      * for computing the Sum
446      * @throws DimensionMismatchException if the array dimension
447      * does not match the one used at construction
448      * @throws MathIllegalStateException if data has already been added
449      *  (i.e if n &gt; 0)
450      */
451     public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
452     throws MathIllegalStateException, DimensionMismatchException {
453         setImpl(sumImpl, this.sumImpl);
454     }
455 
456     /**
457      * Returns the currently configured sum of squares implementation.
458      *
459      * @return the StorelessUnivariateStatistic implementing the sum of squares
460      */
461     public StorelessUnivariateStatistic[] getSumsqImpl() {
462         return sumSqImpl.clone();
463     }
464 
465     /**
466      * <p>Sets the implementation for the sum of squares.</p>
467      * <p>This method must be activated before any data has been added - i.e.,
468      * before {@link #addValue(double[]) addValue} has been used to add data;
469      * otherwise an IllegalStateException will be thrown.</p>
470      *
471      * @param sumsqImpl the StorelessUnivariateStatistic instance to use
472      * for computing the sum of squares
473      * @throws DimensionMismatchException if the array dimension
474      * does not match the one used at construction
475      * @throws MathIllegalStateException if data has already been added
476      *  (i.e if n &gt; 0)
477      */
478     public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
479     throws MathIllegalStateException, DimensionMismatchException {
480         setImpl(sumsqImpl, this.sumSqImpl);
481     }
482 
483     /**
484      * Returns the currently configured minimum implementation.
485      *
486      * @return the StorelessUnivariateStatistic implementing the minimum
487      */
488     public StorelessUnivariateStatistic[] getMinImpl() {
489         return minImpl.clone();
490     }
491 
492     /**
493      * <p>Sets the implementation for the minimum.</p>
494      * <p>This method must be activated before any data has been added - i.e.,
495      * before {@link #addValue(double[]) addValue} has been used to add data;
496      * otherwise an IllegalStateException will be thrown.</p>
497      *
498      * @param minImpl the StorelessUnivariateStatistic instance to use
499      * for computing the minimum
500      * @throws DimensionMismatchException if the array dimension
501      * does not match the one used at construction
502      * @throws MathIllegalStateException if data has already been added
503      *  (i.e if n &gt; 0)
504      */
505     public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
506     throws MathIllegalStateException, DimensionMismatchException {
507         setImpl(minImpl, this.minImpl);
508     }
509 
510     /**
511      * Returns the currently configured maximum implementation.
512      *
513      * @return the StorelessUnivariateStatistic implementing the maximum
514      */
515     public StorelessUnivariateStatistic[] getMaxImpl() {
516         return maxImpl.clone();
517     }
518 
519     /**
520      * <p>Sets the implementation for the maximum.</p>
521      * <p>This method must be activated before any data has been added - i.e.,
522      * before {@link #addValue(double[]) addValue} has been used to add data;
523      * otherwise an IllegalStateException will be thrown.</p>
524      *
525      * @param maxImpl the StorelessUnivariateStatistic instance to use
526      * for computing the maximum
527      * @throws DimensionMismatchException if the array dimension
528      * does not match the one used at construction
529      * @throws MathIllegalStateException if data has already been added
530      *  (i.e if n &gt; 0)
531      */
532     public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
533     throws MathIllegalStateException, DimensionMismatchException{
534         setImpl(maxImpl, this.maxImpl);
535     }
536 
537     /**
538      * Returns the currently configured sum of logs implementation.
539      *
540      * @return the StorelessUnivariateStatistic implementing the log sum
541      */
542     public StorelessUnivariateStatistic[] getSumLogImpl() {
543         return sumLogImpl.clone();
544     }
545 
546     /**
547      * <p>Sets the implementation for the sum of logs.</p>
548      * <p>This method must be activated before any data has been added - i.e.,
549      * before {@link #addValue(double[]) addValue} has been used to add data;
550      * otherwise an IllegalStateException will be thrown.</p>
551      *
552      * @param sumLogImpl the StorelessUnivariateStatistic instance to use
553      * for computing the log sum
554      * @throws DimensionMismatchException if the array dimension
555      * does not match the one used at construction
556      * @throws MathIllegalStateException if data has already been added
557      *  (i.e if n &gt; 0)
558      */
559     public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
560     throws MathIllegalStateException, DimensionMismatchException{
561         setImpl(sumLogImpl, this.sumLogImpl);
562     }
563 
564     /**
565      * Returns the currently configured geometric mean implementation.
566      *
567      * @return the StorelessUnivariateStatistic implementing the geometric mean
568      */
569     public StorelessUnivariateStatistic[] getGeoMeanImpl() {
570         return geoMeanImpl.clone();
571     }
572 
573     /**
574      * <p>Sets the implementation for the geometric mean.</p>
575      * <p>This method must be activated before any data has been added - i.e.,
576      * before {@link #addValue(double[]) addValue} has been used to add data;
577      * otherwise an IllegalStateException will be thrown.</p>
578      *
579      * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
580      * for computing the geometric mean
581      * @throws DimensionMismatchException if the array dimension
582      * does not match the one used at construction
583      * @throws MathIllegalStateException if data has already been added
584      *  (i.e if n &gt; 0)
585      */
586     public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
587     throws MathIllegalStateException, DimensionMismatchException {
588         setImpl(geoMeanImpl, this.geoMeanImpl);
589     }
590 
591     /**
592      * Returns the currently configured mean implementation.
593      *
594      * @return the StorelessUnivariateStatistic implementing the mean
595      */
596     public StorelessUnivariateStatistic[] getMeanImpl() {
597         return meanImpl.clone();
598     }
599 
600     /**
601      * <p>Sets the implementation for the mean.</p>
602      * <p>This method must be activated before any data has been added - i.e.,
603      * before {@link #addValue(double[]) addValue} has been used to add data;
604      * otherwise an IllegalStateException will be thrown.</p>
605      *
606      * @param meanImpl the StorelessUnivariateStatistic instance to use
607      * for computing the mean
608      * @throws DimensionMismatchException if the array dimension
609      * does not match the one used at construction
610      * @throws MathIllegalStateException if data has already been added
611      *  (i.e if n &gt; 0)
612      */
613     public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
614     throws MathIllegalStateException, DimensionMismatchException{
615         setImpl(meanImpl, this.meanImpl);
616     }
617 
618     /**
619      * Throws MathIllegalStateException if the statistic is not empty.
620      * @throws MathIllegalStateException if n > 0.
621      */
622     private void checkEmpty() throws MathIllegalStateException {
623         if (n > 0) {
624             throw new MathIllegalStateException(
625                     LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
626         }
627     }
628 
629     /**
630      * Throws DimensionMismatchException if dimension != k.
631      * @param dimension dimension to check
632      * @throws DimensionMismatchException if dimension != k
633      */
634     private void checkDimension(int dimension) throws DimensionMismatchException {
635         if (dimension != k) {
636             throw new DimensionMismatchException(dimension, k);
637         }
638     }
639 }