1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.descriptive;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
22 import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
23 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
24 import org.apache.commons.math4.legacy.linear.RealMatrix;
25 import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
26 import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
27 import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialCovariance;
28 import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
29 import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
30 import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
31 import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
32 import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
33 import org.apache.commons.math4.core.jdkmath.JdkMath;
34 import org.apache.commons.math4.legacy.core.MathArrays;
35 import org.apache.commons.numbers.core.Precision;
36
37 /**
38 * <p>Computes summary statistics for a stream of n-tuples added using the
39 * {@link #addValue(double[]) addValue} method. The data values are not stored
40 * in memory, so this class can be used to compute statistics for very large
41 * n-tuple streams.</p>
42 *
43 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
44 * summary state and compute statistics are configurable via setters.
45 * For example, the default implementation for the mean can be overridden by
46 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
47 * parameters to these methods must implement the
48 * {@link StorelessUnivariateStatistic} interface and configuration must be
49 * completed before <code>addValue</code> is called. No configuration is
50 * necessary to use the default, commons-math provided implementations.</p>
51 *
52 * <p>To compute statistics for a stream of n-tuples, construct a
53 * MultivariateStatistics instance with dimension n and then use
54 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
55 * methods where Xxx is a statistic return an array of <code>double</code>
56 * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
57 * value of the given statistic for data range consisting of the i<sup>th</sup> element of
58 * each of the input n-tuples. For example, if <code>addValue</code> is called
59 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
60 * <code>getSum</code> will return a three-element array with values
61 * {0+3+6, 1+4+7, 2+5+8}</p>
62 *
63 * <p>Note: This class is not thread-safe. Use
64 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
65 * threads is required.</p>
66 *
67 * @since 1.2
68 */
69 public class MultivariateSummaryStatistics
70 implements StatisticalMultivariateSummary {
71 /** Dimension of the data. */
72 private final int k;
73
74 /** Count of values that have been added. */
75 private long n;
76
77 /** Sum statistic implementation - can be reset by setter. */
78 private final StorelessUnivariateStatistic[] sumImpl;
79
80 /** Sum of squares statistic implementation - can be reset by setter. */
81 private final StorelessUnivariateStatistic[] sumSqImpl;
82
83 /** Minimum statistic implementation - can be reset by setter. */
84 private final StorelessUnivariateStatistic[] minImpl;
85
86 /** Maximum statistic implementation - can be reset by setter. */
87 private final StorelessUnivariateStatistic[] maxImpl;
88
89 /** Sum of log statistic implementation - can be reset by setter. */
90 private final StorelessUnivariateStatistic[] sumLogImpl;
91
92 /** Geometric mean statistic implementation - can be reset by setter. */
93 private final StorelessUnivariateStatistic[] geoMeanImpl;
94
95 /** Mean statistic implementation - can be reset by setter. */
96 private final StorelessUnivariateStatistic[] meanImpl;
97
98 /** Covariance statistic implementation - cannot be reset. */
99 private final VectorialCovariance covarianceImpl;
100
101 /**
102 * Construct a MultivariateSummaryStatistics instance.
103 * @param k dimension of the data
104 * @param isCovarianceBiasCorrected if true, the unbiased sample
105 * covariance is computed, otherwise the biased population covariance
106 * is computed
107 */
108 public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
109 this.k = k;
110
111 sumImpl = new StorelessUnivariateStatistic[k];
112 sumSqImpl = new StorelessUnivariateStatistic[k];
113 minImpl = new StorelessUnivariateStatistic[k];
114 maxImpl = new StorelessUnivariateStatistic[k];
115 sumLogImpl = new StorelessUnivariateStatistic[k];
116 geoMeanImpl = new StorelessUnivariateStatistic[k];
117 meanImpl = new StorelessUnivariateStatistic[k];
118
119 for (int i = 0; i < k; ++i) {
120 sumImpl[i] = new Sum();
121 sumSqImpl[i] = new SumOfSquares();
122 minImpl[i] = new Min();
123 maxImpl[i] = new Max();
124 sumLogImpl[i] = new SumOfLogs();
125 geoMeanImpl[i] = new GeometricMean();
126 meanImpl[i] = new Mean();
127 }
128
129 covarianceImpl =
130 new VectorialCovariance(k, isCovarianceBiasCorrected);
131 }
132
133 /**
134 * Add an n-tuple to the data.
135 *
136 * @param value the n-tuple to add
137 * @throws DimensionMismatchException if the length of the array
138 * does not match the one used at construction
139 */
140 public void addValue(double[] value) throws DimensionMismatchException {
141 checkDimension(value.length);
142 for (int i = 0; i < k; ++i) {
143 double v = value[i];
144 sumImpl[i].increment(v);
145 sumSqImpl[i].increment(v);
146 minImpl[i].increment(v);
147 maxImpl[i].increment(v);
148 sumLogImpl[i].increment(v);
149 geoMeanImpl[i].increment(v);
150 meanImpl[i].increment(v);
151 }
152 covarianceImpl.increment(value);
153 n++;
154 }
155
156 /**
157 * Returns the dimension of the data.
158 * @return The dimension of the data
159 */
160 @Override
161 public int getDimension() {
162 return k;
163 }
164
165 /**
166 * Returns the number of available values.
167 * @return The number of available values
168 */
169 @Override
170 public long getN() {
171 return n;
172 }
173
174 /**
175 * Returns an array of the results of a statistic.
176 * @param stats univariate statistic array
177 * @return results array
178 */
179 private double[] getResults(StorelessUnivariateStatistic[] stats) {
180 double[] results = new double[stats.length];
181 for (int i = 0; i < results.length; ++i) {
182 results[i] = stats[i].getResult();
183 }
184 return results;
185 }
186
187 /**
188 * Returns an array whose i<sup>th</sup> entry is the sum of the.
189 * i<sup>th</sup> entries of the arrays that have been added using
190 * {@link #addValue(double[])}
191 *
192 * @return the array of component sums
193 */
194 @Override
195 public double[] getSum() {
196 return getResults(sumImpl);
197 }
198
199 /**
200 * Returns an array whose i<sup>th</sup> entry is the sum of squares of the.
201 * i<sup>th</sup> entries of the arrays that have been added using
202 * {@link #addValue(double[])}
203 *
204 * @return the array of component sums of squares
205 */
206 @Override
207 public double[] getSumSq() {
208 return getResults(sumSqImpl);
209 }
210
211 /**
212 * Returns an array whose i<sup>th</sup> entry is the sum of logs of the.
213 * i<sup>th</sup> entries of the arrays that have been added using
214 * {@link #addValue(double[])}
215 *
216 * @return the array of component log sums
217 */
218 @Override
219 public double[] getSumLog() {
220 return getResults(sumLogImpl);
221 }
222
223 /**
224 * Returns an array whose i<sup>th</sup> entry is the mean of the.
225 * i<sup>th</sup> entries of the arrays that have been added using
226 * {@link #addValue(double[])}
227 *
228 * @return the array of component means
229 */
230 @Override
231 public double[] getMean() {
232 return getResults(meanImpl);
233 }
234
235 /**
236 * Returns an array whose i<sup>th</sup> entry is the standard deviation of the.
237 * i<sup>th</sup> entries of the arrays that have been added using
238 * {@link #addValue(double[])}
239 *
240 * @return the array of component standard deviations
241 */
242 @Override
243 public double[] getStandardDeviation() {
244 double[] stdDev = new double[k];
245 if (getN() < 1) {
246 Arrays.fill(stdDev, Double.NaN);
247 } else if (getN() < 2) {
248 Arrays.fill(stdDev, 0.0);
249 } else {
250 RealMatrix matrix = covarianceImpl.getResult();
251 for (int i = 0; i < k; ++i) {
252 stdDev[i] = JdkMath.sqrt(matrix.getEntry(i, i));
253 }
254 }
255 return stdDev;
256 }
257
258 /**
259 * Returns the covariance matrix of the values that have been added.
260 *
261 * @return the covariance matrix
262 */
263 @Override
264 public RealMatrix getCovariance() {
265 return covarianceImpl.getResult();
266 }
267
268 /**
269 * Returns an array whose i<sup>th</sup> entry is the maximum of the.
270 * i<sup>th</sup> entries of the arrays that have been added using
271 * {@link #addValue(double[])}
272 *
273 * @return the array of component maxima
274 */
275 @Override
276 public double[] getMax() {
277 return getResults(maxImpl);
278 }
279
280 /**
281 * Returns an array whose i<sup>th</sup> entry is the minimum of the.
282 * i<sup>th</sup> entries of the arrays that have been added using
283 * {@link #addValue(double[])}
284 *
285 * @return the array of component minima
286 */
287 @Override
288 public double[] getMin() {
289 return getResults(minImpl);
290 }
291
292 /**
293 * Returns an array whose i<sup>th</sup> entry is the geometric mean of the.
294 * i<sup>th</sup> entries of the arrays that have been added using
295 * {@link #addValue(double[])}
296 *
297 * @return the array of component geometric means
298 */
299 @Override
300 public double[] getGeometricMean() {
301 return getResults(geoMeanImpl);
302 }
303
304 /**
305 * Generates a text report displaying
306 * summary statistics from values that
307 * have been added.
308 * @return String with line feeds displaying statistics
309 */
310 @Override
311 public String toString() {
312 final String separator = ", ";
313 final String suffix = System.getProperty("line.separator");
314 StringBuilder outBuffer = new StringBuilder();
315 outBuffer.append("MultivariateSummaryStatistics:").append(suffix);
316 outBuffer.append("n: ").append(getN()).append(suffix);
317 append(outBuffer, getMin(), "min: ", separator, suffix);
318 append(outBuffer, getMax(), "max: ", separator, suffix);
319 append(outBuffer, getMean(), "mean: ", separator, suffix);
320 append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
321 append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
322 append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
323 append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
324 outBuffer.append("covariance: ").append(getCovariance()).append(suffix);
325 return outBuffer.toString();
326 }
327
328 /**
329 * Append a text representation of an array to a buffer.
330 * @param buffer buffer to fill
331 * @param data data array
332 * @param prefix text prefix
333 * @param separator elements separator
334 * @param suffix text suffix
335 */
336 private void append(StringBuilder buffer, double[] data,
337 String prefix, String separator, String suffix) {
338 buffer.append(prefix);
339 for (int i = 0; i < data.length; ++i) {
340 if (i > 0) {
341 buffer.append(separator);
342 }
343 buffer.append(data[i]);
344 }
345 buffer.append(suffix);
346 }
347
348 /**
349 * Resets all statistics and storage.
350 */
351 public void clear() {
352 this.n = 0;
353 for (int i = 0; i < k; ++i) {
354 minImpl[i].clear();
355 maxImpl[i].clear();
356 sumImpl[i].clear();
357 sumLogImpl[i].clear();
358 sumSqImpl[i].clear();
359 geoMeanImpl[i].clear();
360 meanImpl[i].clear();
361 }
362 covarianceImpl.clear();
363 }
364
365 /**
366 * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
367 * instance and all statistics have the same values as this.
368 * @param object the object to test equality against.
369 * @return true if object equals this
370 */
371 @Override
372 public boolean equals(Object object) {
373 if (object == this ) {
374 return true;
375 }
376 if (!(object instanceof MultivariateSummaryStatistics)) {
377 return false;
378 }
379 MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
380 return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
381 MathArrays.equalsIncludingNaN(stat.getMax(), getMax()) &&
382 MathArrays.equalsIncludingNaN(stat.getMean(), getMean()) &&
383 MathArrays.equalsIncludingNaN(stat.getMin(), getMin()) &&
384 Precision.equalsIncludingNaN(stat.getN(), getN()) &&
385 MathArrays.equalsIncludingNaN(stat.getSum(), getSum()) &&
386 MathArrays.equalsIncludingNaN(stat.getSumSq(), getSumSq()) &&
387 MathArrays.equalsIncludingNaN(stat.getSumLog(), getSumLog()) &&
388 stat.getCovariance().equals( getCovariance());
389 }
390
391 /**
392 * Returns hash code based on values of statistics.
393 *
394 * @return hash code
395 */
396 @Override
397 public int hashCode() {
398 int result = 31 + Arrays.hashCode(getGeometricMean());
399 result = result * 31 + Arrays.hashCode(getGeometricMean());
400 result = result * 31 + Arrays.hashCode(getMax());
401 result = result * 31 + Arrays.hashCode(getMean());
402 result = result * 31 + Arrays.hashCode(getMin());
403 result = result * 31 + Double.hashCode(getN());
404 result = result * 31 + Arrays.hashCode(getSum());
405 result = result * 31 + Arrays.hashCode(getSumSq());
406 result = result * 31 + Arrays.hashCode(getSumLog());
407 result = result * 31 + getCovariance().hashCode();
408 return result;
409 }
410
411 // Getters and setters for statistics implementations
412 /**
413 * Sets statistics implementations.
414 * @param newImpl new implementations for statistics
415 * @param oldImpl old implementations for statistics
416 * @throws DimensionMismatchException if the array dimension
417 * does not match the one used at construction
418 * @throws MathIllegalStateException if data has already been added
419 * (i.e. if n > 0)
420 */
421 private void setImpl(StorelessUnivariateStatistic[] newImpl,
422 StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
423 DimensionMismatchException {
424 checkEmpty();
425 checkDimension(newImpl.length);
426 System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
427 }
428
429 /**
430 * Returns the currently configured Sum implementation.
431 *
432 * @return the StorelessUnivariateStatistic implementing the sum
433 */
434 public StorelessUnivariateStatistic[] getSumImpl() {
435 return sumImpl.clone();
436 }
437
438 /**
439 * <p>Sets the implementation for the Sum.</p>
440 * <p>This method must be activated before any data has been added - i.e.,
441 * before {@link #addValue(double[]) addValue} has been used to add data;
442 * otherwise an IllegalStateException will be thrown.</p>
443 *
444 * @param sumImpl the StorelessUnivariateStatistic instance to use
445 * for computing the Sum
446 * @throws DimensionMismatchException if the array dimension
447 * does not match the one used at construction
448 * @throws MathIllegalStateException if data has already been added
449 * (i.e if n > 0)
450 */
451 public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
452 throws MathIllegalStateException, DimensionMismatchException {
453 setImpl(sumImpl, this.sumImpl);
454 }
455
456 /**
457 * Returns the currently configured sum of squares implementation.
458 *
459 * @return the StorelessUnivariateStatistic implementing the sum of squares
460 */
461 public StorelessUnivariateStatistic[] getSumsqImpl() {
462 return sumSqImpl.clone();
463 }
464
465 /**
466 * <p>Sets the implementation for the sum of squares.</p>
467 * <p>This method must be activated before any data has been added - i.e.,
468 * before {@link #addValue(double[]) addValue} has been used to add data;
469 * otherwise an IllegalStateException will be thrown.</p>
470 *
471 * @param sumsqImpl the StorelessUnivariateStatistic instance to use
472 * for computing the sum of squares
473 * @throws DimensionMismatchException if the array dimension
474 * does not match the one used at construction
475 * @throws MathIllegalStateException if data has already been added
476 * (i.e if n > 0)
477 */
478 public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
479 throws MathIllegalStateException, DimensionMismatchException {
480 setImpl(sumsqImpl, this.sumSqImpl);
481 }
482
483 /**
484 * Returns the currently configured minimum implementation.
485 *
486 * @return the StorelessUnivariateStatistic implementing the minimum
487 */
488 public StorelessUnivariateStatistic[] getMinImpl() {
489 return minImpl.clone();
490 }
491
492 /**
493 * <p>Sets the implementation for the minimum.</p>
494 * <p>This method must be activated before any data has been added - i.e.,
495 * before {@link #addValue(double[]) addValue} has been used to add data;
496 * otherwise an IllegalStateException will be thrown.</p>
497 *
498 * @param minImpl the StorelessUnivariateStatistic instance to use
499 * for computing the minimum
500 * @throws DimensionMismatchException if the array dimension
501 * does not match the one used at construction
502 * @throws MathIllegalStateException if data has already been added
503 * (i.e if n > 0)
504 */
505 public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
506 throws MathIllegalStateException, DimensionMismatchException {
507 setImpl(minImpl, this.minImpl);
508 }
509
510 /**
511 * Returns the currently configured maximum implementation.
512 *
513 * @return the StorelessUnivariateStatistic implementing the maximum
514 */
515 public StorelessUnivariateStatistic[] getMaxImpl() {
516 return maxImpl.clone();
517 }
518
519 /**
520 * <p>Sets the implementation for the maximum.</p>
521 * <p>This method must be activated before any data has been added - i.e.,
522 * before {@link #addValue(double[]) addValue} has been used to add data;
523 * otherwise an IllegalStateException will be thrown.</p>
524 *
525 * @param maxImpl the StorelessUnivariateStatistic instance to use
526 * for computing the maximum
527 * @throws DimensionMismatchException if the array dimension
528 * does not match the one used at construction
529 * @throws MathIllegalStateException if data has already been added
530 * (i.e if n > 0)
531 */
532 public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
533 throws MathIllegalStateException, DimensionMismatchException{
534 setImpl(maxImpl, this.maxImpl);
535 }
536
537 /**
538 * Returns the currently configured sum of logs implementation.
539 *
540 * @return the StorelessUnivariateStatistic implementing the log sum
541 */
542 public StorelessUnivariateStatistic[] getSumLogImpl() {
543 return sumLogImpl.clone();
544 }
545
546 /**
547 * <p>Sets the implementation for the sum of logs.</p>
548 * <p>This method must be activated before any data has been added - i.e.,
549 * before {@link #addValue(double[]) addValue} has been used to add data;
550 * otherwise an IllegalStateException will be thrown.</p>
551 *
552 * @param sumLogImpl the StorelessUnivariateStatistic instance to use
553 * for computing the log sum
554 * @throws DimensionMismatchException if the array dimension
555 * does not match the one used at construction
556 * @throws MathIllegalStateException if data has already been added
557 * (i.e if n > 0)
558 */
559 public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
560 throws MathIllegalStateException, DimensionMismatchException{
561 setImpl(sumLogImpl, this.sumLogImpl);
562 }
563
564 /**
565 * Returns the currently configured geometric mean implementation.
566 *
567 * @return the StorelessUnivariateStatistic implementing the geometric mean
568 */
569 public StorelessUnivariateStatistic[] getGeoMeanImpl() {
570 return geoMeanImpl.clone();
571 }
572
573 /**
574 * <p>Sets the implementation for the geometric mean.</p>
575 * <p>This method must be activated before any data has been added - i.e.,
576 * before {@link #addValue(double[]) addValue} has been used to add data;
577 * otherwise an IllegalStateException will be thrown.</p>
578 *
579 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
580 * for computing the geometric mean
581 * @throws DimensionMismatchException if the array dimension
582 * does not match the one used at construction
583 * @throws MathIllegalStateException if data has already been added
584 * (i.e if n > 0)
585 */
586 public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
587 throws MathIllegalStateException, DimensionMismatchException {
588 setImpl(geoMeanImpl, this.geoMeanImpl);
589 }
590
591 /**
592 * Returns the currently configured mean implementation.
593 *
594 * @return the StorelessUnivariateStatistic implementing the mean
595 */
596 public StorelessUnivariateStatistic[] getMeanImpl() {
597 return meanImpl.clone();
598 }
599
600 /**
601 * <p>Sets the implementation for the mean.</p>
602 * <p>This method must be activated before any data has been added - i.e.,
603 * before {@link #addValue(double[]) addValue} has been used to add data;
604 * otherwise an IllegalStateException will be thrown.</p>
605 *
606 * @param meanImpl the StorelessUnivariateStatistic instance to use
607 * for computing the mean
608 * @throws DimensionMismatchException if the array dimension
609 * does not match the one used at construction
610 * @throws MathIllegalStateException if data has already been added
611 * (i.e if n > 0)
612 */
613 public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
614 throws MathIllegalStateException, DimensionMismatchException{
615 setImpl(meanImpl, this.meanImpl);
616 }
617
618 /**
619 * Throws MathIllegalStateException if the statistic is not empty.
620 * @throws MathIllegalStateException if n > 0.
621 */
622 private void checkEmpty() throws MathIllegalStateException {
623 if (n > 0) {
624 throw new MathIllegalStateException(
625 LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
626 }
627 }
628
629 /**
630 * Throws DimensionMismatchException if dimension != k.
631 * @param dimension dimension to check
632 * @throws DimensionMismatchException if dimension != k
633 */
634 private void checkDimension(int dimension) throws DimensionMismatchException {
635 if (dimension != k) {
636 throw new DimensionMismatchException(dimension, k);
637 }
638 }
639 }