/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.legacy.stat.descriptive;

import java.util.Arrays;

import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
import org.apache.commons.math4.legacy.linear.RealMatrix;
import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialCovariance;
import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
import org.apache.commons.math4.core.jdkmath.JdkMath;
import org.apache.commons.math4.legacy.core.MathArrays;
import org.apache.commons.numbers.core.Precision;

/**
 * <p>Computes summary statistics for a stream of n-tuples added using the
 * {@link #addValue(double[]) addValue} method. The data values are not stored
 * in memory, so this class can be used to compute statistics for very large
 * n-tuple streams.</p>
 *
 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
 * summary state and compute statistics are configurable via setters.
 * For example, the default implementation for the mean can be overridden by
 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
 * parameters to these methods must implement the
 * {@link StorelessUnivariateStatistic} interface and configuration must be
 * completed before <code>addValue</code> is called. No configuration is
 * necessary to use the default, commons-math provided implementations.</p>
 *
 * <p>To compute statistics for a stream of n-tuples, construct a
 * MultivariateSummaryStatistics instance with dimension n and then use
 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
 * methods where Xxx is a statistic return an array of <code>double</code>
 * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
 * value of the given statistic for the data range consisting of the i<sup>th</sup> element of
 * each of the input n-tuples. For example, if <code>addValue</code> is called
 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
 * <code>getSum</code> will return a three-element array with values
 * {0+3+6, 1+4+7, 2+5+8}.</p>
 *
 * <p>Note: This class is not thread-safe. Use
 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
 * threads is required.</p>
 *
 * @since 1.2
 */
public class MultivariateSummaryStatistics
    implements StatisticalMultivariateSummary {
    /** Dimension of the data. */
    private final int k;

    /** Count of values that have been added. */
    private long n;

    /** Per-component sum implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] sumImpl;

    /** Per-component sum of squares implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] sumSqImpl;

    /** Per-component minimum implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] minImpl;

    /** Per-component maximum implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] maxImpl;

    /** Per-component sum of logs implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] sumLogImpl;

    /** Per-component geometric mean implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] geoMeanImpl;

    /** Per-component mean implementations - elements can be replaced by setter. */
    private final StorelessUnivariateStatistic[] meanImpl;

    /** Covariance statistic implementation - cannot be reset. */
    private final VectorialCovariance covarianceImpl;

    /**
     * Construct a MultivariateSummaryStatistics instance.
     *
     * <p>Every component is initialised with the default implementations
     * ({@link Sum}, {@link SumOfSquares}, {@link Min}, {@link Max},
     * {@link SumOfLogs}, {@link GeometricMean} and {@link Mean}).</p>
     *
     * @param k dimension of the data
     * @param isCovarianceBiasCorrected if true, the unbiased sample
     * covariance is computed, otherwise the biased population covariance
     * is computed
     */
    public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
        this.k = k;

        sumImpl     = new StorelessUnivariateStatistic[k];
        sumSqImpl   = new StorelessUnivariateStatistic[k];
        minImpl     = new StorelessUnivariateStatistic[k];
        maxImpl     = new StorelessUnivariateStatistic[k];
        sumLogImpl  = new StorelessUnivariateStatistic[k];
        geoMeanImpl = new StorelessUnivariateStatistic[k];
        meanImpl    = new StorelessUnivariateStatistic[k];

        for (int i = 0; i < k; ++i) {
            sumImpl[i]     = new Sum();
            sumSqImpl[i]   = new SumOfSquares();
            minImpl[i]     = new Min();
            maxImpl[i]     = new Max();
            sumLogImpl[i]  = new SumOfLogs();
            geoMeanImpl[i] = new GeometricMean();
            meanImpl[i]    = new Mean();
        }

        covarianceImpl =
            new VectorialCovariance(k, isCovarianceBiasCorrected);
    }

    /**
     * Add an n-tuple to the data.
     *
     * <p>The dimension is checked first, so a tuple of the wrong length
     * leaves all statistics untouched.</p>
     *
     * @param value  the n-tuple to add
     * @throws DimensionMismatchException if the length of the array
     * does not match the one used at construction
     */
    public void addValue(double[] value) throws DimensionMismatchException {
        checkDimension(value.length);
        for (int i = 0; i < k; ++i) {
            double v = value[i];
            sumImpl[i].increment(v);
            sumSqImpl[i].increment(v);
            minImpl[i].increment(v);
            maxImpl[i].increment(v);
            sumLogImpl[i].increment(v);
            geoMeanImpl[i].increment(v);
            meanImpl[i].increment(v);
        }
        covarianceImpl.increment(value);
        n++;
    }

    /**
     * Returns the dimension of the data.
     * @return The dimension of the data
     */
    @Override
    public int getDimension() {
        return k;
    }

    /**
     * Returns the number of available values.
     * @return The number of available values
     */
    @Override
    public long getN() {
        return n;
    }

    /**
     * Returns an array of the results of a statistic.
     * @param stats univariate statistic array
     * @return a new array whose i<sup>th</sup> entry is the result of <code>stats[i]</code>
     */
    private double[] getResults(StorelessUnivariateStatistic[] stats) {
        double[] results = new double[stats.length];
        for (int i = 0; i < results.length; ++i) {
            results[i] = stats[i].getResult();
        }
        return results;
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the sum of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component sums
     */
    @Override
    public double[] getSum() {
        return getResults(sumImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the sum of squares of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component sums of squares
     */
    @Override
    public double[] getSumSq() {
        return getResults(sumSqImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the sum of logs of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component log sums
     */
    @Override
    public double[] getSumLog() {
        return getResults(sumLogImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the mean of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component means
     */
    @Override
    public double[] getMean() {
        return getResults(meanImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the standard deviation of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * <p>Every entry is <code>NaN</code> when no values have been added and
     * <code>0</code> when exactly one value has been added. Otherwise the
     * entries are the square roots of the diagonal of the covariance matrix.</p>
     *
     * @return the array of component standard deviations
     */
    @Override
    public double[] getStandardDeviation() {
        double[] stdDev = new double[k];
        if (getN() < 1) {
            Arrays.fill(stdDev, Double.NaN);
        } else if (getN() < 2) {
            Arrays.fill(stdDev, 0.0);
        } else {
            RealMatrix matrix = covarianceImpl.getResult();
            for (int i = 0; i < k; ++i) {
                stdDev[i] = JdkMath.sqrt(matrix.getEntry(i, i));
            }
        }
        return stdDev;
    }

    /**
     * Returns the covariance matrix of the values that have been added.
     *
     * @return the covariance matrix
     */
    @Override
    public RealMatrix getCovariance() {
        return covarianceImpl.getResult();
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the maximum of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component maxima
     */
    @Override
    public double[] getMax() {
        return getResults(maxImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the minimum of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component minima
     */
    @Override
    public double[] getMin() {
        return getResults(minImpl);
    }

    /**
     * Returns an array whose i<sup>th</sup> entry is the geometric mean of the
     * i<sup>th</sup> entries of the arrays that have been added using
     * {@link #addValue(double[])}.
     *
     * @return the array of component geometric means
     */
    @Override
    public double[] getGeometricMean() {
        return getResults(geoMeanImpl);
    }

    /**
     * Generates a text report displaying
     * summary statistics from values that
     * have been added.
     *
     * <p>The report lists, one per line: n, min, max, mean, geometric mean,
     * sum of squares, sum of logarithms, standard deviation and covariance.</p>
     *
     * @return String with line feeds displaying statistics
     */
    @Override
    public String toString() {
        final String separator = ", ";
        final String suffix = System.getProperty("line.separator");
        StringBuilder outBuffer = new StringBuilder();
        outBuffer.append("MultivariateSummaryStatistics:").append(suffix);
        outBuffer.append("n: ").append(getN()).append(suffix);
        append(outBuffer, getMin(), "min: ", separator, suffix);
        append(outBuffer, getMax(), "max: ", separator, suffix);
        append(outBuffer, getMean(), "mean: ", separator, suffix);
        append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
        append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
        append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
        append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
        outBuffer.append("covariance: ").append(getCovariance()).append(suffix);
        return outBuffer.toString();
    }

    /**
     * Append a text representation of an array to a buffer.
     * @param buffer buffer to fill
     * @param data data array
     * @param prefix text prefix
     * @param separator elements separator
     * @param suffix text suffix
     */
    private void append(StringBuilder buffer, double[] data,
                        String prefix, String separator, String suffix) {
        buffer.append(prefix);
        for (int i = 0; i < data.length; ++i) {
            if (i > 0) {
                buffer.append(separator);
            }
            buffer.append(data[i]);
        }
        buffer.append(suffix);
    }

    /**
     * Resets all statistics and storage.
     */
    public void clear() {
        this.n = 0;
        for (int i = 0; i < k; ++i) {
            minImpl[i].clear();
            maxImpl[i].clear();
            sumImpl[i].clear();
            sumLogImpl[i].clear();
            sumSqImpl[i].clear();
            geoMeanImpl[i].clear();
            meanImpl[i].clear();
        }
        covarianceImpl.clear();
    }

    /**
     * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
     * instance and all statistics have the same values as this.
     *
     * <p>The statistics compared are the geometric mean, maximum, mean,
     * minimum, count, sum, sum of squares, sum of logs and covariance.</p>
     *
     * @param object the object to test equality against.
     * @return true if object equals this
     */
    @Override
    public boolean equals(Object object) {
        if (object == this) {
            return true;
        }
        if (!(object instanceof MultivariateSummaryStatistics)) {
            return false;
        }
        MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
        return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
               MathArrays.equalsIncludingNaN(stat.getMax(),           getMax())           &&
               MathArrays.equalsIncludingNaN(stat.getMean(),          getMean())          &&
               MathArrays.equalsIncludingNaN(stat.getMin(),           getMin())           &&
               Precision.equalsIncludingNaN(stat.getN(),              getN())             &&
               MathArrays.equalsIncludingNaN(stat.getSum(),           getSum())           &&
               MathArrays.equalsIncludingNaN(stat.getSumSq(),         getSumSq())         &&
               MathArrays.equalsIncludingNaN(stat.getSumLog(),        getSumLog())        &&
               stat.getCovariance().equals(getCovariance());
    }

    /**
     * Returns hash code based on values of statistics.
     *
     * <p>Each statistic compared by {@link #equals(Object)} contributes
     * exactly once to the hash.</p>
     *
     * @return hash code
     */
    @Override
    public int hashCode() {
        // The geometric mean used to be folded in twice; one pass is enough
        // and avoids recomputing the result array.
        int result = 31 + Arrays.hashCode(getGeometricMean());
        result = result * 31 + Arrays.hashCode(getMax());
        result = result * 31 + Arrays.hashCode(getMean());
        result = result * 31 + Arrays.hashCode(getMin());
        result = result * 31 + Double.hashCode(getN());
        result = result * 31 + Arrays.hashCode(getSum());
        result = result * 31 + Arrays.hashCode(getSumSq());
        result = result * 31 + Arrays.hashCode(getSumLog());
        result = result * 31 + getCovariance().hashCode();
        return result;
    }

    // Getters and setters for statistics implementations
    /**
     * Sets statistics implementations.
     *
     * <p>The elements of <code>newImpl</code> are copied into
     * <code>oldImpl</code>; the <code>newImpl</code> array itself is not retained.</p>
     *
     * @param newImpl new implementations for statistics
     * @param oldImpl old implementations for statistics
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    private void setImpl(StorelessUnivariateStatistic[] newImpl,
                         StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
                         DimensionMismatchException {
        checkEmpty();
        checkDimension(newImpl.length);
        System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
    }

    /**
     * Returns the currently configured Sum implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the sum, one per component
     */
    public StorelessUnivariateStatistic[] getSumImpl() {
        return sumImpl.clone();
    }

    /**
     * <p>Sets the implementation for the Sum.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param sumImpl the StorelessUnivariateStatistic instances to use
     * for computing the Sum, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(sumImpl, this.sumImpl);
    }

    /**
     * Returns the currently configured sum of squares implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the sum of squares, one per component
     */
    public StorelessUnivariateStatistic[] getSumsqImpl() {
        return sumSqImpl.clone();
    }

    /**
     * <p>Sets the implementation for the sum of squares.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param sumsqImpl the StorelessUnivariateStatistic instances to use
     * for computing the sum of squares, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(sumsqImpl, this.sumSqImpl);
    }

    /**
     * Returns the currently configured minimum implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the minimum, one per component
     */
    public StorelessUnivariateStatistic[] getMinImpl() {
        return minImpl.clone();
    }

    /**
     * <p>Sets the implementation for the minimum.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param minImpl the StorelessUnivariateStatistic instances to use
     * for computing the minimum, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(minImpl, this.minImpl);
    }

    /**
     * Returns the currently configured maximum implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the maximum, one per component
     */
    public StorelessUnivariateStatistic[] getMaxImpl() {
        return maxImpl.clone();
    }

    /**
     * <p>Sets the implementation for the maximum.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param maxImpl the StorelessUnivariateStatistic instances to use
     * for computing the maximum, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(maxImpl, this.maxImpl);
    }

    /**
     * Returns the currently configured sum of logs implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the log sum, one per component
     */
    public StorelessUnivariateStatistic[] getSumLogImpl() {
        return sumLogImpl.clone();
    }

    /**
     * <p>Sets the implementation for the sum of logs.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param sumLogImpl the StorelessUnivariateStatistic instances to use
     * for computing the log sum, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(sumLogImpl, this.sumLogImpl);
    }

    /**
     * Returns the currently configured geometric mean implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the geometric mean, one per component
     */
    public StorelessUnivariateStatistic[] getGeoMeanImpl() {
        return geoMeanImpl.clone();
    }

    /**
     * <p>Sets the implementation for the geometric mean.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param geoMeanImpl the StorelessUnivariateStatistic instances to use
     * for computing the geometric mean, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(geoMeanImpl, this.geoMeanImpl);
    }

    /**
     * Returns the currently configured mean implementations.
     *
     * @return a new array holding the StorelessUnivariateStatistic instances
     * implementing the mean, one per component
     */
    public StorelessUnivariateStatistic[] getMeanImpl() {
        return meanImpl.clone();
    }

    /**
     * <p>Sets the implementation for the mean.</p>
     * <p>This method must be activated before any data has been added - i.e.,
     * before {@link #addValue(double[]) addValue} has been used to add data;
     * otherwise a MathIllegalStateException will be thrown.</p>
     *
     * @param meanImpl the StorelessUnivariateStatistic instances to use
     * for computing the mean, one per component
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n &gt; 0)
     */
    public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
        throws MathIllegalStateException, DimensionMismatchException {
        setImpl(meanImpl, this.meanImpl);
    }

    /**
     * Throws MathIllegalStateException if the statistic is not empty.
     * @throws MathIllegalStateException if n &gt; 0.
     */
    private void checkEmpty() throws MathIllegalStateException {
        if (n > 0) {
            throw new MathIllegalStateException(
                    LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
        }
    }

    /**
     * Throws DimensionMismatchException if dimension != k.
     * @param dimension dimension to check
     * @throws DimensionMismatchException if dimension != k
     */
    private void checkDimension(int dimension) throws DimensionMismatchException {
        if (dimension != k) {
            throw new DimensionMismatchException(dimension, k);
        }
    }
}