MultivariateSummaryStatistics.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.math4.legacy.stat.descriptive;
- import java.util.Arrays;
- import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
- import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
- import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
- import org.apache.commons.math4.legacy.linear.RealMatrix;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialCovariance;
- import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
- import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
- import org.apache.commons.math4.core.jdkmath.JdkMath;
- import org.apache.commons.math4.legacy.core.MathArrays;
- import org.apache.commons.numbers.core.Precision;
- /**
- * <p>Computes summary statistics for a stream of n-tuples added using the
- * {@link #addValue(double[]) addValue} method. The data values are not stored
- * in memory, so this class can be used to compute statistics for very large
- * n-tuple streams.</p>
- *
- * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
- * summary state and compute statistics are configurable via setters.
- * For example, the default implementation for the mean can be overridden by
- * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
- * parameters to these methods must implement the
- * {@link StorelessUnivariateStatistic} interface and configuration must be
- * completed before <code>addValue</code> is called. No configuration is
- * necessary to use the default, commons-math provided implementations.</p>
- *
- * <p>To compute statistics for a stream of n-tuples, construct a
- * MultivariateStatistics instance with dimension n and then use
- * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
- * methods where Xxx is a statistic return an array of <code>double</code>
- * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
- * value of the given statistic for data range consisting of the i<sup>th</sup> element of
- * each of the input n-tuples. For example, if <code>addValue</code> is called
- * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
- * <code>getSum</code> will return a three-element array with values
- * {0+3+6, 1+4+7, 2+5+8}</p>
- *
- * <p>Note: This class is not thread-safe. Use
- * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
- * threads is required.</p>
- *
- * @since 1.2
- */
- public class MultivariateSummaryStatistics
- implements StatisticalMultivariateSummary {
- /** Dimension of the data. */
- private final int k;
- /** Count of values that have been added. */
- private long n;
- /** Sum statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] sumImpl;
- /** Sum of squares statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] sumSqImpl;
- /** Minimum statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] minImpl;
- /** Maximum statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] maxImpl;
- /** Sum of log statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] sumLogImpl;
- /** Geometric mean statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] geoMeanImpl;
- /** Mean statistic implementation - can be reset by setter. */
- private final StorelessUnivariateStatistic[] meanImpl;
- /** Covariance statistic implementation - cannot be reset. */
- private final VectorialCovariance covarianceImpl;
- /**
- * Construct a MultivariateSummaryStatistics instance.
- * @param k dimension of the data
- * @param isCovarianceBiasCorrected if true, the unbiased sample
- * covariance is computed, otherwise the biased population covariance
- * is computed
- */
- public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
- this.k = k;
- sumImpl = new StorelessUnivariateStatistic[k];
- sumSqImpl = new StorelessUnivariateStatistic[k];
- minImpl = new StorelessUnivariateStatistic[k];
- maxImpl = new StorelessUnivariateStatistic[k];
- sumLogImpl = new StorelessUnivariateStatistic[k];
- geoMeanImpl = new StorelessUnivariateStatistic[k];
- meanImpl = new StorelessUnivariateStatistic[k];
- for (int i = 0; i < k; ++i) {
- sumImpl[i] = new Sum();
- sumSqImpl[i] = new SumOfSquares();
- minImpl[i] = new Min();
- maxImpl[i] = new Max();
- sumLogImpl[i] = new SumOfLogs();
- geoMeanImpl[i] = new GeometricMean();
- meanImpl[i] = new Mean();
- }
- covarianceImpl =
- new VectorialCovariance(k, isCovarianceBiasCorrected);
- }
- /**
- * Add an n-tuple to the data.
- *
- * @param value the n-tuple to add
- * @throws DimensionMismatchException if the length of the array
- * does not match the one used at construction
- */
- public void addValue(double[] value) throws DimensionMismatchException {
- checkDimension(value.length);
- for (int i = 0; i < k; ++i) {
- double v = value[i];
- sumImpl[i].increment(v);
- sumSqImpl[i].increment(v);
- minImpl[i].increment(v);
- maxImpl[i].increment(v);
- sumLogImpl[i].increment(v);
- geoMeanImpl[i].increment(v);
- meanImpl[i].increment(v);
- }
- covarianceImpl.increment(value);
- n++;
- }
- /**
- * Returns the dimension of the data.
- * @return The dimension of the data
- */
- @Override
- public int getDimension() {
- return k;
- }
- /**
- * Returns the number of available values.
- * @return The number of available values
- */
- @Override
- public long getN() {
- return n;
- }
- /**
- * Returns an array of the results of a statistic.
- * @param stats univariate statistic array
- * @return results array
- */
- private double[] getResults(StorelessUnivariateStatistic[] stats) {
- double[] results = new double[stats.length];
- for (int i = 0; i < results.length; ++i) {
- results[i] = stats[i].getResult();
- }
- return results;
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the sum of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component sums
- */
- @Override
- public double[] getSum() {
- return getResults(sumImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the sum of squares of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component sums of squares
- */
- @Override
- public double[] getSumSq() {
- return getResults(sumSqImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the sum of logs of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component log sums
- */
- @Override
- public double[] getSumLog() {
- return getResults(sumLogImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the mean of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component means
- */
- @Override
- public double[] getMean() {
- return getResults(meanImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the standard deviation of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component standard deviations
- */
- @Override
- public double[] getStandardDeviation() {
- double[] stdDev = new double[k];
- if (getN() < 1) {
- Arrays.fill(stdDev, Double.NaN);
- } else if (getN() < 2) {
- Arrays.fill(stdDev, 0.0);
- } else {
- RealMatrix matrix = covarianceImpl.getResult();
- for (int i = 0; i < k; ++i) {
- stdDev[i] = JdkMath.sqrt(matrix.getEntry(i, i));
- }
- }
- return stdDev;
- }
- /**
- * Returns the covariance matrix of the values that have been added.
- *
- * @return the covariance matrix
- */
- @Override
- public RealMatrix getCovariance() {
- return covarianceImpl.getResult();
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the maximum of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component maxima
- */
- @Override
- public double[] getMax() {
- return getResults(maxImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the minimum of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component minima
- */
- @Override
- public double[] getMin() {
- return getResults(minImpl);
- }
- /**
- * Returns an array whose i<sup>th</sup> entry is the geometric mean of the.
- * i<sup>th</sup> entries of the arrays that have been added using
- * {@link #addValue(double[])}
- *
- * @return the array of component geometric means
- */
- @Override
- public double[] getGeometricMean() {
- return getResults(geoMeanImpl);
- }
- /**
- * Generates a text report displaying
- * summary statistics from values that
- * have been added.
- * @return String with line feeds displaying statistics
- */
- @Override
- public String toString() {
- final String separator = ", ";
- final String suffix = System.getProperty("line.separator");
- StringBuilder outBuffer = new StringBuilder();
- outBuffer.append("MultivariateSummaryStatistics:").append(suffix);
- outBuffer.append("n: ").append(getN()).append(suffix);
- append(outBuffer, getMin(), "min: ", separator, suffix);
- append(outBuffer, getMax(), "max: ", separator, suffix);
- append(outBuffer, getMean(), "mean: ", separator, suffix);
- append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
- append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
- append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
- append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
- outBuffer.append("covariance: ").append(getCovariance()).append(suffix);
- return outBuffer.toString();
- }
- /**
- * Append a text representation of an array to a buffer.
- * @param buffer buffer to fill
- * @param data data array
- * @param prefix text prefix
- * @param separator elements separator
- * @param suffix text suffix
- */
- private void append(StringBuilder buffer, double[] data,
- String prefix, String separator, String suffix) {
- buffer.append(prefix);
- for (int i = 0; i < data.length; ++i) {
- if (i > 0) {
- buffer.append(separator);
- }
- buffer.append(data[i]);
- }
- buffer.append(suffix);
- }
- /**
- * Resets all statistics and storage.
- */
- public void clear() {
- this.n = 0;
- for (int i = 0; i < k; ++i) {
- minImpl[i].clear();
- maxImpl[i].clear();
- sumImpl[i].clear();
- sumLogImpl[i].clear();
- sumSqImpl[i].clear();
- geoMeanImpl[i].clear();
- meanImpl[i].clear();
- }
- covarianceImpl.clear();
- }
- /**
- * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
- * instance and all statistics have the same values as this.
- * @param object the object to test equality against.
- * @return true if object equals this
- */
- @Override
- public boolean equals(Object object) {
- if (object == this ) {
- return true;
- }
- if (!(object instanceof MultivariateSummaryStatistics)) {
- return false;
- }
- MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
- return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
- MathArrays.equalsIncludingNaN(stat.getMax(), getMax()) &&
- MathArrays.equalsIncludingNaN(stat.getMean(), getMean()) &&
- MathArrays.equalsIncludingNaN(stat.getMin(), getMin()) &&
- Precision.equalsIncludingNaN(stat.getN(), getN()) &&
- MathArrays.equalsIncludingNaN(stat.getSum(), getSum()) &&
- MathArrays.equalsIncludingNaN(stat.getSumSq(), getSumSq()) &&
- MathArrays.equalsIncludingNaN(stat.getSumLog(), getSumLog()) &&
- stat.getCovariance().equals( getCovariance());
- }
- /**
- * Returns hash code based on values of statistics.
- *
- * @return hash code
- */
- @Override
- public int hashCode() {
- int result = 31 + Arrays.hashCode(getGeometricMean());
- result = result * 31 + Arrays.hashCode(getGeometricMean());
- result = result * 31 + Arrays.hashCode(getMax());
- result = result * 31 + Arrays.hashCode(getMean());
- result = result * 31 + Arrays.hashCode(getMin());
- result = result * 31 + Double.hashCode(getN());
- result = result * 31 + Arrays.hashCode(getSum());
- result = result * 31 + Arrays.hashCode(getSumSq());
- result = result * 31 + Arrays.hashCode(getSumLog());
- result = result * 31 + getCovariance().hashCode();
- return result;
- }
- // Getters and setters for statistics implementations
- /**
- * Sets statistics implementations.
- * @param newImpl new implementations for statistics
- * @param oldImpl old implementations for statistics
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e. if n > 0)
- */
- private void setImpl(StorelessUnivariateStatistic[] newImpl,
- StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
- DimensionMismatchException {
- checkEmpty();
- checkDimension(newImpl.length);
- System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
- }
- /**
- * Returns the currently configured Sum implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the sum
- */
- public StorelessUnivariateStatistic[] getSumImpl() {
- return sumImpl.clone();
- }
- /**
- * <p>Sets the implementation for the Sum.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param sumImpl the StorelessUnivariateStatistic instance to use
- * for computing the Sum
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
- throws MathIllegalStateException, DimensionMismatchException {
- setImpl(sumImpl, this.sumImpl);
- }
- /**
- * Returns the currently configured sum of squares implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the sum of squares
- */
- public StorelessUnivariateStatistic[] getSumsqImpl() {
- return sumSqImpl.clone();
- }
- /**
- * <p>Sets the implementation for the sum of squares.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param sumsqImpl the StorelessUnivariateStatistic instance to use
- * for computing the sum of squares
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
- throws MathIllegalStateException, DimensionMismatchException {
- setImpl(sumsqImpl, this.sumSqImpl);
- }
- /**
- * Returns the currently configured minimum implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the minimum
- */
- public StorelessUnivariateStatistic[] getMinImpl() {
- return minImpl.clone();
- }
- /**
- * <p>Sets the implementation for the minimum.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param minImpl the StorelessUnivariateStatistic instance to use
- * for computing the minimum
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
- throws MathIllegalStateException, DimensionMismatchException {
- setImpl(minImpl, this.minImpl);
- }
- /**
- * Returns the currently configured maximum implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the maximum
- */
- public StorelessUnivariateStatistic[] getMaxImpl() {
- return maxImpl.clone();
- }
- /**
- * <p>Sets the implementation for the maximum.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param maxImpl the StorelessUnivariateStatistic instance to use
- * for computing the maximum
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
- throws MathIllegalStateException, DimensionMismatchException{
- setImpl(maxImpl, this.maxImpl);
- }
- /**
- * Returns the currently configured sum of logs implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the log sum
- */
- public StorelessUnivariateStatistic[] getSumLogImpl() {
- return sumLogImpl.clone();
- }
- /**
- * <p>Sets the implementation for the sum of logs.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param sumLogImpl the StorelessUnivariateStatistic instance to use
- * for computing the log sum
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
- throws MathIllegalStateException, DimensionMismatchException{
- setImpl(sumLogImpl, this.sumLogImpl);
- }
- /**
- * Returns the currently configured geometric mean implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the geometric mean
- */
- public StorelessUnivariateStatistic[] getGeoMeanImpl() {
- return geoMeanImpl.clone();
- }
- /**
- * <p>Sets the implementation for the geometric mean.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
- * for computing the geometric mean
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
- throws MathIllegalStateException, DimensionMismatchException {
- setImpl(geoMeanImpl, this.geoMeanImpl);
- }
- /**
- * Returns the currently configured mean implementation.
- *
- * @return the StorelessUnivariateStatistic implementing the mean
- */
- public StorelessUnivariateStatistic[] getMeanImpl() {
- return meanImpl.clone();
- }
- /**
- * <p>Sets the implementation for the mean.</p>
- * <p>This method must be activated before any data has been added - i.e.,
- * before {@link #addValue(double[]) addValue} has been used to add data;
- * otherwise an IllegalStateException will be thrown.</p>
- *
- * @param meanImpl the StorelessUnivariateStatistic instance to use
- * for computing the mean
- * @throws DimensionMismatchException if the array dimension
- * does not match the one used at construction
- * @throws MathIllegalStateException if data has already been added
- * (i.e if n > 0)
- */
- public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
- throws MathIllegalStateException, DimensionMismatchException{
- setImpl(meanImpl, this.meanImpl);
- }
- /**
- * Throws MathIllegalStateException if the statistic is not empty.
- * @throws MathIllegalStateException if n > 0.
- */
- private void checkEmpty() throws MathIllegalStateException {
- if (n > 0) {
- throw new MathIllegalStateException(
- LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
- }
- }
- /**
- * Throws DimensionMismatchException if dimension != k.
- * @param dimension dimension to check
- * @throws DimensionMismatchException if dimension != k
- */
- private void checkDimension(int dimension) throws DimensionMismatchException {
- if (dimension != k) {
- throw new DimensionMismatchException(dimension, k);
- }
- }
- }