SummaryStatistics.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.math4.legacy.stat.descriptive;
- import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
- import org.apache.commons.math4.legacy.exception.NullArgumentException;
- import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.SecondMoment;
- import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
- import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
- import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
- import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
- import org.apache.commons.math4.core.jdkmath.JdkMath;
- import org.apache.commons.numbers.core.Precision;
- /**
- * <p>
- * Computes summary statistics for a stream of data values added using the
- * {@link #addValue(double) addValue} method. The data values are not stored in
- * memory, so this class can be used to compute statistics for very large data
- * streams.
- * </p>
- * <p>
- * The {@link StorelessUnivariateStatistic} instances used to maintain summary
- * state and compute statistics are configurable via setters. For example, the
- * default implementation for the variance can be overridden by calling
- * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
- * these methods must implement the {@link StorelessUnivariateStatistic}
- * interface and configuration must be completed before <code>addValue</code>
- * is called. No configuration is necessary to use the default, commons-math
- * provided implementations.
- * </p>
- * <p>
- * Note: This class is not thread-safe. Use
- * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
- * threads is required.
- * </p>
- */
- public class SummaryStatistics implements StatisticalSummary {
- /** count of values that have been added. */
- private long n;
- /** SecondMoment is used to compute the mean and variance. */
- private SecondMoment secondMoment = new SecondMoment();
- /** sum of values that have been added. */
- private Sum sum = new Sum();
- /** sum of the square of each value that has been added. */
- private SumOfSquares sumsq = new SumOfSquares();
- /** min of values that have been added. */
- private Min min = new Min();
- /** max of values that have been added. */
- private Max max = new Max();
- /** sumLog of values that have been added. */
- private SumOfLogs sumLog = new SumOfLogs();
- /** geoMean of values that have been added. */
- private GeometricMean geoMean = new GeometricMean(sumLog);
- /** mean of values that have been added. */
- private Mean mean = new Mean(secondMoment);
- /** variance of values that have been added. */
- private Variance variance = new Variance(secondMoment);
- /** Sum statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic sumImpl = sum;
- /** Sum of squares statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic sumsqImpl = sumsq;
- /** Minimum statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic minImpl = min;
- /** Maximum statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic maxImpl = max;
- /** Sum of log statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic sumLogImpl = sumLog;
- /** Geometric mean statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic geoMeanImpl = geoMean;
- /** Mean statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic meanImpl = mean;
- /** Variance statistic implementation - can be reset by setter. */
- private StorelessUnivariateStatistic varianceImpl = variance;
- /**
- * Construct a SummaryStatistics instance.
- */
- public SummaryStatistics() {
- }
- /**
- * A copy constructor. Creates a deep-copy of the {@code original}.
- *
- * @param original the {@code SummaryStatistics} instance to copy
- * @throws NullArgumentException if original is null
- */
- public SummaryStatistics(SummaryStatistics original) throws NullArgumentException {
- copy(original, this);
- }
- /**
- * Return a {@link StatisticalSummaryValues} instance reporting current
- * statistics.
- * @return Current values of statistics
- */
- public StatisticalSummary getSummary() {
- return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
- getMax(), getMin(), getSum());
- }
- /**
- * Add a value to the data.
- * @param value the value to add
- */
- public void addValue(double value) {
- sumImpl.increment(value);
- sumsqImpl.increment(value);
- minImpl.increment(value);
- maxImpl.increment(value);
- sumLogImpl.increment(value);
- secondMoment.increment(value);
- // If mean, variance or geomean have been overridden,
- // need to increment these
- if (meanImpl != mean) {
- meanImpl.increment(value);
- }
- if (varianceImpl != variance) {
- varianceImpl.increment(value);
- }
- if (geoMeanImpl != geoMean) {
- geoMeanImpl.increment(value);
- }
- n++;
- }
- /**
- * Returns the number of available values.
- * @return The number of available values
- */
- @Override
- public long getN() {
- return n;
- }
- /**
- * Returns the sum of the values that have been added.
- * @return The sum or <code>Double.NaN</code> if no values have been added
- */
- @Override
- public double getSum() {
- return sumImpl.getResult();
- }
- /**
- * Returns the sum of the squares of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return The sum of squares
- */
- public double getSumsq() {
- return sumsqImpl.getResult();
- }
- /**
- * Returns the mean of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the mean
- */
- @Override
- public double getMean() {
- return meanImpl.getResult();
- }
- /**
- * Returns the standard deviation of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the standard deviation
- */
- @Override
- public double getStandardDeviation() {
- double stdDev = Double.NaN;
- if (getN() > 0) {
- if (getN() > 1) {
- stdDev = JdkMath.sqrt(getVariance());
- } else {
- stdDev = 0.0;
- }
- }
- return stdDev;
- }
- /**
- * Returns the quadratic mean, a.k.a.
- * <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
- * root-mean-square</a> of the available values
- * @return The quadratic mean or {@code Double.NaN} if no values
- * have been added.
- */
- public double getQuadraticMean() {
- final long size = getN();
- return size > 0 ? JdkMath.sqrt(getSumsq() / size) : Double.NaN;
- }
- /**
- * Returns the (sample) variance of the available values.
- *
- * <p>This method returns the bias-corrected sample variance (using {@code n - 1} in
- * the denominator). Use {@link #getPopulationVariance()} for the non-bias-corrected
- * population variance.</p>
- *
- * <p>Double.NaN is returned if no values have been added.</p>
- *
- * @return the variance
- */
- @Override
- public double getVariance() {
- return varianceImpl.getResult();
- }
- /**
- * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
- * population variance</a> of the values that have been added.
- *
- * <p>Double.NaN is returned if no values have been added.</p>
- *
- * @return the population variance
- */
- public double getPopulationVariance() {
- Variance populationVariance = new Variance(secondMoment);
- populationVariance.setBiasCorrected(false);
- return populationVariance.getResult();
- }
- /**
- * Returns the maximum of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the maximum
- */
- @Override
- public double getMax() {
- return maxImpl.getResult();
- }
- /**
- * Returns the minimum of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the minimum
- */
- @Override
- public double getMin() {
- return minImpl.getResult();
- }
- /**
- * Returns the geometric mean of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the geometric mean
- */
- public double getGeometricMean() {
- return geoMeanImpl.getResult();
- }
- /**
- * Returns the sum of the logs of the values that have been added.
- * <p>
- * Double.NaN is returned if no values have been added.
- * </p>
- * @return the sum of logs
- * @since 1.2
- */
- public double getSumOfLogs() {
- return sumLogImpl.getResult();
- }
- /**
- * Returns a statistic related to the Second Central Moment. Specifically,
- * what is returned is the sum of squared deviations from the sample mean
- * among the values that have been added.
- * <p>
- * Returns <code>Double.NaN</code> if no data values have been added and
- * returns <code>0</code> if there is just one value in the data set.
- * </p>
- * @return second central moment statistic
- * @since 2.0
- */
- public double getSecondMoment() {
- return secondMoment.getResult();
- }
- /**
- * Generates a text report displaying summary statistics from values that
- * have been added.
- * @return String with line feeds displaying statistics
- * @since 1.2
- */
- @Override
- public String toString() {
- StringBuilder outBuffer = new StringBuilder();
- String endl = "\n";
- outBuffer.append("SummaryStatistics:").append(endl);
- outBuffer.append("n: ").append(getN()).append(endl);
- outBuffer.append("min: ").append(getMin()).append(endl);
- outBuffer.append("max: ").append(getMax()).append(endl);
- outBuffer.append("sum: ").append(getSum()).append(endl);
- outBuffer.append("mean: ").append(getMean()).append(endl);
- outBuffer.append("geometric mean: ").append(getGeometricMean())
- .append(endl);
- outBuffer.append("variance: ").append(getVariance()).append(endl);
- outBuffer.append("population variance: ").append(getPopulationVariance()).append(endl);
- outBuffer.append("second moment: ").append(getSecondMoment()).append(endl);
- outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
- outBuffer.append("standard deviation: ").append(getStandardDeviation())
- .append(endl);
- outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
- return outBuffer.toString();
- }
- /**
- * Resets all statistics and storage.
- */
- public void clear() {
- this.n = 0;
- minImpl.clear();
- maxImpl.clear();
- sumImpl.clear();
- sumLogImpl.clear();
- sumsqImpl.clear();
- geoMeanImpl.clear();
- secondMoment.clear();
- if (meanImpl != mean) {
- meanImpl.clear();
- }
- if (varianceImpl != variance) {
- varianceImpl.clear();
- }
- }
- /**
- * Returns true iff <code>object</code> is a
- * <code>SummaryStatistics</code> instance and all statistics have the
- * same values as this.
- * @param object the object to test equality against.
- * @return true if object equals this
- */
- @Override
- public boolean equals(Object object) {
- if (object == this) {
- return true;
- }
- if (!(object instanceof SummaryStatistics)) {
- return false;
- }
- SummaryStatistics stat = (SummaryStatistics)object;
- return Precision.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
- Precision.equalsIncludingNaN(stat.getMax(), getMax()) &&
- Precision.equalsIncludingNaN(stat.getMean(), getMean()) &&
- Precision.equalsIncludingNaN(stat.getMin(), getMin()) &&
- Precision.equalsIncludingNaN(stat.getN(), getN()) &&
- Precision.equalsIncludingNaN(stat.getSum(), getSum()) &&
- Precision.equalsIncludingNaN(stat.getSumsq(), getSumsq()) &&
- Precision.equalsIncludingNaN(stat.getVariance(), getVariance());
- }
- /**
- * Returns hash code based on values of statistics.
- * @return hash code
- */
- @Override
- public int hashCode() {
- int result = 31 + Double.hashCode(getGeometricMean());
- result = result * 31 + Double.hashCode(getGeometricMean());
- result = result * 31 + Double.hashCode(getMax());
- result = result * 31 + Double.hashCode(getMean());
- result = result * 31 + Double.hashCode(getMin());
- result = result * 31 + Double.hashCode(getN());
- result = result * 31 + Double.hashCode(getSum());
- result = result * 31 + Double.hashCode(getSumsq());
- result = result * 31 + Double.hashCode(getVariance());
- return result;
- }
- // Getters and setters for statistics implementations
- /**
- * Returns the currently configured Sum implementation.
- * @return the StorelessUnivariateStatistic implementing the sum
- * @since 1.2
- */
- public StorelessUnivariateStatistic getSumImpl() {
- return sumImpl;
- }
- /**
- * <p>
- * Sets the implementation for the Sum.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param sumImpl the StorelessUnivariateStatistic instance to use for
- * computing the Sum
- * @throws MathIllegalStateException if data has already been added (i.e if n >0)
- * @since 1.2
- */
- public void setSumImpl(StorelessUnivariateStatistic sumImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.sumImpl = sumImpl;
- }
- /**
- * Returns the currently configured sum of squares implementation.
- * @return the StorelessUnivariateStatistic implementing the sum of squares
- * @since 1.2
- */
- public StorelessUnivariateStatistic getSumsqImpl() {
- return sumsqImpl;
- }
- /**
- * <p>
- * Sets the implementation for the sum of squares.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
- * computing the sum of squares
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.sumsqImpl = sumsqImpl;
- }
- /**
- * Returns the currently configured minimum implementation.
- * @return the StorelessUnivariateStatistic implementing the minimum
- * @since 1.2
- */
- public StorelessUnivariateStatistic getMinImpl() {
- return minImpl;
- }
- /**
- * <p>
- * Sets the implementation for the minimum.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param minImpl the StorelessUnivariateStatistic instance to use for
- * computing the minimum
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setMinImpl(StorelessUnivariateStatistic minImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.minImpl = minImpl;
- }
- /**
- * Returns the currently configured maximum implementation.
- * @return the StorelessUnivariateStatistic implementing the maximum
- * @since 1.2
- */
- public StorelessUnivariateStatistic getMaxImpl() {
- return maxImpl;
- }
- /**
- * <p>
- * Sets the implementation for the maximum.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param maxImpl the StorelessUnivariateStatistic instance to use for
- * computing the maximum
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setMaxImpl(StorelessUnivariateStatistic maxImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.maxImpl = maxImpl;
- }
- /**
- * Returns the currently configured sum of logs implementation.
- * @return the StorelessUnivariateStatistic implementing the log sum
- * @since 1.2
- */
- public StorelessUnivariateStatistic getSumLogImpl() {
- return sumLogImpl;
- }
- /**
- * <p>
- * Sets the implementation for the sum of logs.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
- * computing the log sum
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.sumLogImpl = sumLogImpl;
- geoMean.setSumLogImpl(sumLogImpl);
- }
- /**
- * Returns the currently configured geometric mean implementation.
- * @return the StorelessUnivariateStatistic implementing the geometric mean
- * @since 1.2
- */
- public StorelessUnivariateStatistic getGeoMeanImpl() {
- return geoMeanImpl;
- }
- /**
- * <p>
- * Sets the implementation for the geometric mean.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
- * computing the geometric mean
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.geoMeanImpl = geoMeanImpl;
- }
- /**
- * Returns the currently configured mean implementation.
- * @return the StorelessUnivariateStatistic implementing the mean
- * @since 1.2
- */
- public StorelessUnivariateStatistic getMeanImpl() {
- return meanImpl;
- }
- /**
- * <p>
- * Sets the implementation for the mean.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param meanImpl the StorelessUnivariateStatistic instance to use for
- * computing the mean
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setMeanImpl(StorelessUnivariateStatistic meanImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.meanImpl = meanImpl;
- }
- /**
- * Returns the currently configured variance implementation.
- * @return the StorelessUnivariateStatistic implementing the variance
- * @since 1.2
- */
- public StorelessUnivariateStatistic getVarianceImpl() {
- return varianceImpl;
- }
- /**
- * <p>
- * Sets the implementation for the variance.
- * </p>
- * <p>
- * This method cannot be activated after data has been added - i.e.,
- * after {@link #addValue(double) addValue} has been used to add data.
- * If it is activated after data has been added, an IllegalStateException
- * will be thrown.
- * </p>
- * @param varianceImpl the StorelessUnivariateStatistic instance to use for
- * computing the variance
- * @throws MathIllegalStateException if data has already been added (i.e if n > 0)
- * @since 1.2
- */
- public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl)
- throws MathIllegalStateException {
- checkEmpty();
- this.varianceImpl = varianceImpl;
- }
- /**
- * Throws IllegalStateException if n > 0.
- * @throws MathIllegalStateException if data has been added
- */
- private void checkEmpty() throws MathIllegalStateException {
- if (n > 0) {
- throw new MathIllegalStateException(
- LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
- }
- }
- /**
- * Returns a copy of this SummaryStatistics instance with the same internal state.
- *
- * @return a copy of this
- */
- public SummaryStatistics copy() {
- SummaryStatistics result = new SummaryStatistics();
- // No try-catch or advertised exception because arguments are guaranteed non-null
- copy(this, result);
- return result;
- }
- /**
- * Copies source to dest.
- * <p>Neither source nor dest can be null.</p>
- *
- * @param source SummaryStatistics to copy
- * @param dest SummaryStatistics to copy to
- * @throws NullArgumentException if either source or dest is null
- */
- public static void copy(SummaryStatistics source, SummaryStatistics dest)
- throws NullArgumentException {
- NullArgumentException.check(source);
- NullArgumentException.check(dest);
- dest.maxImpl = source.maxImpl.copy();
- dest.minImpl = source.minImpl.copy();
- dest.sumImpl = source.sumImpl.copy();
- dest.sumLogImpl = source.sumLogImpl.copy();
- dest.sumsqImpl = source.sumsqImpl.copy();
- dest.secondMoment = source.secondMoment.copy();
- dest.n = source.n;
- // Keep commons-math supplied statistics with embedded moments in synch
- if (source.getVarianceImpl() instanceof Variance) {
- dest.varianceImpl = new Variance(dest.secondMoment);
- } else {
- dest.varianceImpl = source.varianceImpl.copy();
- }
- if (source.meanImpl instanceof Mean) {
- dest.meanImpl = new Mean(dest.secondMoment);
- } else {
- dest.meanImpl = source.meanImpl.copy();
- }
- if (source.getGeoMeanImpl() instanceof GeometricMean) {
- dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
- } else {
- dest.geoMeanImpl = source.geoMeanImpl.copy();
- }
- // Make sure that if stat == statImpl in source, same
- // holds in dest; otherwise copy stat
- if (source.geoMean == source.geoMeanImpl) {
- dest.geoMean = (GeometricMean) dest.geoMeanImpl;
- } else {
- GeometricMean.copy(source.geoMean, dest.geoMean);
- }
- if (source.max == source.maxImpl) {
- dest.max = (Max) dest.maxImpl;
- } else {
- Max.copy(source.max, dest.max);
- }
- if (source.mean == source.meanImpl) {
- dest.mean = (Mean) dest.meanImpl;
- } else {
- Mean.copy(source.mean, dest.mean);
- }
- if (source.min == source.minImpl) {
- dest.min = (Min) dest.minImpl;
- } else {
- Min.copy(source.min, dest.min);
- }
- if (source.sum == source.sumImpl) {
- dest.sum = (Sum) dest.sumImpl;
- } else {
- Sum.copy(source.sum, dest.sum);
- }
- if (source.variance == source.varianceImpl) {
- dest.variance = (Variance) dest.varianceImpl;
- } else {
- Variance.copy(source.variance, dest.variance);
- }
- if (source.sumLog == source.sumLogImpl) {
- dest.sumLog = (SumOfLogs) dest.sumLogImpl;
- } else {
- SumOfLogs.copy(source.sumLog, dest.sumLog);
- }
- if (source.sumsq == source.sumsqImpl) {
- dest.sumsq = (SumOfSquares) dest.sumsqImpl;
- } else {
- SumOfSquares.copy(source.sumsq, dest.sumsq);
- }
- }
- }