AggregateSummaryStatistics.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.math4.legacy.stat.descriptive;
- import java.util.Collection;
- import java.util.Iterator;
- import org.apache.commons.math4.legacy.exception.NullArgumentException;
- /**
- * <p>
- * An aggregator for {@code SummaryStatistics} from several data sets or
- * data set partitions. In its simplest usage mode, the client creates an
- * instance via the zero-argument constructor, then uses
- * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
- * for each individual data set / partition. The per-set statistics objects
- * are used as normal, and at any time the aggregate statistics for all the
- * contributors can be obtained from this object.
- * </p><p>
- * Clients with specialized requirements can use alternative constructors to
- * control the statistics implementations and initial values used by the
- * contributing and the internal aggregate {@code SummaryStatistics} objects.
- * </p><p>
- * A static {@link #aggregate(Collection)} method is also included that computes
- * aggregate statistics directly from a Collection of SummaryStatistics instances.
- * </p><p>
- * When {@link #createContributingStatistics()} is used to create SummaryStatistics
- * instances to be aggregated concurrently, the created instances'
- * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
- * instance maintained by this class. In multithreaded environments, if the functionality
- * provided by {@link #aggregate(Collection)} is adequate, that method should be used
- * to avoid unnecessary computation and synchronization delays.</p>
- *
- * @since 2.0
- *
- */
- public class AggregateSummaryStatistics implements StatisticalSummary {
- /**
- * A SummaryStatistics serving as a prototype for creating SummaryStatistics.
- * contributing to this aggregate
- */
- private final SummaryStatistics statisticsPrototype;
- /**
- * The SummaryStatistics in which aggregate statistics are accumulated.
- */
- private final SummaryStatistics statistics;
- /**
- * Initializes a new AggregateSummaryStatistics with default statistics
- * implementations.
- *
- */
- public AggregateSummaryStatistics() {
- // No try-catch or throws NAE because arg is guaranteed non-null
- this(new SummaryStatistics());
- }
- /**
- * Initializes a new AggregateSummaryStatistics with the specified statistics
- * object as a prototype for contributing statistics and for the internal
- * aggregate statistics. This provides for customized statistics implementations
- * to be used by contributing and aggregate statistics.
- *
- * @param prototypeStatistics a {@code SummaryStatistics} serving as a
- * prototype both for the internal aggregate statistics and for
- * contributing statistics obtained via the
- * {@code createContributingStatistics()} method. Being a prototype
- * means that other objects are initialized by copying this object's state.
- * If {@code null}, a new, default statistics object is used. Any statistic
- * values in the prototype are propagated to contributing statistics
- * objects and (once) into these aggregate statistics.
- * @throws NullArgumentException if prototypeStatistics is null
- * @see #createContributingStatistics()
- */
- public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
- this(prototypeStatistics,
- prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
- }
- /**
- * Initializes a new AggregateSummaryStatistics with the specified statistics
- * object as a prototype for contributing statistics and for the internal
- * aggregate statistics. This provides for different statistics implementations
- * to be used by contributing and aggregate statistics and for an initial
- * state to be supplied for the aggregate statistics.
- *
- * @param prototypeStatistics a {@code SummaryStatistics} serving as a
- * prototype both for the internal aggregate statistics and for
- * contributing statistics obtained via the
- * {@code createContributingStatistics()} method. Being a prototype
- * means that other objects are initialized by copying this object's state.
- * If {@code null}, a new, default statistics object is used. Any statistic
- * values in the prototype are propagated to contributing statistics
- * objects, but not into these aggregate statistics.
- * @param initialStatistics a {@code SummaryStatistics} to serve as the
- * internal aggregate statistics object. If {@code null}, a new, default
- * statistics object is used.
- * @see #createContributingStatistics()
- */
- public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
- SummaryStatistics initialStatistics) {
- this.statisticsPrototype =
- (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
- this.statistics =
- (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
- }
- /**
- * {@inheritDoc}. This version returns the maximum over all the aggregated
- * data.
- *
- * @see StatisticalSummary#getMax()
- */
- @Override
- public double getMax() {
- synchronized (statistics) {
- return statistics.getMax();
- }
- }
- /**
- * {@inheritDoc}. This version returns the mean of all the aggregated data.
- *
- * @see StatisticalSummary#getMean()
- */
- @Override
- public double getMean() {
- synchronized (statistics) {
- return statistics.getMean();
- }
- }
- /**
- * {@inheritDoc}. This version returns the minimum over all the aggregated
- * data.
- *
- * @see StatisticalSummary#getMin()
- */
- @Override
- public double getMin() {
- synchronized (statistics) {
- return statistics.getMin();
- }
- }
- /**
- * {@inheritDoc}. This version returns a count of all the aggregated data.
- *
- * @see StatisticalSummary#getN()
- */
- @Override
- public long getN() {
- synchronized (statistics) {
- return statistics.getN();
- }
- }
- /**
- * {@inheritDoc}. This version returns the standard deviation of all the
- * aggregated data.
- *
- * @see StatisticalSummary#getStandardDeviation()
- */
- @Override
- public double getStandardDeviation() {
- synchronized (statistics) {
- return statistics.getStandardDeviation();
- }
- }
- /**
- * {@inheritDoc}. This version returns a sum of all the aggregated data.
- *
- * @see StatisticalSummary#getSum()
- */
- @Override
- public double getSum() {
- synchronized (statistics) {
- return statistics.getSum();
- }
- }
- /**
- * {@inheritDoc}. This version returns the variance of all the aggregated
- * data.
- *
- * @see StatisticalSummary#getVariance()
- */
- @Override
- public double getVariance() {
- synchronized (statistics) {
- return statistics.getVariance();
- }
- }
- /**
- * Returns the sum of the logs of all the aggregated data.
- *
- * @return the sum of logs
- * @see SummaryStatistics#getSumOfLogs()
- */
- public double getSumOfLogs() {
- synchronized (statistics) {
- return statistics.getSumOfLogs();
- }
- }
- /**
- * Returns the geometric mean of all the aggregated data.
- *
- * @return the geometric mean
- * @see SummaryStatistics#getGeometricMean()
- */
- public double getGeometricMean() {
- synchronized (statistics) {
- return statistics.getGeometricMean();
- }
- }
- /**
- * Returns the sum of the squares of all the aggregated data.
- *
- * @return The sum of squares
- * @see SummaryStatistics#getSumsq()
- */
- public double getSumsq() {
- synchronized (statistics) {
- return statistics.getSumsq();
- }
- }
- /**
- * Returns a statistic related to the Second Central Moment. Specifically,
- * what is returned is the sum of squared deviations from the sample mean
- * among the all of the aggregated data.
- *
- * @return second central moment statistic
- * @see SummaryStatistics#getSecondMoment()
- */
- public double getSecondMoment() {
- synchronized (statistics) {
- return statistics.getSecondMoment();
- }
- }
- /**
- * Return a {@link StatisticalSummaryValues} instance reporting current
- * aggregate statistics.
- *
- * @return Current values of aggregate statistics
- */
- public StatisticalSummary getSummary() {
- synchronized (statistics) {
- return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
- getMax(), getMin(), getSum());
- }
- }
- /**
- * Creates and returns a {@code SummaryStatistics} whose data will be
- * aggregated with those of this {@code AggregateSummaryStatistics}.
- *
- * @return a {@code SummaryStatistics} whose data will be aggregated with
- * those of this {@code AggregateSummaryStatistics}. The initial state
- * is a copy of the configured prototype statistics.
- */
- public SummaryStatistics createContributingStatistics() {
- SummaryStatistics contributingStatistics
- = new AggregatingSummaryStatistics(statistics);
- // No try - catch or advertising NAE because neither argument will ever be null
- SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
- return contributingStatistics;
- }
- /**
- * Computes aggregate summary statistics. This method can be used to combine statistics
- * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
- * should contain the same values that would have been obtained by computing a single
- * StatisticalSummary over the combined dataset.
- * <p>
- * Returns null if the collection is empty or null.
- * </p>
- *
- * @param statistics collection of SummaryStatistics to aggregate
- * @return summary statistics for the combined dataset
- */
- public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) {
- if (statistics == null) {
- return null;
- }
- Iterator<? extends StatisticalSummary> iterator = statistics.iterator();
- if (!iterator.hasNext()) {
- return null;
- }
- StatisticalSummary current = iterator.next();
- long n = current.getN();
- double min = current.getMin();
- double sum = current.getSum();
- double max = current.getMax();
- double var = current.getVariance();
- double m2 = var * (n - 1d);
- double mean = current.getMean();
- while (iterator.hasNext()) {
- current = iterator.next();
- if (current.getMin() < min || Double.isNaN(min)) {
- min = current.getMin();
- }
- if (current.getMax() > max || Double.isNaN(max)) {
- max = current.getMax();
- }
- sum += current.getSum();
- final double oldN = n;
- final double curN = current.getN();
- n += curN;
- final double meanDiff = current.getMean() - mean;
- mean = sum / n;
- final double curM2 = current.getVariance() * (curN - 1d);
- m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n;
- }
- final double variance;
- if (n == 0) {
- variance = Double.NaN;
- } else if (n == 1) {
- variance = 0d;
- } else {
- variance = m2 / (n - 1);
- }
- return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
- }
- /**
- * A SummaryStatistics that also forwards all values added to it to a second
- * {@code SummaryStatistics} for aggregation.
- *
- * @since 2.0
- */
- private static final class AggregatingSummaryStatistics extends SummaryStatistics {
- /**
- * The serialization version of this class.
- */
- private static final long serialVersionUID = 1L;
- /**
- * An additional SummaryStatistics into which values added to these.
- * statistics (and possibly others) are aggregated
- */
- private final SummaryStatistics aggregateStatistics;
- /**
- * Initializes a new AggregatingSummaryStatistics with the specified.
- * aggregate statistics object
- *
- * @param aggregateStatistics a {@code SummaryStatistics} into which
- * values added to this statistics object should be aggregated
- */
- AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
- this.aggregateStatistics = aggregateStatistics;
- }
- /**
- * {@inheritDoc}. This version adds the provided value to the configured
- * aggregate after adding it to these statistics.
- *
- * @see SummaryStatistics#addValue(double)
- */
- @Override
- public void addValue(double value) {
- super.addValue(value);
- synchronized (aggregateStatistics) {
- aggregateStatistics.addValue(value);
- }
- }
- /**
- * Returns true iff <code>object</code> is a
- * <code>SummaryStatistics</code> instance and all statistics have the
- * same values as this.
- * @param object the object to test equality against.
- * @return true if object equals this
- */
- @Override
- public boolean equals(Object object) {
- if (object == this) {
- return true;
- }
- if (!(object instanceof AggregatingSummaryStatistics)) {
- return false;
- }
- AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
- return super.equals(stat) &&
- aggregateStatistics.equals(stat.aggregateStatistics);
- }
- /**
- * Returns hash code based on values of statistics.
- * @return hash code
- */
- @Override
- public int hashCode() {
- return 123 + super.hashCode() + aggregateStatistics.hashCode();
- }
- }
- }