DoubleStatistics.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.descriptive;

  18. import java.util.Objects;
  19. import java.util.Set;
  20. import java.util.function.BiFunction;
  21. import java.util.function.DoubleConsumer;
  22. import java.util.function.Function;

  23. /**
  24.  * Statistics for {@code double} values.
  25.  *
  26.  * <p>This class provides combinations of individual statistic implementations in the
  27.  * {@code org.apache.commons.statistics.descriptive} package.
  28.  *
  29.  * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
  30.  * This implementation does not check for overflow of the count.
  31.  *
  32.  * @since 1.1
  33.  */
  34. public final class DoubleStatistics implements DoubleConsumer {
  35.     /** Error message for non configured statistics. */
  36.     private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
  37.     /** Error message for an unsupported statistic. */
  38.     private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";

  39.     /** Count of values recorded. */
  40.     private long count;
  41.     /** The consumer of values. */
  42.     private final DoubleConsumer consumer;
  43.     /** The {@link Min} implementation. */
  44.     private final Min min;
  45.     /** The {@link Max} implementation. */
  46.     private final Max max;
  47.     /** The moment implementation. May be any instance of {@link FirstMoment}. */
  48.     private final FirstMoment moment;
  49.     /** The {@link Sum} implementation. */
  50.     private final Sum sum;
  51.     /** The {@link Product} implementation. */
  52.     private final Product product;
  53.     /** The {@link SumOfSquares} implementation. */
  54.     private final SumOfSquares sumOfSquares;
  55.     /** The {@link SumOfLogs} implementation. */
  56.     private final SumOfLogs sumOfLogs;
  57.     /** Configuration options for computation of statistics. */
  58.     private StatisticsConfiguration config;

  59.     /**
  60.      * A builder for {@link DoubleStatistics}.
  61.      */
  62.     public static final class Builder {
  63.         /** An empty double array. */
  64.         private static final double[] NO_VALUES = {};

  65.         /** The {@link Min} constructor. */
  66.         private Function<double[], Min> min;
  67.         /** The {@link Max} constructor. */
  68.         private Function<double[], Max> max;
  69.         /** The moment constructor. May return any instance of {@link FirstMoment}. */
  70.         private BiFunction<org.apache.commons.numbers.core.Sum, double[], FirstMoment> moment;
  71.         /** The {@link Sum} constructor. */
  72.         private Function<org.apache.commons.numbers.core.Sum, Sum> sum;
  73.         /** The {@link Product} constructor. */
  74.         private Function<double[], Product> product;
  75.         /** The {@link SumOfSquares} constructor. */
  76.         private Function<double[], SumOfSquares> sumOfSquares;
  77.         /** The {@link SumOfLogs} constructor. */
  78.         private Function<double[], SumOfLogs> sumOfLogs;
  79.         /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
  80.          * instance constructed by {@link #moment}. This should only be increased from the default
  81.          * of zero (corresponding to no moment computation). */
  82.         private int momentOrder;
  83.         /** Configuration options for computation of statistics. */
  84.         private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();

  85.         /**
  86.          * Create an instance.
  87.          */
  88.         Builder() {
  89.             // Do nothing
  90.         }

  91.         /**
  92.          * Add the statistic to the statistics to compute.
  93.          *
  94.          * @param statistic Statistic to compute.
  95.          * @return {@code this} instance
  96.          */
  97.         Builder add(Statistic statistic) {
  98.             switch (statistic) {
  99.             case GEOMETRIC_MEAN:
  100.             case SUM_OF_LOGS:
  101.                 sumOfLogs = SumOfLogs::of;
  102.                 break;
  103.             case KURTOSIS:
  104.                 createMoment(4);
  105.                 break;
  106.             case MAX:
  107.                 max = Max::of;
  108.                 break;
  109.             case MEAN:
  110.                 createMoment(1);
  111.                 break;
  112.             case MIN:
  113.                 min = Min::of;
  114.                 break;
  115.             case PRODUCT:
  116.                 product = Product::of;
  117.                 break;
  118.             case SKEWNESS:
  119.                 createMoment(3);
  120.                 break;
  121.             case STANDARD_DEVIATION:
  122.             case VARIANCE:
  123.                 createMoment(2);
  124.                 break;
  125.             case SUM:
  126.                 sum = Sum::new;
  127.                 break;
  128.             case SUM_OF_SQUARES:
  129.                 sumOfSquares = SumOfSquares::of;
  130.                 break;
  131.             default:
  132.                 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
  133.             }
  134.             return this;
  135.         }

  136.         /**
  137.          * Creates the moment constructor for the specified {@code order},
  138.          * e.g. order=2 is sum of squared deviations.
  139.          *
  140.          * @param order Order.
  141.          */
  142.         private void createMoment(int order) {
  143.             if (order > momentOrder) {
  144.                 momentOrder = order;
  145.                 if (order == 4) {
  146.                     moment = SumOfFourthDeviations::create;
  147.                 } else if (order == 3) {
  148.                     moment = SumOfCubedDeviations::create;
  149.                 } else if (order == 2) {
  150.                     moment = SumOfSquaredDeviations::create;
  151.                 } else {
  152.                     // Assume order == 1
  153.                     moment = FirstMoment::create;
  154.                 }
  155.             }
  156.         }

  157.         /**
  158.          * Sets the statistics configuration options for computation of statistics.
  159.          *
  160.          * @param v Value.
  161.          * @return the builder
  162.          * @throws NullPointerException if the value is null
  163.          */
  164.         public Builder setConfiguration(StatisticsConfiguration v) {
  165.             config = Objects.requireNonNull(v);
  166.             return this;
  167.         }

  168.         /**
  169.          * Builds a {@code DoubleStatistics} instance.
  170.          *
  171.          * @return {@code DoubleStatistics} instance.
  172.          */
  173.         public DoubleStatistics build() {
  174.             return build(NO_VALUES);
  175.         }

  176.         /**
  177.          * Builds a {@code DoubleStatistics} instance using the input {@code values}.
  178.          *
  179.          * <p>Note: {@code DoubleStatistics} computed using
  180.          * {@link DoubleStatistics#accept(double) accept} may be
  181.          * different from this instance.
  182.          *
  183.          * @param values Values.
  184.          * @return {@code DoubleStatistics} instance.
  185.          */
  186.         public DoubleStatistics build(double... values) {
  187.             Objects.requireNonNull(values, "values");
  188.             // Create related statistics
  189.             FirstMoment m = null;
  190.             Sum sumStat = null;
  191.             if (moment != null || sum != null) {
  192.                 final org.apache.commons.numbers.core.Sum s =
  193.                     org.apache.commons.numbers.core.Sum.of(values);
  194.                 m = create(moment, s, values);
  195.                 sumStat = create(sum, s);
  196.             }
  197.             return new DoubleStatistics(
  198.                 values.length,
  199.                 create(min, values),
  200.                 create(max, values),
  201.                 m,
  202.                 sumStat,
  203.                 create(product, values),
  204.                 create(sumOfSquares, values),
  205.                 create(sumOfLogs, values),
  206.                 config);
  207.         }

  208.         /**
  209.          * Creates the object from the {@code values}.
  210.          *
  211.          * @param <S> value type
  212.          * @param <T> object type
  213.          * @param constructor Constructor.
  214.          * @param values Values
  215.          * @return the instance
  216.          */
  217.         private static <S, T> T create(Function<S, T> constructor, S values) {
  218.             if (constructor != null) {
  219.                 return constructor.apply(values);
  220.             }
  221.             return null;
  222.         }

  223.         /**
  224.          * Creates the object from the values {@code r} and {@code s}.
  225.          *
  226.          * @param <R> value type
  227.          * @param <S> value type
  228.          * @param <T> object type
  229.          * @param constructor Constructor.
  230.          * @param r Value.
  231.          * @param s Value.
  232.          * @return the instance
  233.          */
  234.         private static <R, S, T> T create(BiFunction<R, S, T> constructor, R r, S s) {
  235.             if (constructor != null) {
  236.                 return constructor.apply(r, s);
  237.             }
  238.             return null;
  239.         }
  240.     }

  241.     /**
  242.      * Create an instance.
  243.      *
  244.      * @param count Count of values.
  245.      * @param min Min implementation.
  246.      * @param max Max implementation.
  247.      * @param moment Moment implementation.
  248.      * @param sum Sum implementation.
  249.      * @param product Product implementation.
  250.      * @param sumOfSquares Sum of squares implementation.
  251.      * @param sumOfLogs Sum of logs implementation.
  252.      * @param config Statistics configuration.
  253.      */
  254.     DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
  255.                      Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
  256.                      StatisticsConfiguration config) {
  257.         this.count = count;
  258.         this.min = min;
  259.         this.max = max;
  260.         this.moment = moment;
  261.         this.sum = sum;
  262.         this.product = product;
  263.         this.sumOfSquares = sumOfSquares;
  264.         this.sumOfLogs = sumOfLogs;
  265.         this.config = config;
  266.         consumer = Statistics.compose(min, max, moment, sum, product, sumOfSquares, sumOfLogs);
  267.     }

  268.     /**
  269.      * Returns a new instance configured to compute the specified {@code statistics}.
  270.      *
  271.      * <p>The statistics will be empty and so will return the default values for each
  272.      * computed statistic.
  273.      *
  274.      * @param statistics Statistics to compute.
  275.      * @return the instance
  276.      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
  277.      */
  278.     public static DoubleStatistics of(Statistic... statistics) {
  279.         return builder(statistics).build();
  280.     }

  281.     /**
  282.      * Returns a new instance configured to compute the specified {@code statistics}
  283.      * populated using the input {@code values}.
  284.      *
  285.      * <p>Use this method to create an instance populated with a (variable) array of
  286.      * {@code double[]} data:
  287.      *
  288.      * <pre>
  289.      * DoubleStatistics stats = DoubleStatistics.of(
  290.      *     EnumSet.of(Statistic.MIN, Statistic.MAX),
  291.      *     1, 1, 2, 3, 5, 8, 13);
  292.      * </pre>
  293.      *
  294.      * @param statistics Statistics to compute.
  295.      * @param values Values.
  296.      * @return the instance
  297.      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
  298.      */
  299.     public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
  300.         if (statistics.isEmpty()) {
  301.             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
  302.         }
  303.         final Builder b = new Builder();
  304.         statistics.forEach(b::add);
  305.         return b.build(values);
  306.     }

  307.     /**
  308.      * Returns a new builder configured to create instances to compute the specified
  309.      * {@code statistics}.
  310.      *
  311.      * <p>Use this method to create an instance populated with an array of {@code double[]}
  312.      * data using the {@link Builder#build(double...)} method:
  313.      *
  314.      * <pre>
  315.      * double[] data = ...
  316.      * DoubleStatistics stats = DoubleStatistics.builder(
  317.      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
  318.      *     .build(data);
  319.      * </pre>
  320.      *
  321.      * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
  322.      * to be used in parallel, or on separate arrays of {@code double[]} data. These may
  323.      * be {@link #combine(DoubleStatistics) combined}. For example:
  324.      *
  325.      * <pre>
  326.      * double[][] data = ...
  327.      * DoubleStatistics.Builder builder = DoubleStatistics.builder(
  328.      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
  329.      * DoubleStatistics stats = Arrays.stream(data)
  330.      *     .parallel()
  331.      *     .map(builder::build)
  332.      *     .reduce(DoubleStatistics::combine)
  333.      *     .get();
  334.      * </pre>
  335.      *
  336.      * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
  337.      * use on multiple data:
  338.      *
  339.      * <pre>{@code
  340.      * DoubleStatistics.Builder builder = DoubleStatistics.builder(
  341.      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
  342.      * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
  343.      *     Collector.of(builder::build,
  344.      *                  (s, d) -> s.combine(builder.build(d)),
  345.      *                  DoubleStatistics::combine);
  346.      *
  347.      * // Repeated
  348.      * double[][] data = ...
  349.      * DoubleStatistics stats = Arrays.stream(data).collect(collector);
  350.      * }</pre>
  351.      *
  352.      * @param statistics Statistics to compute.
  353.      * @return the builder
  354.      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
  355.      */
  356.     public static Builder builder(Statistic... statistics) {
  357.         if (statistics.length == 0) {
  358.             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
  359.         }
  360.         final Builder b = new Builder();
  361.         for (final Statistic s : statistics) {
  362.             b.add(s);
  363.         }
  364.         return b;
  365.     }

  366.     /**
  367.      * Updates the state of the statistics to reflect the addition of {@code value}.
  368.      *
  369.      * @param value Value.
  370.      */
  371.     @Override
  372.     public void accept(double value) {
  373.         count++;
  374.         consumer.accept(value);
  375.     }

  376.     /**
  377.      * Return the count of values recorded.
  378.      *
  379.      * @return the count of values
  380.      */
  381.     public long getCount() {
  382.         return count;
  383.     }

  384.     /**
  385.      * Check if the specified {@code statistic} is supported.
  386.      *
  387.      * <p>Note: This method will not return {@code false} if the argument is {@code null}.
  388.      *
  389.      * @param statistic Statistic.
  390.      * @return {@code true} if supported
  391.      * @throws NullPointerException if the {@code statistic} is {@code null}
  392.      * @see #getAsDouble(Statistic)
  393.      */
  394.     public boolean isSupported(Statistic statistic) {
  395.         // Check for the appropriate underlying implementation
  396.         switch (statistic) {
  397.         case GEOMETRIC_MEAN:
  398.         case SUM_OF_LOGS:
  399.             return sumOfLogs != null;
  400.         case KURTOSIS:
  401.             return moment instanceof SumOfFourthDeviations;
  402.         case MAX:
  403.             return max != null;
  404.         case MEAN:
  405.             return moment != null;
  406.         case MIN:
  407.             return min != null;
  408.         case PRODUCT:
  409.             return product != null;
  410.         case SKEWNESS:
  411.             return moment instanceof SumOfCubedDeviations;
  412.         case STANDARD_DEVIATION:
  413.         case VARIANCE:
  414.             return moment instanceof SumOfSquaredDeviations;
  415.         case SUM:
  416.             return sum != null;
  417.         case SUM_OF_SQUARES:
  418.             return sumOfSquares != null;
  419.         default:
  420.             return false;
  421.         }
  422.     }

  423.     /**
  424.      * Gets the value of the specified {@code statistic} as a {@code double}.
  425.      *
  426.      * @param statistic Statistic.
  427.      * @return the value
  428.      * @throws IllegalArgumentException if the {@code statistic} is not supported
  429.      * @see #isSupported(Statistic)
  430.      * @see #getResult(Statistic)
  431.      */
  432.     public double getAsDouble(Statistic statistic) {
  433.         return getResult(statistic).getAsDouble();
  434.     }

  435.     /**
  436.      * Gets a supplier for the value of the specified {@code statistic}.
  437.      *
  438.      * <p>The returned function will supply the correct result after
  439.      * calls to {@link #accept(double) accept} or
  440.      * {@link #combine(DoubleStatistics) combine} further values into
  441.      * {@code this} instance.
  442.      *
  443.      * <p>This method can be used to perform a one-time look-up of the statistic
  444.      * function to compute statistics as values are dynamically added.
  445.      *
  446.      * @param statistic Statistic.
  447.      * @return the supplier
  448.      * @throws IllegalArgumentException if the {@code statistic} is not supported
  449.      * @see #isSupported(Statistic)
  450.      * @see #getAsDouble(Statistic)
  451.      */
  452.     public StatisticResult getResult(Statistic statistic) {
  453.         // Locate the implementation.
  454.         // Statistics that wrap an underlying implementation are created in methods.
  455.         // The return argument should be a method reference and not an instance
  456.         // of DoubleStatistic. This ensures the statistic implementation cannot
  457.         // be updated with new values by casting the result and calling accept(double).
  458.         StatisticResult stat = null;
  459.         switch (statistic) {
  460.         case GEOMETRIC_MEAN:
  461.             stat = getGeometricMean();
  462.             break;
  463.         case KURTOSIS:
  464.             stat = getKurtosis();
  465.             break;
  466.         case MAX:
  467.             stat = max;
  468.             break;
  469.         case MEAN:
  470.             stat = getMean();
  471.             break;
  472.         case MIN:
  473.             stat = min;
  474.             break;
  475.         case PRODUCT:
  476.             stat = product;
  477.             break;
  478.         case SKEWNESS:
  479.             stat = getSkewness();
  480.             break;
  481.         case STANDARD_DEVIATION:
  482.             stat = getStandardDeviation();
  483.             break;
  484.         case SUM:
  485.             stat = sum;
  486.             break;
  487.         case SUM_OF_LOGS:
  488.             stat = sumOfLogs;
  489.             break;
  490.         case SUM_OF_SQUARES:
  491.             stat = sumOfSquares;
  492.             break;
  493.         case VARIANCE:
  494.             stat = getVariance();
  495.             break;
  496.         default:
  497.             break;
  498.         }
  499.         if (stat != null) {
  500.             return stat instanceof DoubleStatistic ?
  501.                 ((DoubleStatistic) stat)::getAsDouble :
  502.                 stat;
  503.         }
  504.         throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
  505.     }

  506.     /**
  507.      * Gets the geometric mean.
  508.      *
  509.      * @return a geometric mean supplier (or null if unsupported)
  510.      */
  511.     private StatisticResult getGeometricMean() {
  512.         if (sumOfLogs != null) {
  513.             // Return a function that has access to the count and sumOfLogs
  514.             return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
  515.         }
  516.         return null;
  517.     }

  518.     /**
  519.      * Gets the kurtosis.
  520.      *
  521.      * @return a kurtosis supplier (or null if unsupported)
  522.      */
  523.     private StatisticResult getKurtosis() {
  524.         if (moment instanceof SumOfFourthDeviations) {
  525.             return new Kurtosis((SumOfFourthDeviations) moment)
  526.                 .setBiased(config.isBiased())::getAsDouble;
  527.         }
  528.         return null;
  529.     }

  530.     /**
  531.      * Gets the mean.
  532.      *
  533.      * @return a mean supplier (or null if unsupported)
  534.      */
  535.     private StatisticResult getMean() {
  536.         if (moment != null) {
  537.             // Special case where wrapping with a Mean is not required
  538.             return moment::getFirstMoment;
  539.         }
  540.         return null;
  541.     }

  542.     /**
  543.      * Gets the skewness.
  544.      *
  545.      * @return a skewness supplier (or null if unsupported)
  546.      */
  547.     private StatisticResult getSkewness() {
  548.         if (moment instanceof SumOfCubedDeviations) {
  549.             return new Skewness((SumOfCubedDeviations) moment)
  550.                 .setBiased(config.isBiased())::getAsDouble;
  551.         }
  552.         return null;
  553.     }

  554.     /**
  555.      * Gets the standard deviation.
  556.      *
  557.      * @return a standard deviation supplier (or null if unsupported)
  558.      */
  559.     private StatisticResult getStandardDeviation() {
  560.         if (moment instanceof SumOfSquaredDeviations) {
  561.             return new StandardDeviation((SumOfSquaredDeviations) moment)
  562.                 .setBiased(config.isBiased())::getAsDouble;
  563.         }
  564.         return null;
  565.     }

  566.     /**
  567.      * Gets the variance.
  568.      *
  569.      * @return a variance supplier (or null if unsupported)
  570.      */
  571.     private StatisticResult getVariance() {
  572.         if (moment instanceof SumOfSquaredDeviations) {
  573.             return new Variance((SumOfSquaredDeviations) moment)
  574.                 .setBiased(config.isBiased())::getAsDouble;
  575.         }
  576.         return null;
  577.     }

  578.     /**
  579.      * Combines the state of the {@code other} statistics into this one.
  580.      * Only {@code this} instance is modified by the {@code combine} operation.
  581.      *
  582.      * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
  583.      * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
  584.      * all values of the {@link Statistic} enum which are supported by {@code this}
  585.      * instance.
  586.      *
  587.      * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
  588.      * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
  589.      * instance is not compatible then an exception is raised before any state is modified.
  590.      *
  591.      * @param other Another set of statistics to be combined.
  592.      * @return {@code this} instance after combining {@code other}.
  593.      * @throws IllegalArgumentException if the {@code other} is not compatible
  594.      */
  595.     public DoubleStatistics combine(DoubleStatistics other) {
  596.         // Check compatibility
  597.         Statistics.checkCombineCompatible(min, other.min);
  598.         Statistics.checkCombineCompatible(max, other.max);
  599.         Statistics.checkCombineCompatible(sum, other.sum);
  600.         Statistics.checkCombineCompatible(product, other.product);
  601.         Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
  602.         Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
  603.         Statistics.checkCombineAssignable(moment, other.moment);
  604.         // Combine
  605.         count += other.count;
  606.         Statistics.combine(min, other.min);
  607.         Statistics.combine(max, other.max);
  608.         Statistics.combine(sum, other.sum);
  609.         Statistics.combine(product, other.product);
  610.         Statistics.combine(sumOfSquares, other.sumOfSquares);
  611.         Statistics.combine(sumOfLogs, other.sumOfLogs);
  612.         Statistics.combineMoment(moment, other.moment);
  613.         return this;
  614.     }

  615.     /**
  616.      * Sets the statistics configuration.
  617.      *
  618.      * <p>These options only control the final computation of statistics. The configuration
  619.      * will not affect compatibility between instances during a
  620.      * {@link #combine(DoubleStatistics) combine} operation.
  621.      *
  622.      * <p>Note: These options will affect any future computation of statistics. Supplier functions
  623.      * that have been previously created will not be updated with the new configuration.
  624.      *
  625.      * @param v Value.
  626.      * @return {@code this} instance
  627.      * @throws NullPointerException if the value is null
  628.      * @see #getResult(Statistic)
  629.      */
  630.     public DoubleStatistics setConfiguration(StatisticsConfiguration v) {
  631.         config = Objects.requireNonNull(v);
  632.         return this;
  633.     }
  634. }