View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  import java.util.Objects;
20  import java.util.Set;
21  import java.util.function.BiFunction;
22  import java.util.function.DoubleConsumer;
23  import java.util.function.Function;
24  
25  /**
26   * Statistics for {@code double} values.
27   *
28   * <p>This class provides combinations of individual statistic implementations in the
29   * {@code org.apache.commons.statistics.descriptive} package.
30   *
31   * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
32   * This implementation does not check for overflow of the count.
33   *
34   * @since 1.1
35   */
36  public final class DoubleStatistics implements DoubleConsumer {
37      /** Error message for non configured statistics. */
38      private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
39      /** Error message for an unsupported statistic. */
40      private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";
41  
42      /** Count of values recorded. */
43      private long count;
44      /** The consumer of values. */
45      private final DoubleConsumer consumer;
46      /** The {@link Min} implementation. */
47      private final Min min;
48      /** The {@link Max} implementation. */
49      private final Max max;
50      /** The moment implementation. May be any instance of {@link FirstMoment}. */
51      private final FirstMoment moment;
52      /** The {@link Sum} implementation. */
53      private final Sum sum;
54      /** The {@link Product} implementation. */
55      private final Product product;
56      /** The {@link SumOfSquares} implementation. */
57      private final SumOfSquares sumOfSquares;
58      /** The {@link SumOfLogs} implementation. */
59      private final SumOfLogs sumOfLogs;
60      /** Configuration options for computation of statistics. */
61      private StatisticsConfiguration config;
62  
63      /**
64       * A builder for {@link DoubleStatistics}.
65       */
66      public static final class Builder {
67          /** An empty double array. */
68          private static final double[] NO_VALUES = {};
69  
70          /** The {@link Min} constructor. */
71          private Function<double[], Min> min;
72          /** The {@link Max} constructor. */
73          private Function<double[], Max> max;
74          /** The moment constructor. May return any instance of {@link FirstMoment}. */
75          private BiFunction<org.apache.commons.numbers.core.Sum, double[], FirstMoment> moment;
76          /** The {@link Sum} constructor. */
77          private Function<org.apache.commons.numbers.core.Sum, Sum> sum;
78          /** The {@link Product} constructor. */
79          private Function<double[], Product> product;
80          /** The {@link SumOfSquares} constructor. */
81          private Function<double[], SumOfSquares> sumOfSquares;
82          /** The {@link SumOfLogs} constructor. */
83          private Function<double[], SumOfLogs> sumOfLogs;
84          /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
85           * instance constructed by {@link #moment}. This should only be increased from the default
86           * of zero (corresponding to no moment computation). */
87          private int momentOrder;
88          /** Configuration options for computation of statistics. */
89          private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();
90  
91          /**
92           * Create an instance.
93           */
94          Builder() {
95              // Do nothing
96          }
97  
98          /**
99           * Add the statistic to the statistics to compute.
100          *
101          * @param statistic Statistic to compute.
102          * @return {@code this} instance
103          */
104         Builder add(Statistic statistic) {
105             switch (statistic) {
106             case GEOMETRIC_MEAN:
107             case SUM_OF_LOGS:
108                 sumOfLogs = SumOfLogs::of;
109                 break;
110             case KURTOSIS:
111                 createMoment(4);
112                 break;
113             case MAX:
114                 max = Max::of;
115                 break;
116             case MEAN:
117                 createMoment(1);
118                 break;
119             case MIN:
120                 min = Min::of;
121                 break;
122             case PRODUCT:
123                 product = Product::of;
124                 break;
125             case SKEWNESS:
126                 createMoment(3);
127                 break;
128             case STANDARD_DEVIATION:
129             case VARIANCE:
130                 createMoment(2);
131                 break;
132             case SUM:
133                 sum = Sum::new;
134                 break;
135             case SUM_OF_SQUARES:
136                 sumOfSquares = SumOfSquares::of;
137                 break;
138             default:
139                 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
140             }
141             return this;
142         }
143 
144         /**
145          * Creates the moment constructor for the specified {@code order},
146          * e.g. order=2 is sum of squared deviations.
147          *
148          * @param order Order.
149          */
150         private void createMoment(int order) {
151             if (order > momentOrder) {
152                 momentOrder = order;
153                 if (order == 4) {
154                     moment = SumOfFourthDeviations::create;
155                 } else if (order == 3) {
156                     moment = SumOfCubedDeviations::create;
157                 } else if (order == 2) {
158                     moment = SumOfSquaredDeviations::create;
159                 } else {
160                     // Assume order == 1
161                     moment = FirstMoment::create;
162                 }
163             }
164         }
165 
166         /**
167          * Sets the statistics configuration options for computation of statistics.
168          *
169          * @param v Value.
170          * @return the builder
171          * @throws NullPointerException if the value is null
172          */
173         public Builder setConfiguration(StatisticsConfiguration v) {
174             config = Objects.requireNonNull(v);
175             return this;
176         }
177 
178         /**
179          * Builds a {@code DoubleStatistics} instance.
180          *
181          * @return {@code DoubleStatistics} instance.
182          */
183         public DoubleStatistics build() {
184             return build(NO_VALUES);
185         }
186 
187         /**
188          * Builds a {@code DoubleStatistics} instance using the input {@code values}.
189          *
190          * <p>Note: {@code DoubleStatistics} computed using
191          * {@link DoubleStatistics#accept(double) accept} may be
192          * different from this instance.
193          *
194          * @param values Values.
195          * @return {@code DoubleStatistics} instance.
196          */
197         public DoubleStatistics build(double... values) {
198             Objects.requireNonNull(values, "values");
199             // Create related statistics
200             FirstMoment m = null;
201             Sum sumStat = null;
202             if (moment != null || sum != null) {
203                 final org.apache.commons.numbers.core.Sum s =
204                     org.apache.commons.numbers.core.Sum.of(values);
205                 m = create(moment, s, values);
206                 sumStat = create(sum, s);
207             }
208             return new DoubleStatistics(
209                 values.length,
210                 create(min, values),
211                 create(max, values),
212                 m,
213                 sumStat,
214                 create(product, values),
215                 create(sumOfSquares, values),
216                 create(sumOfLogs, values),
217                 config);
218         }
219 
220         /**
221          * Creates the object from the {@code values}.
222          *
223          * @param <S> value type
224          * @param <T> object type
225          * @param constructor Constructor.
226          * @param values Values
227          * @return the instance
228          */
229         private static <S, T> T create(Function<S, T> constructor, S values) {
230             if (constructor != null) {
231                 return constructor.apply(values);
232             }
233             return null;
234         }
235 
236         /**
237          * Creates the object from the values {@code r} and {@code s}.
238          *
239          * @param <R> value type
240          * @param <S> value type
241          * @param <T> object type
242          * @param constructor Constructor.
243          * @param r Value.
244          * @param s Value.
245          * @return the instance
246          */
247         private static <R, S, T> T create(BiFunction<R, S, T> constructor, R r, S s) {
248             if (constructor != null) {
249                 return constructor.apply(r, s);
250             }
251             return null;
252         }
253     }
254 
255     /**
256      * Create an instance.
257      *
258      * @param count Count of values.
259      * @param min Min implementation.
260      * @param max Max implementation.
261      * @param moment Moment implementation.
262      * @param sum Sum implementation.
263      * @param product Product implementation.
264      * @param sumOfSquares Sum of squares implementation.
265      * @param sumOfLogs Sum of logs implementation.
266      * @param config Statistics configuration.
267      */
268     DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
269                      Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
270                      StatisticsConfiguration config) {
271         this.count = count;
272         this.min = min;
273         this.max = max;
274         this.moment = moment;
275         this.sum = sum;
276         this.product = product;
277         this.sumOfSquares = sumOfSquares;
278         this.sumOfLogs = sumOfLogs;
279         this.config = config;
280         consumer = Statistics.compose(min, max, moment, sum, product, sumOfSquares, sumOfLogs);
281     }
282 
283     /**
284      * Returns a new instance configured to compute the specified {@code statistics}.
285      *
286      * <p>The statistics will be empty and so will return the default values for each
287      * computed statistic.
288      *
289      * @param statistics Statistics to compute.
290      * @return the instance
291      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
292      */
293     public static DoubleStatistics of(Statistic... statistics) {
294         return builder(statistics).build();
295     }
296 
297     /**
298      * Returns a new instance configured to compute the specified {@code statistics}
299      * populated using the input {@code values}.
300      *
301      * <p>Use this method to create an instance populated with a (variable) array of
302      * {@code double[]} data:
303      *
304      * <pre>
305      * DoubleStatistics stats = DoubleStatistics.of(
306      *     EnumSet.of(Statistic.MIN, Statistic.MAX),
307      *     1, 1, 2, 3, 5, 8, 13);
308      * </pre>
309      *
310      * @param statistics Statistics to compute.
311      * @param values Values.
312      * @return the instance
313      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
314      */
315     public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
316         if (statistics.isEmpty()) {
317             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
318         }
319         final Builder b = new Builder();
320         statistics.forEach(b::add);
321         return b.build(values);
322     }
323 
324     /**
325      * Returns a new builder configured to create instances to compute the specified
326      * {@code statistics}.
327      *
328      * <p>Use this method to create an instance populated with an array of {@code double[]}
329      * data using the {@link Builder#build(double...)} method:
330      *
331      * <pre>
332      * double[] data = ...
333      * DoubleStatistics stats = DoubleStatistics.builder(
334      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
335      *     .build(data);
336      * </pre>
337      *
338      * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
339      * to be used in parallel, or on separate arrays of {@code double[]} data. These may
340      * be {@link #combine(DoubleStatistics) combined}. For example:
341      *
342      * <pre>
343      * double[][] data = ...
344      * DoubleStatistics.Builder builder = DoubleStatistics.builder(
345      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
346      * DoubleStatistics stats = Arrays.stream(data)
347      *     .parallel()
348      *     .map(builder::build)
349      *     .reduce(DoubleStatistics::combine)
350      *     .get();
351      * </pre>
352      *
353      * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
354      * use on multiple data:
355      *
356      * <pre>{@code
357      * DoubleStatistics.Builder builder = DoubleStatistics.builder(
358      *     Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
359      * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
360      *     Collector.of(builder::build,
361      *                  (s, d) -> s.combine(builder.build(d)),
362      *                  DoubleStatistics::combine);
363      *
364      * // Repeated
365      * double[][] data = ...
366      * DoubleStatistics stats = Arrays.stream(data).collect(collector);
367      * }</pre>
368      *
369      * @param statistics Statistics to compute.
370      * @return the builder
371      * @throws IllegalArgumentException if there are no {@code statistics} to compute.
372      */
373     public static Builder builder(Statistic... statistics) {
374         if (statistics.length == 0) {
375             throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
376         }
377         final Builder b = new Builder();
378         for (final Statistic s : statistics) {
379             b.add(s);
380         }
381         return b;
382     }
383 
384     /**
385      * Updates the state of the statistics to reflect the addition of {@code value}.
386      *
387      * @param value Value.
388      */
389     @Override
390     public void accept(double value) {
391         count++;
392         consumer.accept(value);
393     }
394 
395     /**
396      * Return the count of values recorded.
397      *
398      * @return the count of values
399      */
400     public long getCount() {
401         return count;
402     }
403 
404     /**
405      * Check if the specified {@code statistic} is supported.
406      *
407      * <p>Note: This method will not return {@code false} if the argument is {@code null}.
408      *
409      * @param statistic Statistic.
410      * @return {@code true} if supported
411      * @throws NullPointerException if the {@code statistic} is {@code null}
412      * @see #getAsDouble(Statistic)
413      */
414     public boolean isSupported(Statistic statistic) {
415         // Check for the appropriate underlying implementation
416         switch (statistic) {
417         case GEOMETRIC_MEAN:
418         case SUM_OF_LOGS:
419             return sumOfLogs != null;
420         case KURTOSIS:
421             return moment instanceof SumOfFourthDeviations;
422         case MAX:
423             return max != null;
424         case MEAN:
425             return moment != null;
426         case MIN:
427             return min != null;
428         case PRODUCT:
429             return product != null;
430         case SKEWNESS:
431             return moment instanceof SumOfCubedDeviations;
432         case STANDARD_DEVIATION:
433         case VARIANCE:
434             return moment instanceof SumOfSquaredDeviations;
435         case SUM:
436             return sum != null;
437         case SUM_OF_SQUARES:
438             return sumOfSquares != null;
439         default:
440             return false;
441         }
442     }
443 
444     /**
445      * Gets the value of the specified {@code statistic} as a {@code double}.
446      *
447      * @param statistic Statistic.
448      * @return the value
449      * @throws IllegalArgumentException if the {@code statistic} is not supported
450      * @see #isSupported(Statistic)
451      * @see #getResult(Statistic)
452      */
453     public double getAsDouble(Statistic statistic) {
454         return getResult(statistic).getAsDouble();
455     }
456 
457     /**
458      * Gets a supplier for the value of the specified {@code statistic}.
459      *
460      * <p>The returned function will supply the correct result after
461      * calls to {@link #accept(double) accept} or
462      * {@link #combine(DoubleStatistics) combine} further values into
463      * {@code this} instance.
464      *
465      * <p>This method can be used to perform a one-time look-up of the statistic
466      * function to compute statistics as values are dynamically added.
467      *
468      * @param statistic Statistic.
469      * @return the supplier
470      * @throws IllegalArgumentException if the {@code statistic} is not supported
471      * @see #isSupported(Statistic)
472      * @see #getAsDouble(Statistic)
473      */
474     public StatisticResult getResult(Statistic statistic) {
475         // Locate the implementation.
476         // Statistics that wrap an underlying implementation are created in methods.
477         // The return argument should be a method reference and not an instance
478         // of DoubleStatistic. This ensures the statistic implementation cannot
479         // be updated with new values by casting the result and calling accept(double).
480         StatisticResult stat = null;
481         switch (statistic) {
482         case GEOMETRIC_MEAN:
483             stat = getGeometricMean();
484             break;
485         case KURTOSIS:
486             stat = getKurtosis();
487             break;
488         case MAX:
489             stat = max;
490             break;
491         case MEAN:
492             stat = getMean();
493             break;
494         case MIN:
495             stat = min;
496             break;
497         case PRODUCT:
498             stat = product;
499             break;
500         case SKEWNESS:
501             stat = getSkewness();
502             break;
503         case STANDARD_DEVIATION:
504             stat = getStandardDeviation();
505             break;
506         case SUM:
507             stat = sum;
508             break;
509         case SUM_OF_LOGS:
510             stat = sumOfLogs;
511             break;
512         case SUM_OF_SQUARES:
513             stat = sumOfSquares;
514             break;
515         case VARIANCE:
516             stat = getVariance();
517             break;
518         default:
519             break;
520         }
521         if (stat != null) {
522             return stat instanceof DoubleStatistic ?
523                 ((DoubleStatistic) stat)::getAsDouble :
524                 stat;
525         }
526         throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
527     }
528 
529     /**
530      * Gets the geometric mean.
531      *
532      * @return a geometric mean supplier (or null if unsupported)
533      */
534     private StatisticResult getGeometricMean() {
535         if (sumOfLogs != null) {
536             // Return a function that has access to the count and sumOfLogs
537             return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
538         }
539         return null;
540     }
541 
542     /**
543      * Gets the kurtosis.
544      *
545      * @return a kurtosis supplier (or null if unsupported)
546      */
547     private StatisticResult getKurtosis() {
548         if (moment instanceof SumOfFourthDeviations) {
549             return new Kurtosis((SumOfFourthDeviations) moment)
550                 .setBiased(config.isBiased())::getAsDouble;
551         }
552         return null;
553     }
554 
555     /**
556      * Gets the mean.
557      *
558      * @return a mean supplier (or null if unsupported)
559      */
560     private StatisticResult getMean() {
561         if (moment != null) {
562             // Special case where wrapping with a Mean is not required
563             return moment::getFirstMoment;
564         }
565         return null;
566     }
567 
568     /**
569      * Gets the skewness.
570      *
571      * @return a skewness supplier (or null if unsupported)
572      */
573     private StatisticResult getSkewness() {
574         if (moment instanceof SumOfCubedDeviations) {
575             return new Skewness((SumOfCubedDeviations) moment)
576                 .setBiased(config.isBiased())::getAsDouble;
577         }
578         return null;
579     }
580 
581     /**
582      * Gets the standard deviation.
583      *
584      * @return a standard deviation supplier (or null if unsupported)
585      */
586     private StatisticResult getStandardDeviation() {
587         if (moment instanceof SumOfSquaredDeviations) {
588             return new StandardDeviation((SumOfSquaredDeviations) moment)
589                 .setBiased(config.isBiased())::getAsDouble;
590         }
591         return null;
592     }
593 
594     /**
595      * Gets the variance.
596      *
597      * @return a variance supplier (or null if unsupported)
598      */
599     private StatisticResult getVariance() {
600         if (moment instanceof SumOfSquaredDeviations) {
601             return new Variance((SumOfSquaredDeviations) moment)
602                 .setBiased(config.isBiased())::getAsDouble;
603         }
604         return null;
605     }
606 
607     /**
608      * Combines the state of the {@code other} statistics into this one.
609      * Only {@code this} instance is modified by the {@code combine} operation.
610      *
611      * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
612      * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
613      * all values of the {@link Statistic} enum which are supported by {@code this}
614      * instance.
615      *
616      * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
617      * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
618      * instance is not compatible then an exception is raised before any state is modified.
619      *
620      * @param other Another set of statistics to be combined.
621      * @return {@code this} instance after combining {@code other}.
622      * @throws IllegalArgumentException if the {@code other} is not compatible
623      */
624     public DoubleStatistics combine(DoubleStatistics other) {
625         // Check compatibility
626         Statistics.checkCombineCompatible(min, other.min);
627         Statistics.checkCombineCompatible(max, other.max);
628         Statistics.checkCombineCompatible(sum, other.sum);
629         Statistics.checkCombineCompatible(product, other.product);
630         Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
631         Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
632         Statistics.checkCombineAssignable(moment, other.moment);
633         // Combine
634         count += other.count;
635         Statistics.combine(min, other.min);
636         Statistics.combine(max, other.max);
637         Statistics.combine(sum, other.sum);
638         Statistics.combine(product, other.product);
639         Statistics.combine(sumOfSquares, other.sumOfSquares);
640         Statistics.combine(sumOfLogs, other.sumOfLogs);
641         Statistics.combineMoment(moment, other.moment);
642         return this;
643     }
644 
645     /**
646      * Sets the statistics configuration.
647      *
648      * <p>These options only control the final computation of statistics. The configuration
649      * will not affect compatibility between instances during a
650      * {@link #combine(DoubleStatistics) combine} operation.
651      *
652      * <p>Note: These options will affect any future computation of statistics. Supplier functions
653      * that have been previously created will not be updated with the new configuration.
654      *
655      * @param v Value.
656      * @return {@code this} instance
657      * @throws NullPointerException if the value is null
658      * @see #getResult(Statistic)
659      */
660     public DoubleStatistics setConfiguration(StatisticsConfiguration v) {
661         config = Objects.requireNonNull(v);
662         return this;
663     }
664 }