1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.statistics.descriptive;
18
19 import java.util.Objects;
20 import java.util.Set;
21 import java.util.function.DoubleConsumer;
22 import java.util.function.Function;
23
24 /**
25 * Statistics for {@code double} values.
26 *
27 * <p>This class provides combinations of individual statistic implementations in the
28 * {@code org.apache.commons.statistics.descriptive} package.
29 *
30 * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
31 * This implementation does not check for overflow of the count.
32 *
33 * @since 1.1
34 */
35 public final class DoubleStatistics implements DoubleConsumer {
/** Error message used when no statistics have been requested for computation. */
private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
/** Error message prefix for an unsupported statistic; the statistic is appended to it. */
private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";

/** Count of values recorded. Updated when a value is accepted or another instance is combined. */
private long count;
/** The consumer of values. Composed from the statistic implementations in the constructor. */
private final DoubleConsumer consumer;
/** The {@link Min} implementation. May be {@code null} if the statistic is not computed. */
private final Min min;
/** The {@link Max} implementation. May be {@code null} if the statistic is not computed. */
private final Max max;
/** The moment implementation. May be any instance of {@link FirstMoment}; may be {@code null}. */
private final FirstMoment moment;
/** The {@link Sum} implementation. May be {@code null} if the statistic is not computed. */
private final Sum sum;
/** The {@link Product} implementation. May be {@code null} if the statistic is not computed. */
private final Product product;
/** The {@link SumOfSquares} implementation. May be {@code null} if the statistic is not computed. */
private final SumOfSquares sumOfSquares;
/** The {@link SumOfLogs} implementation. May be {@code null} if the statistic is not computed. */
private final SumOfLogs sumOfLogs;
/** Configuration options for computation of statistics. Not final as it can be replaced after construction. */
private StatisticsConfiguration config;
61
62 /**
63 * A builder for {@link DoubleStatistics}.
64 */
65 public static final class Builder {
/** An empty double array. Used to build an instance that has observed no values. */
private static final double[] NO_VALUES = {};

/** The {@link Min} constructor. Remains {@code null} until the statistic is added. */
private RangeFunction<double[], Min> min;
/** The {@link Max} constructor. Remains {@code null} until the statistic is added. */
private RangeFunction<double[], Max> max;
/** The moment constructor. May return any instance of {@link FirstMoment}.
 * Remains {@code null} until a moment-based statistic is added. */
private RangeBiFunction<org.apache.commons.numbers.core.Sum, double[], FirstMoment> moment;
/** The {@link Sum} constructor. Remains {@code null} until the statistic is added. */
private Function<org.apache.commons.numbers.core.Sum, Sum> sum;
/** The {@link Product} constructor. Remains {@code null} until the statistic is added. */
private RangeFunction<double[], Product> product;
/** The {@link SumOfSquares} constructor. Remains {@code null} until the statistic is added. */
private RangeFunction<double[], SumOfSquares> sumOfSquares;
/** The {@link SumOfLogs} constructor. Remains {@code null} until the statistic is added. */
private RangeFunction<double[], SumOfLogs> sumOfLogs;
/** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
 * instance constructed by {@link #moment}. This should only be increased from the default
 * of zero (corresponding to no moment computation). */
private int momentOrder;
/** Configuration options for computation of statistics. Initialised to the default options. */
private StatisticsConfiguration config = StatisticsConfiguration.withDefaults();
89
/**
 * Creates a builder with no statistics added and the default configuration.
 */
Builder() {
    // Intentionally empty: all state is initialised at the field declarations
}
96
97 /**
98 * Add the statistic to the statistics to compute.
99 *
100 * @param statistic Statistic to compute.
101 * @return {@code this} instance
102 */
103 Builder add(Statistic statistic) {
104 // Exhaustive switch statement
105 switch (statistic) {
106 case GEOMETRIC_MEAN:
107 case SUM_OF_LOGS:
108 sumOfLogs = SumOfLogs::createFromRange;
109 break;
110 case KURTOSIS:
111 createMoment(4);
112 break;
113 case MAX:
114 max = Max::createFromRange;
115 break;
116 case MEAN:
117 createMoment(1);
118 break;
119 case MIN:
120 min = Min::createFromRange;
121 break;
122 case PRODUCT:
123 product = Product::createFromRange;
124 break;
125 case SKEWNESS:
126 createMoment(3);
127 break;
128 case STANDARD_DEVIATION:
129 case VARIANCE:
130 createMoment(2);
131 break;
132 case SUM:
133 sum = Sum::new;
134 break;
135 case SUM_OF_SQUARES:
136 sumOfSquares = SumOfSquares::createFromRange;
137 break;
138 }
139 return this;
140 }
141
142 /**
143 * Creates the moment constructor for the specified {@code order},
144 * e.g. order=2 is sum of squared deviations.
145 *
146 * @param order Order.
147 */
148 private void createMoment(int order) {
149 if (order > momentOrder) {
150 momentOrder = order;
151 if (order == 4) {
152 moment = SumOfFourthDeviations::createFromRange;
153 } else if (order == 3) {
154 moment = SumOfCubedDeviations::createFromRange;
155 } else if (order == 2) {
156 moment = SumOfSquaredDeviations::createFromRange;
157 } else {
158 // Assume order == 1
159 moment = FirstMoment::createFromRange;
160 }
161 }
162 }
163
164 /**
165 * Sets the statistics configuration options for computation of statistics.
166 *
167 * @param v Value.
168 * @return the builder
169 * @throws NullPointerException if the value is null
170 */
171 public Builder setConfiguration(StatisticsConfiguration v) {
172 config = Objects.requireNonNull(v);
173 return this;
174 }
175
176 /**
177 * Builds a {@code DoubleStatistics} instance.
178 *
179 * @return {@code DoubleStatistics} instance.
180 */
181 public DoubleStatistics build() {
182 return create(NO_VALUES, 0, 0);
183 }
184
185 /**
186 * Builds a {@code DoubleStatistics} instance using the input {@code values}.
187 *
188 * <p>Note: {@code DoubleStatistics} computed using
189 * {@link DoubleStatistics#accept(double) accept} may be
190 * different from this instance.
191 *
192 * @param values Values.
193 * @return {@code DoubleStatistics} instance.
194 */
195 public DoubleStatistics build(double... values) {
196 Objects.requireNonNull(values, "values");
197 return create(values, 0, values.length);
198 }
199
200 /**
201 * Builds a {@code DoubleStatistics} instance using the specified range of {@code values}.
202 *
203 * <p>Note: {@code DoubleStatistics} computed using
204 * {@link DoubleStatistics#accept(double) accept} may be
205 * different from this instance.
206 *
207 * @param values Values.
208 * @param from Inclusive start of the range.
209 * @param to Exclusive end of the range.
210 * @return {@code DoubleStatistics} instance.
211 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
212 * @since 1.2
213 */
214 public DoubleStatistics build(double[] values, int from, int to) {
215 Statistics.checkFromToIndex(from, to, values.length);
216 return create(values, from, to);
217 }
218
219 /**
220 * Builds a {@code DoubleStatistics} instance using the input {@code values}.
221 *
222 * <p>Note: {@code DoubleStatistics} computed using
223 * {@link DoubleStatistics#accept(double) accept} may be
224 * different from this instance.
225 *
226 * <p>Warning: No range checks are performed.
227 *
228 * @param values Values.
229 * @param from Inclusive start of the range.
230 * @param to Exclusive end of the range.
231 * @return {@code DoubleStatistics} instance.
232 */
233 private DoubleStatistics create(double[] values, int from, int to) {
234 // Create related statistics
235 FirstMoment m = null;
236 Sum sumStat = null;
237 if (moment != null || sum != null) {
238 final org.apache.commons.numbers.core.Sum s = Statistics.sum(values, from, to);
239 m = create(moment, s, values, from, to);
240 sumStat = create(sum, s);
241 }
242 return new DoubleStatistics(
243 to - from,
244 create(min, values, from, to),
245 create(max, values, from, to),
246 m,
247 sumStat,
248 create(product, values, from, to),
249 create(sumOfSquares, values, from, to),
250 create(sumOfLogs, values, from, to),
251 config);
252 }
253
254 /**
255 * Creates the object from the {@code values}.
256 *
257 * @param <S> value type
258 * @param <T> object type
259 * @param constructor Constructor.
260 * @param values Values
261 * @return the instance
262 */
263 private static <S, T> T create(Function<S, T> constructor, S values) {
264 if (constructor != null) {
265 return constructor.apply(values);
266 }
267 return null;
268 }
269
270 /**
271 * Creates the object from the {@code values}.
272 *
273 * @param <S> value type
274 * @param <T> object type
275 * @param constructor Constructor.
276 * @param values Values
277 * @param from Inclusive start of the range.
278 * @param to Exclusive end of the range.
279 * @return the instance
280 */
281 private static <S, T> T create(RangeFunction<S, T> constructor, S values, int from, int to) {
282 if (constructor != null) {
283 return constructor.apply(values, from, to);
284 }
285 return null;
286 }
287
288 /**
289 * Creates the object from the values {@code r} and {@code s}.
290 *
291 * @param <R> value type
292 * @param <S> value type
293 * @param <T> object type
294 * @param constructor Constructor.
295 * @param r Value.
296 * @param s Value.
297 * @param from Inclusive start of the range.
298 * @param to Exclusive end of the range.
299 * @return the instance
300 */
301 private static <R, S, T> T create(RangeBiFunction<R, S, T> constructor, R r, S s, int from, int to) {
302 if (constructor != null) {
303 return constructor.apply(r, s, from, to);
304 }
305 return null;
306 }
307 }
308
/**
 * Create an instance from the statistic implementations.
 *
 * <p>The implementations are stored and also composed into a single consumer that
 * receives each value passed to {@link #accept(double)}.
 *
 * @param count Count of values.
 * @param min Min implementation.
 * @param max Max implementation.
 * @param moment Moment implementation.
 * @param sum Sum implementation.
 * @param product Product implementation.
 * @param sumOfSquares Sum of squares implementation.
 * @param sumOfLogs Sum of logs implementation.
 * @param config Statistics configuration.
 */
DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
                 Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs,
                 StatisticsConfiguration config) {
    // Scalar state and options
    this.count = count;
    this.config = config;
    // Statistic implementations
    this.min = min;
    this.max = max;
    this.moment = moment;
    this.sum = sum;
    this.product = product;
    this.sumOfSquares = sumOfSquares;
    this.sumOfLogs = sumOfLogs;
    // A single consumer that forwards to the implementations
    this.consumer = Statistics.composeDoubleConsumers(
        min, max, moment, sum, product, sumOfSquares, sumOfLogs);
}
337
338 /**
339 * Returns a new instance configured to compute the specified {@code statistics}.
340 *
341 * <p>The statistics will be empty and so will return the default values for each
342 * computed statistic.
343 *
344 * @param statistics Statistics to compute.
345 * @return the instance
346 * @throws IllegalArgumentException if there are no {@code statistics} to compute.
347 */
348 public static DoubleStatistics of(Statistic... statistics) {
349 return builder(statistics).build();
350 }
351
352 /**
353 * Returns a new instance configured to compute the specified {@code statistics}
354 * populated using the input {@code values}.
355 *
356 * <p>Use this method to create an instance populated with a (variable) array of
357 * {@code double[]} data:
358 *
359 * <pre>
360 * DoubleStatistics stats = DoubleStatistics.of(
361 * EnumSet.of(Statistic.MIN, Statistic.MAX),
362 * 1, 1, 2, 3, 5, 8, 13);
363 * </pre>
364 *
365 * @param statistics Statistics to compute.
366 * @param values Values.
367 * @return the instance
368 * @throws IllegalArgumentException if there are no {@code statistics} to compute.
369 */
370 public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
371 if (statistics.isEmpty()) {
372 throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
373 }
374 final Builder b = new Builder();
375 statistics.forEach(b::add);
376 return b.build(values);
377 }
378
379 /**
380 * Returns a new instance configured to compute the specified {@code statistics}
381 * populated using the specified range of {@code values}.
382 *
383 * <p>Use this method to create an instance populated with part of an array of
384 * {@code double[]} data, e.g. to use the first half of the data:
385 *
386 * <pre>
387 * double[] data = ...
388 * DoubleStatistics stats = DoubleStatistics.of(
389 * EnumSet.of(Statistic.MIN, Statistic.MAX),
390 * data, 0, data.length / 2);
391 * </pre>
392 *
393 * @param statistics Statistics to compute.
394 * @param values Values.
395 * @param from Inclusive start of the range.
396 * @param to Exclusive end of the range.
397 * @return the instance
398 * @throws IllegalArgumentException if there are no {@code statistics} to compute.
399 * @throws IndexOutOfBoundsException if the sub-range is out of bounds
400 * @since 1.2
401 */
402 public static DoubleStatistics ofRange(Set<Statistic> statistics, double[] values, int from, int to) {
403 if (statistics.isEmpty()) {
404 throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
405 }
406 final Builder b = new Builder();
407 statistics.forEach(b::add);
408 return b.build(values, from, to);
409 }
410
411 /**
412 * Returns a new builder configured to create instances to compute the specified
413 * {@code statistics}.
414 *
415 * <p>Use this method to create an instance populated with an array of {@code double[]}
416 * data using the {@link Builder#build(double...)} method:
417 *
418 * <pre>
419 * double[] data = ...
420 * DoubleStatistics stats = DoubleStatistics.builder(
421 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
422 * .build(data);
423 * </pre>
424 *
425 * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
426 * to be used in parallel, or on separate arrays of {@code double[]} data. These may
427 * be {@link #combine(DoubleStatistics) combined}. For example:
428 *
429 * <pre>
430 * double[][] data = ...
431 * DoubleStatistics.Builder builder = DoubleStatistics.builder(
432 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
433 * DoubleStatistics stats = Arrays.stream(data)
434 * .parallel()
435 * .map(builder::build)
436 * .reduce(DoubleStatistics::combine)
437 * .get();
438 * </pre>
439 *
440 * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
441 * use on multiple data:
442 *
443 * <pre>{@code
444 * DoubleStatistics.Builder builder = DoubleStatistics.builder(
445 * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
446 * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
447 * Collector.of(builder::build,
448 * (s, d) -> s.combine(builder.build(d)),
449 * DoubleStatistics::combine);
450 *
451 * // Repeated
452 * double[][] data = ...
453 * DoubleStatistics stats = Arrays.stream(data).collect(collector);
454 * }</pre>
455 *
456 * @param statistics Statistics to compute.
457 * @return the builder
458 * @throws IllegalArgumentException if there are no {@code statistics} to compute.
459 */
460 public static Builder builder(Statistic... statistics) {
461 if (statistics.length == 0) {
462 throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
463 }
464 final Builder b = new Builder();
465 for (final Statistic s : statistics) {
466 b.add(s);
467 }
468 return b;
469 }
470
471 /**
472 * Updates the state of the statistics to reflect the addition of {@code value}.
473 *
474 * @param value Value.
475 */
476 @Override
477 public void accept(double value) {
478 count++;
479 consumer.accept(value);
480 }
481
482 /**
483 * Return the count of values recorded.
484 *
485 * @return the count of values
486 */
487 public long getCount() {
488 return count;
489 }
490
491 /**
492 * Check if the specified {@code statistic} is supported.
493 *
494 * <p>Note: This method will not return {@code false} if the argument is {@code null}.
495 *
496 * @param statistic Statistic.
497 * @return {@code true} if supported
498 * @throws NullPointerException if the {@code statistic} is {@code null}
499 * @see #getAsDouble(Statistic)
500 */
501 public boolean isSupported(Statistic statistic) {
502 // Check for the appropriate underlying implementation
503 // Exhaustive switch statement
504 switch (statistic) {
505 case GEOMETRIC_MEAN:
506 case SUM_OF_LOGS:
507 return sumOfLogs != null;
508 case KURTOSIS:
509 return moment instanceof SumOfFourthDeviations;
510 case MAX:
511 return max != null;
512 case MEAN:
513 return moment != null;
514 case MIN:
515 return min != null;
516 case PRODUCT:
517 return product != null;
518 case SKEWNESS:
519 return moment instanceof SumOfCubedDeviations;
520 case STANDARD_DEVIATION:
521 case VARIANCE:
522 return moment instanceof SumOfSquaredDeviations;
523 case SUM:
524 return sum != null;
525 case SUM_OF_SQUARES:
526 return sumOfSquares != null;
527 }
528 // Unreachable code
529 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
530 }
531
532 /**
533 * Gets the value of the specified {@code statistic} as a {@code double}.
534 *
535 * @param statistic Statistic.
536 * @return the value
537 * @throws IllegalArgumentException if the {@code statistic} is not supported
538 * @see #isSupported(Statistic)
539 * @see #getResult(Statistic)
540 */
541 public double getAsDouble(Statistic statistic) {
542 return getResult(statistic).getAsDouble();
543 }
544
545 /**
546 * Gets a supplier for the value of the specified {@code statistic}.
547 *
548 * <p>The returned function will supply the correct result after
549 * calls to {@link #accept(double) accept} or
550 * {@link #combine(DoubleStatistics) combine} further values into
551 * {@code this} instance.
552 *
553 * <p>This method can be used to perform a one-time look-up of the statistic
554 * function to compute statistics as values are dynamically added.
555 *
556 * @param statistic Statistic.
557 * @return the supplier
558 * @throws IllegalArgumentException if the {@code statistic} is not supported
559 * @see #isSupported(Statistic)
560 * @see #getAsDouble(Statistic)
561 */
562 public StatisticResult getResult(Statistic statistic) {
563 // Locate the implementation.
564 // Statistics that wrap an underlying implementation are created in methods.
565 // The return argument should be a method reference and not an instance
566 // of DoubleStatistic. This ensures the statistic implementation cannot
567 // be updated with new values by casting the result and calling accept(double).
568 StatisticResult stat = null;
569 // Exhaustive switch statement
570 switch (statistic) {
571 case GEOMETRIC_MEAN:
572 stat = getGeometricMean();
573 break;
574 case KURTOSIS:
575 stat = getKurtosis();
576 break;
577 case MAX:
578 stat = max;
579 break;
580 case MEAN:
581 stat = getMean();
582 break;
583 case MIN:
584 stat = min;
585 break;
586 case PRODUCT:
587 stat = product;
588 break;
589 case SKEWNESS:
590 stat = getSkewness();
591 break;
592 case STANDARD_DEVIATION:
593 stat = getStandardDeviation();
594 break;
595 case SUM:
596 stat = sum;
597 break;
598 case SUM_OF_LOGS:
599 stat = sumOfLogs;
600 break;
601 case SUM_OF_SQUARES:
602 stat = sumOfSquares;
603 break;
604 case VARIANCE:
605 stat = getVariance();
606 break;
607 }
608 if (stat != null) {
609 return stat instanceof DoubleStatistic ?
610 ((DoubleStatistic) stat)::getAsDouble :
611 stat;
612 }
613 throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
614 }
615
616 /**
617 * Gets the geometric mean.
618 *
619 * @return a geometric mean supplier (or null if unsupported)
620 */
621 private StatisticResult getGeometricMean() {
622 if (sumOfLogs != null) {
623 // Return a function that has access to the count and sumOfLogs
624 return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
625 }
626 return null;
627 }
628
629 /**
630 * Gets the kurtosis.
631 *
632 * @return a kurtosis supplier (or null if unsupported)
633 */
634 private StatisticResult getKurtosis() {
635 if (moment instanceof SumOfFourthDeviations) {
636 return new Kurtosis((SumOfFourthDeviations) moment)
637 .setBiased(config.isBiased())::getAsDouble;
638 }
639 return null;
640 }
641
642 /**
643 * Gets the mean.
644 *
645 * @return a mean supplier (or null if unsupported)
646 */
647 private StatisticResult getMean() {
648 if (moment != null) {
649 // Special case where wrapping with a Mean is not required
650 return moment::getFirstMoment;
651 }
652 return null;
653 }
654
655 /**
656 * Gets the skewness.
657 *
658 * @return a skewness supplier (or null if unsupported)
659 */
660 private StatisticResult getSkewness() {
661 if (moment instanceof SumOfCubedDeviations) {
662 return new Skewness((SumOfCubedDeviations) moment)
663 .setBiased(config.isBiased())::getAsDouble;
664 }
665 return null;
666 }
667
668 /**
669 * Gets the standard deviation.
670 *
671 * @return a standard deviation supplier (or null if unsupported)
672 */
673 private StatisticResult getStandardDeviation() {
674 if (moment instanceof SumOfSquaredDeviations) {
675 return new StandardDeviation((SumOfSquaredDeviations) moment)
676 .setBiased(config.isBiased())::getAsDouble;
677 }
678 return null;
679 }
680
681 /**
682 * Gets the variance.
683 *
684 * @return a variance supplier (or null if unsupported)
685 */
686 private StatisticResult getVariance() {
687 if (moment instanceof SumOfSquaredDeviations) {
688 return new Variance((SumOfSquaredDeviations) moment)
689 .setBiased(config.isBiased())::getAsDouble;
690 }
691 return null;
692 }
693
694 /**
695 * Combines the state of the {@code other} statistics into this one.
696 * Only {@code this} instance is modified by the {@code combine} operation.
697 *
698 * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
699 * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
700 * all values of the {@link Statistic} enum which are supported by {@code this}
701 * instance.
702 *
703 * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
704 * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
705 * instance is not compatible then an exception is raised before any state is modified.
706 *
707 * @param other Another set of statistics to be combined.
708 * @return {@code this} instance after combining {@code other}.
709 * @throws IllegalArgumentException if the {@code other} is not compatible
710 */
711 public DoubleStatistics combine(DoubleStatistics other) {
712 // Check compatibility
713 Statistics.checkCombineCompatible(min, other.min);
714 Statistics.checkCombineCompatible(max, other.max);
715 Statistics.checkCombineCompatible(sum, other.sum);
716 Statistics.checkCombineCompatible(product, other.product);
717 Statistics.checkCombineCompatible(sumOfSquares, other.sumOfSquares);
718 Statistics.checkCombineCompatible(sumOfLogs, other.sumOfLogs);
719 Statistics.checkCombineAssignable(moment, other.moment);
720 // Combine
721 count += other.count;
722 Statistics.combine(min, other.min);
723 Statistics.combine(max, other.max);
724 Statistics.combine(sum, other.sum);
725 Statistics.combine(product, other.product);
726 Statistics.combine(sumOfSquares, other.sumOfSquares);
727 Statistics.combine(sumOfLogs, other.sumOfLogs);
728 Statistics.combineMoment(moment, other.moment);
729 return this;
730 }
731
732 /**
733 * Sets the statistics configuration.
734 *
735 * <p>These options only control the final computation of statistics. The configuration
736 * will not affect compatibility between instances during a
737 * {@link #combine(DoubleStatistics) combine} operation.
738 *
739 * <p>Note: These options will affect any future computation of statistics. Supplier functions
740 * that have been previously created will not be updated with the new configuration.
741 *
742 * @param v Value.
743 * @return {@code this} instance
744 * @throws NullPointerException if the value is null
745 * @see #getResult(Statistic)
746 */
747 public DoubleStatistics setConfiguration(StatisticsConfiguration v) {
748 config = Objects.requireNonNull(v);
749 return this;
750 }
751 }