SumOfCubedDeviations.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.descriptive;

  18. /**
  19.  * Computes the sum of cubed deviations from the sample mean. This
  20.  * statistic is related to the third moment.
  21.  *
  22.  * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 10.
  23.  * Note that the denominator in the third term in that equation has been corrected to
  24.  * \( (N_1 + N_2)^2 \). Two sum of cubed deviations (SC) can be combined using:
  25.  *
  26.  * <p>\[ SC(X) = {SC}_1 + {SC}_2 + \frac{3(m_1 - m_2)({s_1}^2 - {s_2}^2) N_1 N_2}{N_1 + N_2}
  27.  *                               + \frac{(m_1 - m_2)^3((N_2 - N_1) N_1 N_2}{(N_1 + N_2)^2} \]
  28.  *
  29.  * <p>where \( N \) is the group size, \( m \) is the mean, and \( s^2 \) is the biased variance
  30.  * such that \( s^2 * N \) is the sum of squared deviations from the mean. Note the term
  31.  * \( ({s_1}^2 - {s_2}^2) N_1 N_2 == (ss_1 * N_2 - ss_2 * N_1 \) where \( ss \) is the sum
  32.  * of square deviations.
  33.  *
  34.  * <p>If \( N_1 \) is size 1 this reduces to:
  35.  *
  36.  * <p>\[ SC_{N+1} = {SC}_N + \frac{3(x - m) -s^2 N}{N + 1}
  37.  *                         + \frac{(x - m)^3((N - 1) N}{(N + 1)^2} \]
  38.  *
  39.  * <p>where \( s^2 N \) is the sum of squared deviations.
  40.  * This updating formula is identical to that used in
  41.  * {@code org.apache.commons.math3.stat.descriptive.moment.ThirdMoment}.
  42.  *
  43.  * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
  44.  * This implementation does not check for overflow of the count.
  45.  *
  46.  * <p><strong>Note that this implementation is not synchronized.</strong> If
  47.  * multiple threads access an instance of this class concurrently, and at least
  48.  * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
  49.  * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
  50.  *
  51.  * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
  52.  * and {@link StatisticAccumulator#combine(StatisticResult) combine}
  53.  * as {@code accumulator} and {@code combiner} functions of
  54.  * {@link java.util.stream.Collector Collector} on a parallel stream,
  55.  * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
  56.  * provides the necessary partitioning, isolation, and merging of results for
  57.  * safe and efficient parallel execution.
  58.  *
  59.  * <p>References:
  60.  * <ul>
  61.  *   <li>Manca and Marin (2020)
  62.  *       Decomposition of the Sum of Cubes, the Sum Raised to the
  63.  *       Power of Four and Codeviance.
  64.  *       Applied Mathematics, 11, 1013-1020.
  65.  *       <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
  66.  * </ul>
  67.  *
  68.  * @since 1.1
  69.  */
  70. class SumOfCubedDeviations extends SumOfSquaredDeviations {
  71.     /** 2, the length limit where the sum-of-cubed deviations is zero. */
  72.     static final int LENGTH_TWO = 2;

  73.     /** Sum of cubed deviations of the values that have been added. */
  74.     protected double sumCubedDev;

  75.     /**
  76.      * Create an instance.
  77.      */
  78.     SumOfCubedDeviations() {
  79.         // No-op
  80.     }

  81.     /**
  82.      * Copy constructor.
  83.      *
  84.      * @param source Source to copy.
  85.      */
  86.     SumOfCubedDeviations(SumOfCubedDeviations source) {
  87.         super(source);
  88.         sumCubedDev = source.sumCubedDev;
  89.     }

  90.     /**
  91.      * Create an instance with the given sum of cubed and squared deviations.
  92.      *
  93.      * @param sc Sum of cubed deviations.
  94.      * @param ss Sum of squared deviations.
  95.      */
  96.     SumOfCubedDeviations(double sc, SumOfSquaredDeviations ss) {
  97.         super(ss);
  98.         this.sumCubedDev = sc;
  99.     }

  100.     /**
  101.      * Create an instance with the given sum of cubed and squared deviations,
  102.      * and first moment.
  103.      *
  104.      * @param sc Sum of cubed deviations.
  105.      * @param ss Sum of squared deviations.
  106.      * @param m1 First moment.
  107.      * @param n Count of values.
  108.      */
  109.     SumOfCubedDeviations(double sc, double ss, double m1, long n) {
  110.         super(ss, m1, n);
  111.         this.sumCubedDev = sc;
  112.     }

  113.     /**
  114.      * Returns an instance populated using the input {@code values}.
  115.      *
  116.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  117.      * different from this instance.
  118.      *
  119.      * @param values Values.
  120.      * @return {@code SumOfCubedDeviations} instance.
  121.      */
  122.     static SumOfCubedDeviations of(double... values) {
  123.         if (values.length == 0) {
  124.             return new SumOfCubedDeviations();
  125.         }
  126.         return create(SumOfSquaredDeviations.of(values), values, 0, values.length);
  127.     }

  128.     /**
  129.      * Returns an instance populated using the specified range of {@code values}.
  130.      *
  131.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  132.      * different from this instance.
  133.      *
  134.      * <p>Warning: No range checks are performed.
  135.      *
  136.      * @param values Values.
  137.      * @param from Inclusive start of the range.
  138.      * @param to Exclusive end of the range.
  139.      * @return {@code SumOfCubedDeviations} instance.
  140.      */
  141.     static SumOfCubedDeviations ofRange(double[] values, int from, int to) {
  142.         if (from == to) {
  143.             return new SumOfCubedDeviations();
  144.         }
  145.         return create(SumOfSquaredDeviations.ofRange(values, from, to), values, from, to);
  146.     }

  147.     /**
  148.      * Creates the sum of cubed deviations.
  149.      *
  150.      * <p>Uses the provided {@code sum} to create the first moment.
  151.      * This method is used by {@link DoubleStatistics} using a sum that can be reused
  152.      * for the {@link Sum} statistic.
  153.      *
  154.      * <p>Warning: No range checks are performed.
  155.      *
  156.      * @param sum Sum of the values.
  157.      * @param values Values.
  158.      * @param from Inclusive start of the range.
  159.      * @param to Exclusive end of the range.
  160.      * @return {@code SumOfCubedDeviations} instance.
  161.      */
  162.     static SumOfCubedDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
  163.                                                 double[] values, int from, int to) {
  164.         if (from == to) {
  165.             return new SumOfCubedDeviations();
  166.         }
  167.         return create(SumOfSquaredDeviations.createFromRange(sum, values, from, to), values, from, to);
  168.     }

  169.     /**
  170.      * Creates the sum of cubed deviations.
  171.      *
  172.      * @param ss Sum of squared deviations.
  173.      * @param values Values.
  174.      * @param from Inclusive start of the range.
  175.      * @param to Exclusive end of the range.
  176.      * @return {@code SumOfCubedDeviations} instance.
  177.      */
  178.     private static SumOfCubedDeviations create(SumOfSquaredDeviations ss, double[] values, int from, int to) {
  179.         // Edge cases
  180.         final double xbar = ss.getFirstMoment();
  181.         if (!Double.isFinite(xbar)) {
  182.             return new SumOfCubedDeviations(Double.NaN, ss);
  183.         }
  184.         if (!Double.isFinite(ss.sumSquaredDev)) {
  185.             // Note: If the sum-of-squared (SS) overflows then the same deviations when cubed
  186.             // will overflow. The *smallest* deviation to overflow SS is a full-length array of
  187.             // +/- values around a mean of zero, or approximately sqrt(MAX_VALUE / 2^31) = 2.89e149.
  188.             // In this case the sum cubed could be finite due to cancellation
  189.             // but this cannot be computed. Only a small array can be known to be zero.
  190.             return new SumOfCubedDeviations(ss.n <= LENGTH_TWO ? 0 : Double.NaN, ss);
  191.         }
  192.         // Compute the sum of cubed deviations.
  193.         double s = 0;
  194.         // n=1: no deviation
  195.         // n=2: the two deviations from the mean are equal magnitude
  196.         // and opposite sign. So the sum-of-cubed deviations is zero.
  197.         if (ss.n > LENGTH_TWO) {
  198.             for (int i = from; i < to; i++) {
  199.                 s += pow3(values[i] - xbar);
  200.             }
  201.         }
  202.         return new SumOfCubedDeviations(s, ss);
  203.     }

  204.     /**
  205.      * Returns an instance populated using the input {@code values}.
  206.      *
  207.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  208.      * different from this instance.
  209.      *
  210.      * @param values Values.
  211.      * @return {@code SumOfCubedDeviations} instance.
  212.      */
  213.     static SumOfCubedDeviations of(int... values) {
  214.         return ofRange(values, 0, values.length);
  215.     }

  216.     /**
  217.      * Returns an instance populated using the specified range of {@code values}.
  218.      *
  219.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  220.      * different from this instance.
  221.      *
  222.      * <p>Warning: No range checks are performed.
  223.      *
  224.      * @param values Values.
  225.      * @param from Inclusive start of the range.
  226.      * @param to Exclusive end of the range.
  227.      * @return {@code SumOfCubedDeviations} instance.
  228.      */
  229.     static SumOfCubedDeviations ofRange(int[] values, int from, int to) {
  230.         // Logic shared with the double[] version with int[] lower order moments
  231.         if (from == to) {
  232.             return new SumOfCubedDeviations();
  233.         }
  234.         final IntVariance variance = IntVariance.createFromRange(values, from, to);
  235.         final double xbar = variance.computeMean();
  236.         final double ss = variance.computeSumOfSquaredDeviations();

  237.         double sc = 0;
  238.         if (to - from > LENGTH_TWO) {
  239.             for (int i = from; i < to; i++) {
  240.                 sc += pow3(values[i] - xbar);
  241.             }
  242.         }
  243.         return new SumOfCubedDeviations(sc, ss, xbar, to - from);
  244.     }

  245.     /**
  246.      * Returns an instance populated using the input {@code values}.
  247.      *
  248.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  249.      * different from this instance.
  250.      *
  251.      * @param values Values.
  252.      * @return {@code SumOfCubedDeviations} instance.
  253.      */
  254.     static SumOfCubedDeviations of(long... values) {
  255.         return ofRange(values, 0, values.length);
  256.     }

  257.     /**
  258.      * Returns an instance populated using the specified range of {@code values}.
  259.      *
  260.      * <p>Note: {@code SumOfCubedDeviations} computed using {@link #accept(double) accept} may be
  261.      * different from this instance.
  262.      *
  263.      * <p>Warning: No range checks are performed.
  264.      *
  265.      * @param values Values.
  266.      * @param from Inclusive start of the range.
  267.      * @param to Exclusive end of the range.
  268.      * @return {@code SumOfCubedDeviations} instance.
  269.      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
  270.      */
  271.     static SumOfCubedDeviations ofRange(long[] values, int from, int to) {
  272.         // Logic shared with the double[] version with int[] lower order moments
  273.         if (from == to) {
  274.             return new SumOfCubedDeviations();
  275.         }
  276.         final LongVariance variance = LongVariance.createFromRange(values, from, to);
  277.         final double xbar = variance.computeMean();
  278.         final double ss = variance.computeSumOfSquaredDeviations();

  279.         double sc = 0;
  280.         if (to - from > LENGTH_TWO) {
  281.             for (int i = from; i < to; i++) {
  282.                 sc += pow3(values[i] - xbar);
  283.             }
  284.         }
  285.         return new SumOfCubedDeviations(sc, ss, xbar, to - from);
  286.     }

  287.     /**
  288.      * Compute {@code x^3}.
  289.      * Uses compound multiplication.
  290.      *
  291.      * @param x Value.
  292.      * @return x^3
  293.      */
  294.     private static double pow3(double x) {
  295.         return x * x * x;
  296.     }

  297.     /**
  298.      * Updates the state of the statistic to reflect the addition of {@code value}.
  299.      *
  300.      * @param value Value.
  301.      */
  302.     @Override
  303.     public void accept(double value) {
  304.         // Require current s^2 * N == sum-of-square deviations
  305.         final double ss = sumSquaredDev;
  306.         final double np = n;
  307.         super.accept(value);
  308.         // Terms are arranged so that values that may be zero
  309.         // (np, ss) are first. This will cancel any overflow in
  310.         // multiplication of later terms (nDev * 3 and nDev^2).
  311.         // This handles initialisation when np in {0, 1) to zero
  312.         // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
  313.         // Note: account for the half-deviation representation by scaling by 6=3*2; 8=2^3
  314.         sumCubedDev = sumCubedDev -
  315.             ss * nDev * 6 +
  316.             (np - 1.0) * np * nDev * nDev * dev * 8;
  317.     }

  318.     /**
  319.      * Gets the sum of cubed deviations of all input values.
  320.      *
  321.      * <p>Note that the sum is subject to cancellation of potentially large
  322.      * positive and negative terms. A non-finite result may be returned
  323.      * due to intermediate overflow when the exact result may be a representable
  324.      * {@code double}.
  325.      *
  326.      * <p>Note: Any non-finite result should be considered a failed computation.
  327.      * The result is returned as computed and not consolidated to a single NaN.
  328.      * This is done for testing purposes to allow the result to be reported.
  329.      * In particular the sign of an infinity may not indicate the direction
  330.      * of the asymmetry (if any), only the direction of the first overflow in the
  331.      * computation. In the event of further overflow of a term to an opposite signed
  332.      * infinity the sum will be {@code NaN}.
  333.      *
  334.      * @return sum of cubed deviations of all values.
  335.      */
  336.     double getSumOfCubedDeviations() {
  337.         return Double.isFinite(getFirstMoment()) ? sumCubedDev : Double.NaN;
  338.     }

  339.     /**
  340.      * Combines the state of another {@code SumOfCubedDeviations} into this one.
  341.      *
  342.      * @param other Another {@code SumOfCubedDeviations} to be combined.
  343.      * @return {@code this} instance after combining {@code other}.
  344.      */
  345.     SumOfCubedDeviations combine(SumOfCubedDeviations other) {
  346.         if (n == 0) {
  347.             sumCubedDev = other.sumCubedDev;
  348.         } else if (other.n != 0) {
  349.             // Avoid overflow to compute the difference.
  350.             // This allows any samples of size n=1 to be combined as their SS=0.
  351.             // The result is a SC=0 for the combined n=2.
  352.             final double halfDiffOfMean = getFirstMomentHalfDifference(other);
  353.             sumCubedDev += other.sumCubedDev;
  354.             // Add additional terms that do not cancel to zero
  355.             if (halfDiffOfMean != 0) {
  356.                 final double n1 = n;
  357.                 final double n2 = other.n;
  358.                 if (n1 == n2) {
  359.                     // Optimisation where sizes are equal in double-precision.
  360.                     // This is of use in JDK streams as spliterators use a divide by two
  361.                     // strategy for parallel streams.
  362.                     sumCubedDev += (sumSquaredDev - other.sumSquaredDev) * halfDiffOfMean * 3;
  363.                 } else {
  364.                     final double n1n2 = n1 + n2;
  365.                     final double dm = 2 * (halfDiffOfMean / n1n2);
  366.                     sumCubedDev += (sumSquaredDev * n2 - other.sumSquaredDev * n1) * dm * 3 +
  367.                                    (n2 - n1) * (n1 * n2) * pow3(dm) * n1n2;
  368.                 }
  369.             }
  370.         }
  371.         super.combine(other);
  372.         return this;
  373.     }
  374. }