SumOfFourthDeviations.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.descriptive;

  18. /**
  19.  * Computes the sum of fourth deviations from the sample mean. This
  20.  * statistic is related to the fourth moment.
  21.  *
  22.  * <p>Uses a recursive updating formula as defined in Manca and Marin (2010), equation 16.
  23.  * Note that third term in that equation has been corrected by expansion of the same term
  24.  * from equation 15. Two sum of fourth (quad) deviations (Sq) can be combined using:
  25.  *
  26.  * <p>\[ Sq(X) = {Sq}_1 + {Sq}_2 + \frac{4(m_1 - m_2)(g_1 - g_2) N_1 N_2}{N_1 + N_2}
  27.  *                               + \frac{6(m_1 - m_2)^2(N_2^2 ss_1 + N_1^2 ss_2)}{(N_1 + N_2)^2}
  28.  *                               + \frac{(m_1 - m_2)^4((N_1^2 - N_1 N_2 + N_2^2) N_1 N_2}{(N_1 + N_2)^3} \]
  29.  *
  30.  * <p>where \( N \) is the group size, \( m \) is the mean, \( ss \) is
  31.  * the sum of squared deviations from the mean, and \( g \)
  32.  * is the asymmetrical index where \( g * N \) is the sum of fourth deviations from the mean.
  33.  * Note the term \( ({g_1} - {g_2}) N_1 N_2 == (sc_1 * N_2 - sc_2 * N_1 \)
  34.  * where \( sc \) is the sum of fourth deviations.
  35.  *
  36.  * <p>If \( N_1 \) is size 1 this reduces to:
  37.  *
  38.  * <p>\[ SC_{N+1} = {SC}_N + \frac{4(x - m) -sc}{N + 1}
  39.  *                         + \frac{6(x - m)^2 ss}{(N + 1)^2}
  40.  *                         + \frac{(x - m)^4((1 - N + N^2) N}{(N + 1)^3} \]
  41.  *
  42.  * <p>where \( ss \) is the sum of squared deviations, and \( sc \) is the sum of
  43.  * fourth deviations. This updating formula is identical to that used in
  44.  * {@code org.apache.commons.math3.stat.descriptive.moment.FourthMoment}. The final term
  45.  * uses a rearrangement \( (1 - N + N^2) = (N+1)^2 - 3N \).
  46.  *
  47.  * <p>Supports up to 2<sup>63</sup> (exclusive) observations.
  48.  * This implementation does not check for overflow of the count.
  49.  *
  50.  * <p><strong>Note that this implementation is not synchronized.</strong> If
  51.  * multiple threads access an instance of this class concurrently, and at least
  52.  * one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or
  53.  * {@link StatisticAccumulator#combine(StatisticResult) combine} method, it must be synchronized externally.
  54.  *
  55.  * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept}
  56.  * and {@link StatisticAccumulator#combine(StatisticResult) combine}
  57.  * as {@code accumulator} and {@code combiner} functions of
  58.  * {@link java.util.stream.Collector Collector} on a parallel stream,
  59.  * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()}
  60.  * provides the necessary partitioning, isolation, and merging of results for
  61.  * safe and efficient parallel execution.
  62.  *
  63.  * <p>References:
  64.  * <ul>
  65.  *   <li>Manca and Marin (2020)
  66.  *       Decomposition of the Sum of Cubes, the Sum Raised to the
  67.  *       Power of Four and Codeviance.
  68.  *       Applied Mathematics, 11, 1013-1020.
  69.  *       <a href="https://doi.org/10.4236/am.2020.1110067">doi: 10.4236/am.2020.1110067</a>
  70.  * </ul>
  71.  *
  72.  * @since 1.1
  73.  */
  74. class SumOfFourthDeviations extends SumOfCubedDeviations {
  75.     /** Sum of forth deviations of the values that have been added. */
  76.     private double sumFourthDev;

  77.     /**
  78.      * Create an instance.
  79.      */
  80.     SumOfFourthDeviations() {
  81.         // No-op
  82.     }

  83.     /**
  84.      * Create an instance with the given sum of fourth and squared deviations.
  85.      *
  86.      * @param sq Sum of fourth (quad) deviations.
  87.      * @param sc Sum of fourth deviations.
  88.      */
  89.     private SumOfFourthDeviations(double sq, SumOfCubedDeviations sc) {
  90.         super(sc);
  91.         this.sumFourthDev = sq;
  92.     }

  93.     /**
  94.      * Create an instance with the given sum of cubed and squared deviations,
  95.      * and first moment.
  96.      *
  97.      * @param sq Sum of fouth deviations.
  98.      * @param sc Sum of cubed deviations.
  99.      * @param ss Sum of squared deviations.
  100.      * @param m1 First moment.
  101.      * @param n Count of values.
  102.      */
  103.     private SumOfFourthDeviations(double sq, double sc, double ss, double m1, long n) {
  104.         super(sc, ss, m1, n);
  105.         this.sumFourthDev = sq;
  106.     }

  107.     /**
  108.      * Returns an instance populated using the input {@code values}.
  109.      *
  110.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be
  111.      * different from this instance.
  112.      *
  113.      * @param values Values.
  114.      * @return {@code SumOfFourthDeviations} instance.
  115.      */
  116.     static SumOfFourthDeviations of(double... values) {
  117.         if (values.length == 0) {
  118.             return new SumOfFourthDeviations();
  119.         }
  120.         return create(SumOfCubedDeviations.of(values), values, 0, values.length);
  121.     }

  122.     /**
  123.      * Returns an instance populated using the specified range of {@code values}.
  124.      *
  125.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept accept} may be
  126.      * different from this instance.
  127.      *
  128.      * <p>Warning: No range checks are performed.
  129.      *
  130.      * @param values Values.
  131.      * @param from Inclusive start of the range.
  132.      * @param to Exclusive end of the range.
  133.      * @return {@code SumOfFourthDeviations} instance.
  134.      * @since 1.2
  135.      */
  136.     static SumOfFourthDeviations ofRange(double[] values, int from, int to) {
  137.         if (from == to) {
  138.             return new SumOfFourthDeviations();
  139.         }
  140.         return create(SumOfCubedDeviations.ofRange(values, from, to), values, from, to);
  141.     }

  142.     /**
  143.      * Creates the sum of fourth deviations.
  144.      *
  145.      * <p>Uses the provided {@code sum} to create the first moment.
  146.      * This method is used by {@link DoubleStatistics} using a sum that can be reused
  147.      * for the {@link Sum} statistic.
  148.      *
  149.      * <p>Warning: No range checks are performed.
  150.      *
  151.      * @param sum Sum of the values.
  152.      * @param values Values.
  153.      * @param from Inclusive start of the range.
  154.      * @param to Exclusive end of the range.
  155.      * @return {@code SumOfFourthDeviations} instance.
  156.      */
  157.     static SumOfFourthDeviations createFromRange(org.apache.commons.numbers.core.Sum sum,
  158.                                                  double[] values, int from, int to) {
  159.         if (from == to) {
  160.             return new SumOfFourthDeviations();
  161.         }
  162.         return create(SumOfCubedDeviations.createFromRange(sum, values, from, to), values, from, to);
  163.     }

  164.     /**
  165.      * Creates the sum of fourth deviations.
  166.      *
  167.      * @param sc Sum of cubed deviations.
  168.      * @param values Values.
  169.      * @param from Inclusive start of the range.
  170.      * @param to Exclusive end of the range.
  171.      * @return {@code SumOfFourthDeviations} instance.
  172.      */
  173.     private static SumOfFourthDeviations create(SumOfCubedDeviations sc, double[] values, int from, int to) {
  174.         // Edge cases
  175.         final double xbar = sc.getFirstMoment();
  176.         if (!Double.isFinite(xbar) ||
  177.             !Double.isFinite(sc.sumSquaredDev) ||
  178.             !Double.isFinite(sc.sumCubedDev)) {
  179.             // Overflow computing lower order deviations will overflow
  180.             return new SumOfFourthDeviations(Double.NaN, sc);
  181.         }
  182.         // Compute the sum of fourth (quad) deviations.
  183.         // Note: This handles n=1.
  184.         double s = 0;
  185.         for (int i = from; i < to; i++) {
  186.             s += pow4(values[i] - xbar);
  187.         }
  188.         return new SumOfFourthDeviations(s, sc);
  189.     }

  190.     /**
  191.      * Compute {@code x^4}.
  192.      * Uses compound multiplication.
  193.      *
  194.      * @param x Value.
  195.      * @return x^4
  196.      */
  197.     private static double pow4(double x) {
  198.         final double x2 = x * x;
  199.         return x2 * x2;
  200.     }

  201.     /**
  202.      * Returns an instance populated using the input {@code values}.
  203.      *
  204.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
  205.      * different from this instance.
  206.      *
  207.      * @param values Values.
  208.      * @return {@code SumOfCubedDeviations} instance.
  209.      */
  210.     static SumOfFourthDeviations of(int... values) {
  211.         return ofRange(values, 0, values.length);
  212.     }

  213.     /**
  214.      * Returns an instance populated using the specified range of {@code values}.
  215.      *
  216.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
  217.      * different from this instance.
  218.      *
  219.      * <p>Warning: No range checks are performed.
  220.      *
  221.      * @param values Values.
  222.      * @param from Inclusive start of the range.
  223.      * @param to Exclusive end of the range.
  224.      * @return {@code SumOfFourthDeviations} instance.
  225.      */
  226.     static SumOfFourthDeviations ofRange(int[] values, int from, int to) {
  227.         // Logic shared with the double[] version with int[] lower order moments
  228.         if (from == to) {
  229.             return new SumOfFourthDeviations();
  230.         }
  231.         final IntVariance variance = IntVariance.createFromRange(values, from, to);
  232.         final double xbar = variance.computeMean();
  233.         final double ss = variance.computeSumOfSquaredDeviations();
  234.         // Unlike the double[] case, overflow/NaN is not possible:
  235.         // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155.
  236.         // Compute sum of cubed and fourth deviations together.
  237.         double sc = 0;
  238.         double sq = 0;
  239.         for (int i = from; i < to; i++) {
  240.             final double x = values[i] - xbar;
  241.             final double x2 = x * x;
  242.             sc += x2 * x;
  243.             sq += x2 * x2;
  244.         }
  245.         // Edge case to avoid floating-point error for zero
  246.         if (to - from <= LENGTH_TWO) {
  247.             sc = 0;
  248.         }
  249.         return new SumOfFourthDeviations(sq, sc, ss, xbar, to - from);
  250.     }

  251.     /**
  252.      * Returns an instance populated using the input {@code values}.
  253.      *
  254.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
  255.      * different from this instance.
  256.      *
  257.      * @param values Values.
  258.      * @return {@code SumOfFourthDeviations} instance.
  259.      */
  260.     static SumOfFourthDeviations of(long... values) {
  261.         return ofRange(values, 0, values.length);
  262.     }

  263.     /**
  264.      * Returns an instance populated using the specified range of {@code values}.
  265.      *
  266.      * <p>Note: {@code SumOfFourthDeviations} computed using {@link #accept(double) accept} may be
  267.      * different from this instance.
  268.      *
  269.      * <p>Warning: No range checks are performed.
  270.      *
  271.      * @param values Values.
  272.      * @param from Inclusive start of the range.
  273.      * @param to Exclusive end of the range.
  274.      * @return {@code SumOfFourthDeviations} instance.
  275.      */
  276.     static SumOfFourthDeviations ofRange(long[] values, int from, int to) {
  277.         // Logic shared with the double[] version with long[] lower order moments
  278.         if (from == to) {
  279.             return new SumOfFourthDeviations();
  280.         }
  281.         final LongVariance variance = LongVariance.createFromRange(values, from, to);
  282.         final double xbar = variance.computeMean();
  283.         final double ss = variance.computeSumOfSquaredDeviations();
  284.         // Unlike the double[] case, overflow/NaN is not possible:
  285.         // (max value)^4 times max array length ~ (2^31)^4 * 2^31 ~ 2^155.
  286.         // Compute sum of cubed and fourth deviations together.
  287.         double sc = 0;
  288.         double sq = 0;
  289.         for (int i = from; i < to; i++) {
  290.             final double x = values[i] - xbar;
  291.             final double x2 = x * x;
  292.             sc += x2 * x;
  293.             sq += x2 * x2;
  294.         }
  295.         // Edge case to avoid floating-point error for zero
  296.         if (to - from <= LENGTH_TWO) {
  297.             sc = 0;
  298.         }
  299.         return new SumOfFourthDeviations(sq, sc, ss, xbar, to - from);
  300.     }

  301.     /**
  302.      * Updates the state of the statistic to reflect the addition of {@code value}.
  303.      *
  304.      * @param value Value.
  305.      */
  306.     @Override
  307.     public void accept(double value) {
  308.         // Require current s^2 * N == sum-of-square deviations
  309.         // Require current g * N == sum-of-fourth deviations
  310.         final double ss = sumSquaredDev;
  311.         final double sc = sumCubedDev;
  312.         final double np = n;
  313.         super.accept(value);
  314.         // Terms are arranged so that values that may be zero
  315.         // (np, ss, sc) are first. This will cancel any overflow in
  316.         // multiplication of later terms (nDev * 4, nDev^2, nDev^4).
  317.         // This handles initialisation when np in {0, 1) to zero
  318.         // for any deviation (e.g. series MAX_VALUE, -MAX_VALUE).
  319.         // Note: (np1 * np1 - 3 * np) = (np+1)^2 - 3np = np^2 - np + 1
  320.         // Note: account for the half-deviation representation by scaling by 8=4*2; 24=6*2^2; 16=2^4
  321.         final double np1 = n;
  322.         sumFourthDev = sumFourthDev -
  323.             sc * nDev * 8 +
  324.             ss * nDev * nDev * 24 +
  325.             np * (np1 * np1 - 3 * np) * nDev * nDev * nDev * dev * 16;
  326.     }

  327.     /**
  328.      * Gets the sum of fourth deviations of all input values.
  329.      *
  330.      * <p>Note that the result should be positive. However the updating sum is subject to
  331.      * cancellation of potentially large positive and negative terms. Overflow of these
  332.      * terms can result in a sum of opposite signed infinities and a {@code NaN} result
  333.      * for finite input values where the correct result is positive infinity.
  334.      *
  335.      * <p>Note: Any non-finite result should be considered a failed computation. The
  336.      * result is returned as computed and not consolidated to a single NaN. This is done
  337.      * for testing purposes to allow the result to be reported. It is possible to track
  338.      * input values to finite/non-finite (e.g. using bit mask manipulation of the exponent
  339.      * field). However this statistic in currently used in the kurtosis and in the case
  340.      * of failed computation distinguishing a non-finite result is not useful.
  341.      *
  342.      * @return sum of fourth deviations of all values.
  343.      */
  344.     double getSumOfFourthDeviations() {
  345.         return Double.isFinite(getFirstMoment()) ? sumFourthDev : Double.NaN;
  346.     }

  347.     /**
  348.      * Combines the state of another {@code SumOfFourthDeviations} into this one.
  349.      *
  350.      * @param other Another {@code SumOfFourthDeviations} to be combined.
  351.      * @return {@code this} instance after combining {@code other}.
  352.      */
  353.     SumOfFourthDeviations combine(SumOfFourthDeviations other) {
  354.         if (n == 0) {
  355.             sumFourthDev = other.sumFourthDev;
  356.         } else if (other.n != 0) {
  357.             // Avoid overflow to compute the difference.
  358.             final double halfDiffOfMean = getFirstMomentHalfDifference(other);
  359.             sumFourthDev += other.sumFourthDev;
  360.             // Add additional terms that do not cancel to zero
  361.             if (halfDiffOfMean != 0) {
  362.                 final double n1 = n;
  363.                 final double n2 = other.n;
  364.                 if (n1 == n2) {
  365.                     // Optimisation where sizes are equal in double-precision.
  366.                     // This is of use in JDK streams as spliterators use a divide by two
  367.                     // strategy for parallel streams.
  368.                     // Note: (n1 * n2) * ((n1+n2)^2 - 3 * (n1 * n2)) == n^4
  369.                     sumFourthDev +=
  370.                         (sumCubedDev - other.sumCubedDev) * halfDiffOfMean * 4 +
  371.                         (sumSquaredDev + other.sumSquaredDev) * (halfDiffOfMean * halfDiffOfMean) * 6 +
  372.                         pow4(halfDiffOfMean) * n1 * 2;
  373.                 } else {
  374.                     final double n1n2 = n1 + n2;
  375.                     final double dm = 2 * (halfDiffOfMean / n1n2);
  376.                     // Use the rearrangement for parity with the accept method
  377.                     // n1*n1 - n1*n2 + n2*n2 == (n1+n2)^2 - 3*n1*n2
  378.                     sumFourthDev +=
  379.                         (sumCubedDev * n2 - other.sumCubedDev * n1) * dm * 4 +
  380.                         (n2 * n2 * sumSquaredDev + n1 * n1 * other.sumSquaredDev) * (dm * dm) * 6 +
  381.                         (n1 * n2) * (n1n2 * n1n2 - 3 * (n1 * n2)) * pow4(dm) * n1n2;
  382.                 }
  383.             }
  384.         }
  385.         super.combine(other);
  386.         return this;
  387.     }
  388. }