WilcoxonSignedRankTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.inference;

  18. import java.util.Arrays;
  19. import java.util.EnumSet;
  20. import java.util.Objects;
  21. import org.apache.commons.numbers.core.Sum;
  22. import org.apache.commons.statistics.distribution.NormalDistribution;
  23. import org.apache.commons.statistics.ranking.NaNStrategy;
  24. import org.apache.commons.statistics.ranking.NaturalRanking;
  25. import org.apache.commons.statistics.ranking.RankingAlgorithm;
  26. import org.apache.commons.statistics.ranking.TiesStrategy;

  27. /**
  28.  * Implements the Wilcoxon signed-rank test.
  29.  *
  30.  * @see <a href="https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test">Wilcoxon signed-rank test (Wikipedia)</a>
  31.  * @since 1.1
  32.  */
  33. public final class WilcoxonSignedRankTest {
  34.     /** Limit on sample size for the exact p-value computation. */
  35.     private static final int EXACT_LIMIT = 1023;
  36.     /** Limit on sample size for the exact p-value computation for the auto mode. */
  37.     private static final int AUTO_LIMIT = 50;
  38.     /** Ranking instance. */
  39.     private static final RankingAlgorithm RANKING = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);
  40.     /** Default instance. */
  41.     private static final WilcoxonSignedRankTest DEFAULT = new WilcoxonSignedRankTest(
  42.         AlternativeHypothesis.TWO_SIDED, PValueMethod.AUTO, true, 0);

  43.     /** Alternative hypothesis. */
  44.     private final AlternativeHypothesis alternative;
  45.     /** Method to compute the p-value. */
  46.     private final PValueMethod pValueMethod;
  47.     /** Perform continuity correction. */
  48.     private final boolean continuityCorrection;
  49.     /** Expected location shift. */
  50.     private final double mu;

  51.     /**
  52.      * Result for the Wilcoxon signed-rank test.
  53.      *
  54.      * <p>This class is immutable.
  55.      *
  56.      * @since 1.1
  57.      */
  58.     public static final class Result extends BaseSignificanceResult {
  59.         /** Flag indicating the data had tied values. */
  60.         private final boolean tiedValues;
  61.         /** Flag indicating the data had zero values. */
  62.         private final boolean zeroValues;

  63.         /**
  64.          * Create an instance.
  65.          *
  66.          * @param statistic Test statistic.
  67.          * @param tiedValues Flag indicating the data had tied values.
  68.          * @param zeroValues Flag indicating the data had zero values.
  69.          * @param p Result p-value.
  70.          */
  71.         Result(double statistic, boolean tiedValues, boolean zeroValues, double p) {
  72.             super(statistic, p);
  73.             this.tiedValues = tiedValues;
  74.             this.zeroValues = zeroValues;
  75.         }

  76.         /**
  77.          * Return {@code true} if the data had tied values (with equal ranks).
  78.          *
  79.          * <p>Note: The exact computation cannot be used when there are tied values.
  80.          * The p-value uses the asymptotic approximation using a tie correction.
  81.          *
  82.          * @return {@code true} if there were tied values
  83.          */
  84.         public boolean hasTiedValues() {
  85.             return tiedValues;
  86.         }

  87.         /**
  88.          * Return {@code true} if the data had zero values. This occurs when the differences between
  89.          * sample values matched the expected location shift: {@code z = x - y == mu}.
  90.          *
  91.          * <p>Note: The exact computation cannot be used when there are zero values.
  92.          * The p-value uses the asymptotic approximation.
  93.          *
  94.          * @return {@code true} if there were zero values
  95.          */
  96.         public boolean hasZeroValues() {
  97.             return zeroValues;
  98.         }
  99.     }

  100.     /**
  101.      * @param alternative Alternative hypothesis.
  102.      * @param method P-value method.
  103.      * @param continuityCorrection true to perform continuity correction.
  104.      * @param mu Expected location shift.
  105.      */
  106.     private WilcoxonSignedRankTest(AlternativeHypothesis alternative, PValueMethod method,
  107.         boolean continuityCorrection, double mu) {
  108.         this.alternative = alternative;
  109.         this.pValueMethod = method;
  110.         this.continuityCorrection = continuityCorrection;
  111.         this.mu = mu;
  112.     }

  113.     /**
  114.      * Return an instance using the default options.
  115.      *
  116.      * <ul>
  117.      * <li>{@link AlternativeHypothesis#TWO_SIDED}
  118.      * <li>{@link PValueMethod#AUTO}
  119.      * <li>{@link ContinuityCorrection#ENABLED}
  120.      * <li>{@linkplain #withMu(double) mu = 0}
  121.      * </ul>
  122.      *
  123.      * @return default instance
  124.      */
  125.     public static WilcoxonSignedRankTest withDefaults() {
  126.         return DEFAULT;
  127.     }

  128.     /**
  129.      * Return an instance with the configured alternative hypothesis.
  130.      *
  131.      * @param v Value.
  132.      * @return an instance
  133.      */
  134.     public WilcoxonSignedRankTest with(AlternativeHypothesis v) {
  135.         return new WilcoxonSignedRankTest(Objects.requireNonNull(v), pValueMethod, continuityCorrection, mu);
  136.     }

  137.     /**
  138.      * Return an instance with the configured p-value method.
  139.      *
  140.      * @param v Value.
  141.      * @return an instance
  142.      * @throws IllegalArgumentException if the value is not in the allowed options or is null
  143.      */
  144.     public WilcoxonSignedRankTest with(PValueMethod v) {
  145.         return new WilcoxonSignedRankTest(alternative,
  146.             Arguments.checkOption(v, EnumSet.of(PValueMethod.AUTO, PValueMethod.EXACT, PValueMethod.ASYMPTOTIC)),
  147.             continuityCorrection, mu);
  148.     }

  149.     /**
  150.      * Return an instance with the configured continuity correction.
  151.      *
  152.      * <p>If {@code enabled}, adjust the Wilcoxon rank statistic by 0.5 towards the
  153.      * mean value when computing the z-statistic if a normal approximation is used
  154.      * to compute the p-value.
  155.      *
  156.      * @param v Value.
  157.      * @return an instance
  158.      */
  159.     public WilcoxonSignedRankTest with(ContinuityCorrection v) {
  160.         return new WilcoxonSignedRankTest(alternative, pValueMethod,
  161.             Objects.requireNonNull(v) == ContinuityCorrection.ENABLED, mu);
  162.     }

  163.     /**
  164.      * Return an instance with the configured expected difference {@code mu}.
  165.      *
  166.      * @param v Value.
  167.      * @return an instance
  168.      * @throws IllegalArgumentException if the value is not finite
  169.      */
  170.     public WilcoxonSignedRankTest withMu(double v) {
  171.         return new WilcoxonSignedRankTest(alternative, pValueMethod, continuityCorrection, Arguments.checkFinite(v));
  172.     }

  173.     /**
  174.      * Computes the Wilcoxon signed ranked statistic comparing the differences between
  175.      * sample values {@code z = x - y} to {@code mu}.
  176.      *
  177.      * <p>This method handles matching samples {@code z[i] == mu} (no difference)
  178.      * by including them in the ranking of samples but excludes them from the test statistic
  179.      * (<i>signed-rank zero procedure</i>).
  180.      *
  181.      * @param z Signed differences between sample values.
  182.      * @return Wilcoxon <i>positive-rank sum</i> statistic (W+)
  183.      * @throws IllegalArgumentException if {@code z} is zero-length; contains NaN values;
  184.      * or all differences are equal to the expected difference
  185.      * @see #withMu(double)
  186.      */
  187.     public double statistic(double[] z) {
  188.         return computeStatistic(z, mu);
  189.     }

  190.     /**
  191.      * Computes the Wilcoxon signed ranked statistic comparing the differences between two related
  192.      * samples or repeated measurements on a single sample.
  193.      *
  194.      * <p>This method handles matching samples {@code x[i] - mu == y[i]} (no difference)
  195.      * by including them in the ranking of samples but excludes them from the test statistic
  196.      * (<i>signed-rank zero procedure</i>).
  197.      *
  198.      * <p>This method is functionally equivalent to creating an array of differences
  199.      * {@code z = x - y} and calling {@link #statistic(double[]) statistic(z)}; the
  200.      * implementation may use an optimised method to compute the differences and
  201.      * rank statistic if {@code mu != 0}.
  202.      *
  203.      * @param x First sample values.
  204.      * @param y Second sample values.
  205.      * @return Wilcoxon <i>positive-rank sum</i> statistic (W+)
  206.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length; are not
  207.      * the same length; contain NaN values; or {@code x[i] == y[i]} for all data
  208.      * @see #withMu(double)
  209.      */
  210.     public double statistic(double[] x, double[] y) {
  211.         checkSamples(x, y);
  212.         // Apply mu before creation of differences
  213.         final double[] z = calculateDifferences(mu, x, y);
  214.         return computeStatistic(z, 0);
  215.     }

  216.     /**
  217.      * Performs a Wilcoxon signed ranked statistic comparing the differences between
  218.      * sample values {@code z = x - y} to {@code mu}.
  219.      *
  220.      * <p>This method handles matching samples {@code z[i] == mu} (no difference)
  221.      * by including them in the ranking of samples but excludes them from the test statistic
  222.      * (<i>signed-rank zero procedure</i>).
  223.      *
  224.      * <p>The test is defined by the {@link AlternativeHypothesis}.
  225.      *
  226.      * <ul>
  227.      * <li>'two-sided': the distribution of the difference is not symmetric about {@code mu}.
  228.      * <li>'greater': the distribution of the difference is stochastically greater than a
  229.      * distribution symmetric about {@code mu}.
  230.      * <li>'less': the distribution of the difference is stochastically less than a distribution
  231.      * symmetric about {@code mu}.
  232.      * </ul>
  233.      *
  234.      * <p>If the p-value method is {@linkplain PValueMethod#AUTO auto} an exact p-value
  235.      * is computed if the samples contain less than 50 values; otherwise a normal
  236.      * approximation is used.
  237.      *
  238.      * <p>Computation of the exact p-value is only valid if there are no matching
  239.      * samples {@code z[i] == mu} and no tied ranks in the data; otherwise the
  240.      * p-value resorts to the asymptotic Cureton approximation using a tie
  241.      * correction and an optional continuity correction.
  242.      *
  243.      * <p><strong>Note: </strong>
  244.      * Computation of the exact p-value requires the
  245.      * sample size {@code <= 1023}. Exact computation requires tabulation of values
  246.      * not exceeding size {@code n(n+1)/2} and computes in Order(n*n/2). Maximum
  247.      * memory usage is approximately 4 MiB.
  248.      *
  249.      * @param z Differences between sample values.
  250.      * @return test result
  251.      * @throws IllegalArgumentException if {@code z} is zero-length; contains NaN values;
  252.      * or all differences are zero
  253.      * @see #withMu(double)
  254.      * @see #with(AlternativeHypothesis)
  255.      * @see #with(ContinuityCorrection)
  256.      */
  257.     public Result test(double[] z) {
  258.         return computeTest(z, mu);
  259.     }

  260.     /**
  261.      * Performs a Wilcoxon signed ranked statistic comparing mean for two related
  262.      * samples or repeated measurements on a single sample.
  263.      *
  264.      * <p>This method handles matching samples {@code x[i] - mu == y[i]} (no difference)
  265.      * by including them in the ranking of samples but excludes them
  266.      * from the test statistic (<i>signed-rank zero procedure</i>).
  267.      *
  268.      * <p>This method is functionally equivalent to creating an array of differences
  269.      * {@code z = x - y} and calling {@link #test(double[]) test(z)}; the
  270.      * implementation may use an optimised method to compute the differences and
  271.      * rank statistic if {@code mu != 0}.
  272.      *
  273.      * @param x First sample values.
  274.      * @param y Second sample values.
  275.      * @return test result
  276.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length; are not
  277.      * the same length; contain NaN values; or {@code x[i] - mu == y[i]} for all data
  278.      * @see #statistic(double[], double[])
  279.      * @see #test(double[])
  280.      */
  281.     public Result test(double[] x, double[] y) {
  282.         checkSamples(x, y);
  283.         // Apply mu before creation of differences
  284.         final double[] z = calculateDifferences(mu, x, y);
  285.         return computeTest(z, 0);
  286.     }

  287.     /**
  288.      * Computes the Wilcoxon signed ranked statistic comparing the differences between
  289.      * sample values {@code z = x - y} to {@code mu}.
  290.      *
  291.      * @param z Signed differences between sample values.
  292.      * @param mu Expected difference.
  293.      * @return Wilcoxon <i>positive-rank sum</i> statistic (W+)
  294.      * @throws IllegalArgumentException if {@code z} is zero-length; contains NaN values;
  295.      * or all differences are equal to the expected difference
  296.      * @see #withMu(double)
  297.      */
  298.     private static double computeStatistic(double[] z, double mu) {
  299.         Arguments.checkValuesRequiredSize(z.length, 1);
  300.         final double[] x = StatisticUtils.subtract(z, mu);
  301.         // Raises an error if all zeros
  302.         countZeros(x);
  303.         final double[] zAbs = calculateAbsoluteDifferences(x);
  304.         final double[] ranks = RANKING.apply(zAbs);
  305.         return calculateW(x, ranks);
  306.     }

  307.     /**
  308.      * Performs a Wilcoxon signed ranked statistic comparing the differences between
  309.      * sample values {@code z = x - y} to {@code mu}.
  310.      *
  311.      * @param z Differences between sample values.
  312.      * @param expectedMu Expected difference.
  313.      * @return test result
  314.      * @throws IllegalArgumentException if {@code z} is zero-length; contains NaN values;
  315.      * or all differences are zero
  316.      */
  317.     private Result computeTest(double[] z, double expectedMu) {
  318.         // Computation as above. The ranks are required for tie correction.
  319.         Arguments.checkValuesRequiredSize(z.length, 1);
  320.         final double[] x = StatisticUtils.subtract(z, expectedMu);
  321.         // Raises an error if all zeros
  322.         final int zeros = countZeros(x);
  323.         final double[] zAbs = calculateAbsoluteDifferences(x);
  324.         final double[] ranks = RANKING.apply(zAbs);
  325.         final double wPlus = calculateW(x, ranks);

  326.         // Exact p has strict requirements for no zeros, no ties
  327.         final double c = calculateTieCorrection(ranks);
  328.         final boolean tiedValues = c != 0;

  329.         final int n = z.length;
  330.         // Exact p requires no ties and no zeros
  331.         final double p;
  332.         if (selectMethod(pValueMethod, n) == PValueMethod.EXACT && n <= EXACT_LIMIT && !tiedValues && zeros == 0) {
  333.             p = calculateExactPValue((int) wPlus, n, alternative);
  334.         } else {
  335.             p = calculateAsymptoticPValue(wPlus, n, zeros, c, alternative, continuityCorrection);
  336.         }
  337.         return new Result(wPlus, tiedValues, zeros != 0, p);
  338.     }

  339.     /**
  340.      * Ensures that the provided arrays fulfil the assumptions.
  341.      *
  342.      * @param x First sample.
  343.      * @param y Second sample.
  344.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length; or do not
  345.      * have the same length
  346.      */
  347.     private static void checkSamples(double[] x, double[] y) {
  348.         Arguments.checkValuesRequiredSize(x.length, 1);
  349.         Arguments.checkValuesRequiredSize(y.length, 1);
  350.         Arguments.checkValuesSizeMatch(x.length, y.length);
  351.     }

  352.     /**
  353.      * Calculates x[i] - mu - y[i] for all i.
  354.      *
  355.      * @param mu Expected difference.
  356.      * @param x First sample.
  357.      * @param y Second sample.
  358.      * @return z = x - y
  359.      */
  360.     private static double[] calculateDifferences(double mu, double[] x, double[] y) {
  361.         final double[] z = new double[x.length];
  362.         for (int i = 0; i < x.length; ++i) {
  363.             z[i] = x[i] - mu - y[i];
  364.         }
  365.         return z;
  366.     }

  367.     /**
  368.      * Calculates |z[i]| for all i.
  369.      *
  370.      * @param z Sample.
  371.      * @return |z|
  372.      */
  373.     private static double[] calculateAbsoluteDifferences(double[] z) {
  374.         final double[] zAbs = new double[z.length];
  375.         for (int i = 0; i < z.length; ++i) {
  376.             zAbs[i] = Math.abs(z[i]);
  377.         }
  378.         return zAbs;
  379.     }

  380.     /**
  381.      * Calculate the Wilcoxon <i>positive-rank sum</i> statistic.
  382.      *
  383.      * @param obs Observed signed value.
  384.      * @param ranks Ranks (including averages for ties).
  385.      * @return Wilcoxon <i>positive-rank sum</i> statistic (W+)
  386.      */
  387.     private static double calculateW(final double[] obs, final double[] ranks) {
  388.         final Sum wPlus = Sum.create();
  389.         for (int i = 0; i < obs.length; ++i) {
  390.             // Must be positive (excludes zeros)
  391.             if (obs[i] > 0) {
  392.                 wPlus.add(ranks[i]);
  393.             }
  394.         }
  395.         return wPlus.getAsDouble();
  396.     }

  397.     /**
  398.      * Count the number of zeros in the data.
  399.      *
  400.      * @param z Input data.
  401.      * @return the zero count
  402.      * @throws IllegalArgumentException if the data is all zeros
  403.      */
  404.     private static int countZeros(final double[] z) {
  405.         int c = 0;
  406.         for (final double v : z) {
  407.             if (v == 0) {
  408.                 c++;
  409.             }
  410.         }
  411.         if (c == z.length) {
  412.             throw new InferenceException("All signed differences are zero");
  413.         }
  414.         return c;
  415.     }

  416.     /**
  417.      * Calculate the tie correction.
  418.      * Destructively modifies ranks (by sorting).
  419.      * <pre>
  420.      * c = sum(t^3 - t)
  421.      * </pre>
  422.      * <p>where t is the size of each group of tied observations.
  423.      *
  424.      * @param ranks Ranks
  425.      * @return the tie correction
  426.      */
  427.     static double calculateTieCorrection(double[] ranks) {
  428.         double c = 0;
  429.         int ties = 1;
  430.         Arrays.sort(ranks);
  431.         double last = Double.NaN;
  432.         for (final double rank : ranks) {
  433.             // Deliberate use of equals
  434.             if (last == rank) {
  435.                 // Extend the tied group
  436.                 ties++;
  437.             } else {
  438.                 if (ties != 1) {
  439.                     c += Math.pow(ties, 3) - ties;
  440.                     ties = 1;
  441.                 }
  442.                 last = rank;
  443.             }
  444.         }
  445.         // Final ties count
  446.         c += Math.pow(ties, 3) - ties;
  447.         return c;
  448.     }

  449.     /**
  450.      * Select the method to compute the p-value.
  451.      *
  452.      * @param method P-value method.
  453.      * @param n Size of the data.
  454.      * @return p-value method.
  455.      */
  456.     private static PValueMethod selectMethod(PValueMethod method, int n) {
  457.         return method == PValueMethod.AUTO && n <= AUTO_LIMIT ? PValueMethod.EXACT : method;
  458.     }

  459.     /**
  460.      * Compute the asymptotic p-value using the Cureton normal approximation. This
  461.      * corrects for zeros in the signed-rank zero procedure and/or ties corrected using
  462.      * the average method.
  463.      *
  464.      * @param wPlus Wilcoxon signed rank value (W+).
  465.      * @param n Number of subjects.
  466.      * @param z Count of number of zeros
  467.      * @param c Tie-correction
  468.      * @param alternative Alternative hypothesis.
  469.      * @param continuityCorrection true to use a continuity correction.
  470.      * @return two-sided asymptotic p-value
  471.      */
  472.     private static double calculateAsymptoticPValue(double wPlus, int n, double z, double c,
  473.             AlternativeHypothesis alternative, boolean continuityCorrection) {
  474.         // E[W+] = n * (n + 1) / 4 - z * (z + 1) / 4
  475.         final double e = (n * (n + 1.0) - z * (z + 1.0)) * 0.25;

  476.         final double variance = ((n * (n + 1.0) * (2 * n + 1.0)) -
  477.                                 (z * (z + 1.0) * (2 * z + 1.0)) - c * 0.5) / 24;

  478.         double x = wPlus - e;
  479.         if (continuityCorrection) {
  480.             // +/- 0.5 is a continuity correction towards the expected.
  481.             if (alternative == AlternativeHypothesis.GREATER_THAN) {
  482.                 x -= 0.5;
  483.             } else if (alternative == AlternativeHypothesis.LESS_THAN) {
  484.                 x += 0.5;
  485.             } else {
  486.                 // two-sided. Shift towards the expected of zero.
  487.                 // Use of signum ignores x==0 (i.e. not copySign(0.5, z))
  488.                 x -= Math.signum(x) * 0.5;
  489.             }
  490.         }
  491.         x /= Math.sqrt(variance);

  492.         final NormalDistribution standardNormal = NormalDistribution.of(0, 1);
  493.         if (alternative == AlternativeHypothesis.GREATER_THAN) {
  494.             return standardNormal.survivalProbability(x);
  495.         }
  496.         if (alternative == AlternativeHypothesis.LESS_THAN) {
  497.             return standardNormal.cumulativeProbability(x);
  498.         }
  499.         // two-sided
  500.         return 2 * standardNormal.survivalProbability(Math.abs(x));
  501.     }

  502.     /**
  503.      * Compute the exact p-value.
  504.      *
  505.      * <p>This computation requires that no zeros or ties are found in the data. The input
  506.      * value n is limited to 1023.
  507.      *
  508.      * @param w1 Wilcoxon signed rank value (W+, or W-).
  509.      * @param n Number of subjects.
  510.      * @param alternative Alternative hypothesis.
  511.      * @return exact p-value (two-sided, greater, or less using the options)
  512.      */
  513.     private static double calculateExactPValue(int w1, int n, AlternativeHypothesis alternative) {
  514.         // T+ plus T- equals the sum of the ranks: n(n+1)/2
  515.         // Compute using the lower half.
  516.         // No overflow here if n <= 1023.
  517.         final int sum = n * (n + 1) / 2;
  518.         final int w2 = sum - w1;

  519.         // Return the correct side:
  520.         if (alternative == AlternativeHypothesis.GREATER_THAN) {
  521.             // sf(w1 - 1)
  522.             return sf(w1 - 1, w2 + 1, n);
  523.         }
  524.         if (alternative == AlternativeHypothesis.LESS_THAN) {
  525.             // cdf(w1)
  526.             return cdf(w1, w2, n);
  527.         }
  528.         // two-sided: 2 * sf(max(w1, w2) - 1) or 2 * cdf(min(w1, w2))
  529.         final double p = 2 * computeCdf(Math.min(w1, w2), n);
  530.         // Clip to range: [0, 1]
  531.         return Math.min(1, p);
  532.     }

  533.     /**
  534.      * Compute the cumulative density function of the Wilcoxon signed rank W+ statistic.
  535.      * The W- statistic is passed for convenience to exploit symmetry in the distribution.
  536.      *
  537.      * @param w1 Wilcoxon W+ statistic
  538.      * @param w2 Wilcoxon W- statistic
  539.      * @param n Number of subjects.
  540.      * @return {@code Pr(X <= k)}
  541.      */
  542.     private static double cdf(int w1, int w2, int n) {
  543.         // Exploit symmetry. Note the distribution is discrete thus requiring (w2 - 1).
  544.         return w2 > w1 ?
  545.             computeCdf(w1, n) :
  546.             1 - computeCdf(w2 - 1, n);
  547.     }

  548.     /**
  549.      * Compute the survival function of the Wilcoxon signed rank W+ statistic.
  550.      * The W- statistic is passed for convenience to exploit symmetry in the distribution.
  551.      *
  552.      * @param w1 Wilcoxon W+ statistic
  553.      * @param w2 Wilcoxon W- statistic
  554.      * @param n Number of subjects.
  555.      * @return {@code Pr(X <= k)}
  556.      */
  557.     private static double sf(int w1, int w2, int n) {
  558.         // Opposite of the CDF
  559.         return w2 > w1 ?
  560.             1 - computeCdf(w1, n) :
  561.             computeCdf(w2 - 1, n);
  562.     }

  563.     /**
  564.      * Compute the cumulative density function for the distribution of the Wilcoxon
  565.      * signed rank statistic. This is a discrete distribution and is only valid
  566.      * when no zeros or ties are found in the data.
  567.      *
  568.      * <p>This should be called with the lower of W+ or W- for computational efficiency.
  569.      * The input value n is limited to 1023.
  570.      *
  571.      * <p>Uses recursion to compute the density for {@code X <= t} and sums the values.
  572.      * See: https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test#Computing_the_null_distribution
  573.      *
  574.      * @param t Smallest Wilcoxon signed rank value (W+, or W-).
  575.      * @param n Number of subjects.
  576.      * @return {@code Pr(T <= t)}
  577.      */
  578.     private static double computeCdf(int t, int n) {
  579.         // Currently limited to n=1023.
  580.         // Note:
  581.         // The limit for t is n(n+1)/2.
  582.         // The highest possible sum is bounded by the normalisation factor 2^n.
  583.         // n         t              sum          support
  584.         // 31        [0, 496]       < 2^31       int
  585.         // 63        [0, 2016]      < 2^63       long
  586.         // 1023      [0, 523766]    < 2^1023     double

  587.         if (t <= 0) {
  588.             // No recursion required
  589.             return t < 0 ? 0 : Math.scalb(1, -n);
  590.         }

  591.         // Define u_n(t) as the number of sign combinations for T = t
  592.         // Pr(T == t) = u_n(t) / 2^n
  593.         // Sum them to create the cumulative probability Pr(T <= t).
  594.         //
  595.         // Recursive formula:
  596.         // u_n(t) = u_{n-1}(t) + u_{n-1}(t-n)
  597.         // u_0(0) = 1
  598.         // u_0(t) = 0 : t != 0
  599.         // u_n(t) = 0 : t < 0 || t > n(n+1)/2

  600.         // Compute all u_n(t) up to t.
  601.         final double[] u = new double[t + 1];
  602.         // Initialize u_1(t) using base cases for recursion
  603.         u[0] = u[1] = 1;

  604.         // Each u_n(t) is created using the current correct values for u_{n-1}(t)
  605.         for (int nn = 2; nn < n + 1; nn++) {
  606.             // u[t] holds the correct value for u_{n-1}(t)
  607.             // u_n(t) = u_{n-1}(t) + u_{n-1}(t-n)
  608.             for (int tt = t; tt >= nn; tt--) {
  609.                 u[tt] += u[tt - nn];
  610.             }
  611.         }
  612.         final double sum = Arrays.stream(u).sum();

  613.         // Finally divide by the number of possible sums: 2^n
  614.         return Math.scalb(sum, -n);
  615.     }
  616. }