MannWhitneyUTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.inference;

  18. import java.lang.ref.SoftReference;
  19. import java.util.Arrays;
  20. import java.util.EnumSet;
  21. import java.util.Objects;
  22. import java.util.concurrent.locks.ReentrantLock;
  23. import java.util.stream.IntStream;
  24. import org.apache.commons.numbers.combinatorics.BinomialCoefficientDouble;
  25. import org.apache.commons.statistics.distribution.NormalDistribution;
  26. import org.apache.commons.statistics.ranking.NaNStrategy;
  27. import org.apache.commons.statistics.ranking.NaturalRanking;
  28. import org.apache.commons.statistics.ranking.RankingAlgorithm;
  29. import org.apache.commons.statistics.ranking.TiesStrategy;

  30. /**
  31.  * Implements the Mann-Whitney U test (also called Wilcoxon rank-sum test).
  32.  *
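 * <p>A minimal usage sketch (the sample values below are purely illustrative):
 * <pre>{@code
 * double[] x = {19, 22, 16, 29, 24};
 * double[] y = {20, 11, 17, 12};
 * MannWhitneyUTest.Result r = MannWhitneyUTest.withDefaults().test(x, y);
 * double u1 = r.getStatistic();   // U1 statistic
 * double p = r.getPValue();       // two-sided p-value
 * }</pre>
 *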
  33.  * @see <a href="https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test">
  34.  * Mann-Whitney U test (Wikipedia)</a>
  35.  * @since 1.1
  36.  */
  37. public final class MannWhitneyUTest {
  38.     /** Limit on sample size for the exact p-value computation for the auto mode. */
  39.     private static final int AUTO_LIMIT = 50;
  40.     /** Ranking instance. */
  41.     private static final RankingAlgorithm RANKING = new NaturalRanking(NaNStrategy.FAILED, TiesStrategy.AVERAGE);
  42.     /** Value for an unset f computation. */
  43.     private static final double UNSET = -1;
  44.     /** An object to use for synchronization when accessing the cache of F. */
  45.     private static final ReentrantLock LOCK = new ReentrantLock();
  46.     /** A reference to a previously computed storage for f.
  47.      * Use of a SoftReference ensures this is garbage collected before an OutOfMemoryError.
  48.      * The value should only be accessed, checked for size and optionally
  49.      * modified when holding the lock. When the storage is determined to be the correct
  50.      * size it can be returned for read/write to the array when not holding the lock. */
  51.     private static SoftReference<double[][][]> cacheF = new SoftReference<>(null);
  52.     /** Default instance. */
  53.     private static final MannWhitneyUTest DEFAULT = new MannWhitneyUTest(
  54.         AlternativeHypothesis.TWO_SIDED, PValueMethod.AUTO, true, 0);

  55.     /** Alternative hypothesis. */
  56.     private final AlternativeHypothesis alternative;
  57.     /** Method to compute the p-value. */
  58.     private final PValueMethod pValueMethod;
  59.     /** Perform continuity correction. */
  60.     private final boolean continuityCorrection;
  61.     /** Expected location shift. */
  62.     private final double mu;

  63.     /**
  64.      * Result for the Mann-Whitney U test.
  65.      *
  66.      * <p>This class is immutable.
  67.      *
  68.      * @since 1.1
  69.      */
  70.     public static final class Result extends BaseSignificanceResult {
  71.         /** Flag indicating the data has tied values. */
  72.         private final boolean tiedValues;

  73.         /**
  74.          * Create an instance.
  75.          *
  76.          * @param statistic Test statistic.
  77.          * @param tiedValues Flag indicating the data has tied values.
  78.          * @param p Result p-value.
  79.          */
  80.         Result(double statistic, boolean tiedValues, double p) {
  81.             super(statistic, p);
  82.             this.tiedValues = tiedValues;
  83.         }

  84.         /**
  85.          * {@inheritDoc}
  86.          *
  87.          * <p>This is the U<sub>1</sub> statistic. Compute the U<sub>2</sub> statistic using
  88.          * the original sample lengths {@code n} and {@code m} using:
  89.          * <pre>
  90.          * u2 = (long) n * m - u1;
  91.          * </pre>
  92.          */
  93.         @Override
  94.         public double getStatistic() {
  95.             // Note: This method is here for documentation
  96.             return super.getStatistic();
  97.         }

  98.         /**
  99.          * Return {@code true} if the data had tied values.
  100.          *
  101.          * <p>Note: The exact computation cannot be used when there are tied values.
  102.          *
  103.          * @return {@code true} if there were tied values
  104.          */
  105.         public boolean hasTiedValues() {
  106.             return tiedValues;
  107.         }
  108.     }

  109.     /**
  110.      * @param alternative Alternative hypothesis.
  111.      * @param method P-value method.
  112.      * @param continuityCorrection true to perform continuity correction.
  113.      * @param mu Expected location shift.
  114.      */
  115.     private MannWhitneyUTest(AlternativeHypothesis alternative, PValueMethod method,
  116.         boolean continuityCorrection, double mu) {
  117.         this.alternative = alternative;
  118.         this.pValueMethod = method;
  119.         this.continuityCorrection = continuityCorrection;
  120.         this.mu = mu;
  121.     }

  122.     /**
  123.      * Return an instance using the default options.
  124.      *
  125.      * <ul>
  126.      * <li>{@link AlternativeHypothesis#TWO_SIDED}
  127.      * <li>{@link PValueMethod#AUTO}
  128.      * <li>{@link ContinuityCorrection#ENABLED}
  129.      * <li>{@linkplain #withMu(double) mu = 0}
  130.      * </ul>
  131.      *
  132.      * @return default instance
  133.      */
  134.     public static MannWhitneyUTest withDefaults() {
  135.         return DEFAULT;
  136.     }

  137.     /**
  138.      * Return an instance with the configured alternative hypothesis.
  139.      *
  140.      * @param v Value.
  141.      * @return an instance
  142.      */
  143.     public MannWhitneyUTest with(AlternativeHypothesis v) {
  144.         return new MannWhitneyUTest(Objects.requireNonNull(v), pValueMethod, continuityCorrection, mu);
  145.     }

  146.     /**
  147.      * Return an instance with the configured p-value method.
  148.      *
  149.      * @param v Value.
  150.      * @return an instance
  151.      * @throws IllegalArgumentException if the value is not in the allowed options or is null
  152.      */
  153.     public MannWhitneyUTest with(PValueMethod v) {
  154.         return new MannWhitneyUTest(alternative,
  155.             Arguments.checkOption(v, EnumSet.of(PValueMethod.AUTO, PValueMethod.EXACT, PValueMethod.ASYMPTOTIC)),
  156.             continuityCorrection, mu);
  157.     }

  158.     /**
  159.      * Return an instance with the configured continuity correction.
  160.      *
  161.      * <p>If {@link ContinuityCorrection#ENABLED ENABLED}, adjust the U rank statistic by
  162.      * 0.5 towards the mean value when computing the z-statistic if a normal approximation is used
  163.      * to compute the p-value.
  164.      *
  165.      * @param v Value.
  166.      * @return an instance
  167.      */
  168.     public MannWhitneyUTest with(ContinuityCorrection v) {
  169.         return new MannWhitneyUTest(alternative, pValueMethod,
  170.             Objects.requireNonNull(v) == ContinuityCorrection.ENABLED, mu);
  171.     }

  172.     /**
  173.      * Return an instance with the configured location shift {@code mu}.
  174.      *
  175.      * @param v Value.
  176.      * @return an instance
  177.      * @throws IllegalArgumentException if the value is not finite
  178.      */
  179.     public MannWhitneyUTest withMu(double v) {
  180.         return new MannWhitneyUTest(alternative, pValueMethod, continuityCorrection, Arguments.checkFinite(v));
  181.     }
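
    // Illustrative configuration sketch (assumes the ContinuityCorrection enum also exposes
    // DISABLED alongside ENABLED): a one-sided asymptotic test of a location shift of 1.5
    // without continuity correction could be built as:
    //   MannWhitneyUTest test = MannWhitneyUTest.withDefaults()
    //       .with(AlternativeHypothesis.GREATER_THAN)
    //       .with(PValueMethod.ASYMPTOTIC)
    //       .with(ContinuityCorrection.DISABLED)
    //       .withMu(1.5);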

  182.     /**
  183.      * Computes the Mann-Whitney U statistic comparing two independent
  184.      * samples possibly of different length.
  185.      *
  186.      * <p>This statistic can be used to perform a Mann-Whitney U test evaluating the
  187.      * null hypothesis that the two independent samples differ by a location shift of {@code mu}.
  188.      *
  189.      * <p>This returns the U<sub>1</sub> statistic. Compute the U<sub>2</sub> statistic using:
  190.      * <pre>
  191.      * u2 = (long) x.length * y.length - u1;
  192.      * </pre>
  193.      *
  194.      * @param x First sample values.
  195.      * @param y Second sample values.
  196.      * @return Mann-Whitney U<sub>1</sub> statistic
  197.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length; or contain
  198.      * NaN values.
  199.      * @see #withMu(double)
  200.      */
  201.     public double statistic(double[] x, double[] y) {
  202.         checkSamples(x, y);

  203.         final double[] z = concatenateSamples(mu, x, y);
  204.         final double[] ranks = RANKING.apply(z);

  205.         // The ranks for x are in the first x.length entries in ranks because x
  206.         // is in the first x.length entries in z
  207.         final double sumRankX = Arrays.stream(ranks).limit(x.length).sum();

  208.         // U1 = R1 - (n1 * (n1 + 1)) / 2 where R1 is sum of ranks for sample 1,
  209.         // e.g. x, n1 is the number of observations in sample 1.
  210.         return sumRankX - ((long) x.length * (x.length + 1)) * 0.5;
  211.     }
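
    // Worked example of the statistic (illustrative): for x = {1, 2} and y = {3, 4, 5} the pooled
    // ranks are {1, 2, 3, 4, 5}, so sumRankX = 1 + 2 = 3 and U1 = 3 - 2 * 3 / 2 = 0 (no x value
    // exceeds any y value). The complementary statistic is U2 = 2 * 3 - U1 = 6.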

  212.     /**
  213.      * Performs a Mann-Whitney U test comparing the location for two independent
  214.      * samples. The location is specified using {@link #withMu(double) mu}.
  215.      *
  216.      * <p>The test is defined by the {@link AlternativeHypothesis}.
  217.      * <ul>
  218.      * <li>'two-sided': the distribution underlying {@code (x - mu)} is not equal to the
  219.      * distribution underlying {@code y}.
  220.      * <li>'greater': the distribution underlying {@code (x - mu)} is stochastically greater than
  221.      * the distribution underlying {@code y}.
  222.      * <li>'less': the distribution underlying {@code (x - mu)} is stochastically less than
  223.      * the distribution underlying {@code y}.
  224.      * </ul>
  225.      *
  226.      * <p>If the p-value method is {@linkplain PValueMethod#AUTO auto} an exact p-value is
  227.      * computed if each sample contains fewer than 50 values; otherwise a normal
  228.      * approximation is used.
  229.      *
  230.      * <p>Computation of the exact p-value is only valid if there are no tied
  231.      * ranks in the data; otherwise the computation falls back to the asymptotic
  232.      * approximation using a tie correction and an optional continuity correction.
  233.      *
  234.      * <p><strong>Note: </strong>
  235.      * Exact computation requires tabulation of values not exceeding size
  236.      * {@code (n+1)*(m+1)*(u+1)} where {@code u} is the minimum of the U<sub>1</sub> and
  237.      * U<sub>2</sub> statistics and {@code n} and {@code m} are the sample sizes.
  238.      * This may use a very large amount of memory and result in an {@link OutOfMemoryError}.
  239.      * Exact computation requires a finite binomial coefficient {@code binom(n+m, m)}
  240.      * which is limited to {@code n+m <= 1029} for any {@code n} and {@code m},
  241.      * or {@code min(n, m) <= 37} for any {@code max(n, m)}.
  242.      * An {@link OutOfMemoryError} is not expected using the
  243.      * limits configured for the {@linkplain PValueMethod#AUTO auto} p-value computation
  244.      * as the maximum required memory is approximately 23 MiB.
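     * (With the auto limit of 49 values per sample the table is at most
     * {@code 50 * 50 * 1201} {@code double} values, which is about 23 MiB.)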
  245.      *
  246.      * @param x First sample values.
  247.      * @param y Second sample values.
  248.      * @return test result
  249.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length; or contain
  250.      * NaN values.
  251.      * @throws OutOfMemoryError if the exact computation is <em>user-requested</em> for
  252.      * large samples and there is not enough memory.
  253.      * @see #statistic(double[], double[])
  254.      * @see #withMu(double)
  255.      * @see #with(AlternativeHypothesis)
  256.      * @see #with(ContinuityCorrection)
  257.      */
  258.     public Result test(double[] x, double[] y) {
  259.         // Computation as above. The ranks are required for tie correction.
  260.         checkSamples(x, y);
  261.         final double[] z = concatenateSamples(mu, x, y);
  262.         final double[] ranks = RANKING.apply(z);
  263.         final double sumRankX = Arrays.stream(ranks).limit(x.length).sum();
  264.         final double u1 = sumRankX - ((long) x.length * (x.length + 1)) * 0.5;

  265.         final double c = WilcoxonSignedRankTest.calculateTieCorrection(ranks);
  266.         final boolean tiedValues = c != 0;

  267.         PValueMethod method = pValueMethod;
  268.         final int n = x.length;
  269.         final int m = y.length;
  270.         if (method == PValueMethod.AUTO && Math.max(n, m) < AUTO_LIMIT) {
  271.             method = PValueMethod.EXACT;
  272.         }
  273.         // Exact p requires no ties.
  274.         // The method will fail-fast if the computation is not possible due
  275.         // to the size of the data.
  276.         double p = method == PValueMethod.EXACT && !tiedValues ?
  277.             calculateExactPValue(u1, n, m, alternative) : -1;
  278.         if (p < 0) {
  279.             p = calculateAsymptoticPValue(u1, n, m, c);
  280.         }
  281.         return new Result(u1, tiedValues, p);
  282.     }

  283.     /**
  284.      * Ensures that the provided arrays fulfil the assumptions.
  285.      *
  286.      * @param x First sample values.
  287.      * @param y Second sample values.
  288.      * @throws IllegalArgumentException if {@code x} or {@code y} are zero-length.
  289.      */
  290.     private static void checkSamples(double[] x, double[] y) {
  291.         Arguments.checkValuesRequiredSize(x.length, 1);
  292.         Arguments.checkValuesRequiredSize(y.length, 1);
  293.     }

  294.     /**
  295.      * Concatenate the samples into one array. Subtract {@code mu} from the first sample.
  296.      *
  297.      * @param mu Expected location shift.
  298.      * @param x First sample values.
  299.      * @param y Second sample values.
  300.      * @return concatenated array
  301.      */
  302.     private static double[] concatenateSamples(double mu, double[] x, double[] y) {
  303.         final double[] z = new double[x.length + y.length];
  304.         System.arraycopy(x, 0, z, 0, x.length);
  305.         System.arraycopy(y, 0, z, x.length, y.length);
  306.         if (mu != 0) {
  307.             for (int i = 0; i < x.length; i++) {
  308.                 z[i] -= mu;
  309.             }
  310.         }
  311.         return z;
  312.     }

  313.     /**
  314.      * Calculate the asymptotic p-value using a Normal approximation.
  315.      *
  316.      * @param u Mann-Whitney U value.
  317.      * @param n1 Number of subjects in first sample.
  318.      * @param n2 Number of subjects in second sample.
  319.      * @param c Tie-correction
  320.      * @return two-sided asymptotic p-value
  321.      */
  322.     private double calculateAsymptoticPValue(double u, int n1, int n2, double c) {
  323.         // Use long to avoid overflow
  324.         final long n1n2 = (long) n1 * n2;
  325.         final long n = (long) n1 + n2;

  326.         // https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Normal_approximation_and_tie_correction
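        // Restated (the form used below):
        //   E[U]   = n1 * n2 / 2
        //   Var[U] = (n1 * n2 / 12) * ((n + 1) - sum(t^3 - t) / (n * (n - 1)))
        // where, in the standard formulation, the tie correction c is sum(t^3 - t) taken over
        // the sizes t of each group of tied ranks, and
        //   z = (U - E[U]) / sqrt(Var[U])
        // with an optional +/- 0.5 continuity correction applied to (U - E[U]).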
  327.         final double e = n1n2 * 0.5;
  328.         final double variance = (n1n2 / 12.0) * ((n + 1.0) - c / n / (n - 1));

  329.         double z = u - e;
  330.         if (continuityCorrection) {
  331.             // +/- 0.5 is a continuity correction towards the expected.
  332.             if (alternative == AlternativeHypothesis.GREATER_THAN) {
  333.                 z -= 0.5;
  334.             } else if (alternative == AlternativeHypothesis.LESS_THAN) {
  335.                 z += 0.5;
  336.             } else {
  337.                 // two-sided. Shift towards the expected value of zero.
  338.                 // Use of signum ignores z==0 (i.e. not copySign(0.5, z))
  339.                 z -= Math.signum(z) * 0.5;
  340.             }
  341.         }
  342.         z /= Math.sqrt(variance);

  343.         final NormalDistribution standardNormal = NormalDistribution.of(0, 1);
  344.         if (alternative == AlternativeHypothesis.GREATER_THAN) {
  345.             return standardNormal.survivalProbability(z);
  346.         }
  347.         if (alternative == AlternativeHypothesis.LESS_THAN) {
  348.             return standardNormal.cumulativeProbability(z);
  349.         }
  350.         // two-sided
  351.         return 2 * standardNormal.survivalProbability(Math.abs(z));
  352.     }

  353.     /**
  354.      * Calculate the exact p-value. If the value cannot be computed this returns -1.
  355.      *
  356.      * <p>Note: Computation may run out of memory during array allocation, or method
  357.      * recursion.
  358.      *
  359.      * @param u Mann-Whitney U value.
  360.      * @param m Number of subjects in first sample.
  361.      * @param n Number of subjects in second sample.
  362.      * @param alternative Alternative hypothesis.
  363.      * @return exact p-value (or -1) (two-sided, greater, or less using the options)
  364.      */
  365.     // package-private for testing
  366.     static double calculateExactPValue(double u, int m, int n, AlternativeHypothesis alternative) {
  367.         // Check the computation can be attempted.
  368.         // u must be an integer
  369.         if ((int) u != u) {
  370.             return -1;
  371.         }
  372.         // Note: n+m will not overflow as we concatenated the samples to a single array.
  373.         final double binom = BinomialCoefficientDouble.value(n + m, m);
  374.         if (binom == Double.POSITIVE_INFINITY) {
  375.             return -1;
  376.         }

  377.         // Use u_min for the CDF.
  378.         final int u1 = (int) u;
  379.         final int u2 = (int) ((long) m * n - u1);
  380.         // Use m < n to support symmetry.
  381.         final int n1 = Math.min(m, n);
  382.         final int n2 = Math.max(m, n);

  383.         // Return the correct side:
  384.         if (alternative == AlternativeHypothesis.GREATER_THAN) {
  385.             // sf(u1 - 1)
  386.             return sf(u1 - 1, u2 + 1, n1, n2, binom);
  387.         }
  388.         if (alternative == AlternativeHypothesis.LESS_THAN) {
  389.             // cdf(u1)
  390.             return cdf(u1, u2, n1, n2, binom);
  391.         }
  392.         // two-sided: 2 * sf(max(u1, u2) - 1) or 2 * cdf(min(u1, u2))
  393.         final double p = 2 * computeCdf(Math.min(u1, u2), n1, n2, binom);
  394.         // Clip to range: [0, 1]
  395.         return Math.min(1, p);
  396.     }

  397.     /**
  398.      * Compute the cumulative distribution function of the Mann-Whitney U1 statistic.
  399.      * The U2 statistic is passed for convenience to exploit symmetry in the distribution.
  400.      *
  401.      * @param u1 Mann-Whitney U1 statistic
  402.      * @param u2 Mann-Whitney U2 statistic
  403.      * @param m First sample size.
  404.      * @param n Second sample size.
  405.      * @param binom binom(n+m, m) (must be finite)
  406.      * @return {@code Pr(X <= k)}
  407.      */
  408.     private static double cdf(int u1, int u2, int m, int n, double binom) {
  409.         // Exploit symmetry. Note the distribution is discrete thus requiring (u2 - 1).
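        // U1 + U2 = m * n and U1, U2 share the same null distribution, so
        //   P(U1 <= u1) = P(U2 >= u2) = 1 - P(U2 <= u2 - 1)
        // and the CDF is evaluated from whichever tail needs the smaller summation.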
  410.         return u2 > u1 ?
  411.             computeCdf(u1, m, n, binom) :
  412.             1 - computeCdf(u2 - 1, m, n, binom);
  413.     }

  414.     /**
  415.      * Compute the survival function of the Mann-Whitney U1 statistic.
  416.      * The U2 statistic is passed for convenience to exploit symmetry in the distribution.
  417.      *
  418.      * @param u1 Mann-Whitney U1 statistic
  419.      * @param u2 Mann-Whitney U2 statistic
  420.      * @param m First sample size.
  421.      * @param n Second sample size.
  422.      * @param binom binom(n+m, m) (must be finite)
  423.      * @return {@code Pr(X > k)}
  424.      */
  425.     private static double sf(int u1, int u2, int m, int n, double binom) {
  426.         // Opposite of the CDF
  427.         return u2 > u1 ?
  428.             1 - computeCdf(u1, m, n, binom) :
  429.             computeCdf(u2 - 1, m, n, binom);
  430.     }

  431.     /**
  432.      * Compute the cumulative distribution function of the Mann-Whitney U statistic.
  433.      *
  434.      * <p>This should be called with the lower of U1 or U2 for computational efficiency.
  435.      *
  436.      * <p>Uses the recursive formula provided in Di Bucchianico, A. (1999)
  437.      * Combinatorics, computer algebra and the Wilcoxon-Mann-Whitney test, Journal
  438.      * of Statistical Planning and Inference, Volume 79, Issue 2, 349-364.
  439.      *
  440.      * @param k Mann-Whitney U statistic
  441.      * @param m First sample size.
  442.      * @param n Second sample size.
  443.      * @param binom binom(n+m, m) (must be finite)
  444.      * @return {@code Pr(X <= k)}
  445.      */
  446.     private static double computeCdf(int k, int m, int n, double binom) {
  447.         // Theorem 2.5:
  448.         // f(m, n, k) = 0 if k < 0, m < 0, n < 0, k > nm
  449.         if (k < 0) {
  450.             return 0;
  451.         }
  452.         // Recursively compute f(m, n, k)
  453.         final double[][][] f = getF(m, n, k);

  454.         // P(X=k) = f(m, n, k) / binom(m+n, m)
  455.         // P(X<=k) = sum_0^k (P(X=i))
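        // Small worked check (illustrative): m = 1, n = 2 gives f(1, 2, k) = 1 for k = 0, 1, 2
        // and binom(3, 1) = 3, so P(X <= 0) = 1/3, P(X <= 1) = 2/3, P(X <= 2) = 1.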

  456.         // Called with k = min(u1, u2) : max(p) ~ 0.5 so no need to clip to [0, 1]
  457.         return IntStream.rangeClosed(0, k).mapToDouble(i -> fmnk(f, m, n, i)).sum() / binom;
  458.     }

  459.     /**
  460.      * Gets the storage for f(m, n, k).
  461.      *
  462.      * <p>This may be cached for performance.
  463.      *
  464.      * @param m M.
  465.      * @param n N.
  466.      * @param k K.
  467.      * @return the storage for f
  468.      */
  469.     private static double[][][] getF(int m, int n, int k) {
  470.         // Obtain any previous computation of f and expand it if required.
  471.         // f is only modified while holding the lock.
  472.         // Any concurrent threads using a reference returned by this method
  473.         // will not receive an index out-of-bounds as f is only ever expanded.
  474.         LOCK.lock();
  475.         try {
  476.             // Note: f(x<m, y<n, z<k) is always the same.
  477.             // Cache the array and re-use any previous computation.
  478.             double[][][] f = cacheF.get();

  479.             // Require:
  480.             // f = new double[m + 1][n + 1][k + 1]
  481.             // f(m, n, 0) == 1; otherwise -1 if not computed
  482.             // m+n <= 1029 for any m,n; k < mn/2 (due to symmetry using min(u1, u2))
  483.             // Size m=n=515: approximately 516^2 * 515^2 / 2 ~ 3.5e10 doubles ~ 263 GiB
  484.             if (f == null) {
  485.                 f = new double[m + 1][n + 1][k + 1];
  486.                 for (final double[][] a : f) {
  487.                     for (final double[] b : a) {
  488.                         initialize(b);
  489.                     }
  490.                 }
  491.                 // Cache for reuse.
  492.                 cacheF = new SoftReference<>(f);
  493.                 return f;
  494.             }

  495.             // Grow if required: m1 < m+1 => m1-(m+1) < 0 => m1 - m < 1
  496.             final int m1 = f.length;
  497.             final int n1 = f[0].length;
  498.             final int k1 = f[0][0].length;
  499.             final boolean growM = m1 - m < 1;
  500.             final boolean growN = n1 - n < 1;
  501.             final boolean growK = k1 - k < 1;
  502.             if (growM | growN | growK) {
  503.                 // Some part of the previous f is too small.
  504.                 // Atomically grow without destroying the previous computation.
  505.                 // Any other thread using the previous f will not go out of bounds
  506.                 // by keeping the new f dimensions at least as large.
  507.                 // Note: Doing this in-place allows the memory to be gradually
  508.                 // increased rather than allocating a new [m + 1][n + 1][k + 1]
  509.                 // and copying all old values.
  510.                 final int sn = Math.max(n1, n + 1);
  511.                 final int sk = Math.max(k1, k + 1);
  512.                 if (growM) {
  513.                     // Entirely new region
  514.                     f = Arrays.copyOf(f, m + 1);
  515.                     for (int x = m1; x <= m; x++) {
  516.                         f[x] = new double[sn][sk];
  517.                         for (final double[] b : f[x]) {
  518.                             initialize(b);
  519.                         }
  520.                     }
  521.                 }
  522.                 // Expand previous in place if required
  523.                 if (growN) {
  524.                     for (int x = 0; x < m1; x++) {
  525.                         f[x] = Arrays.copyOf(f[x], sn);
  526.                         for (int y = n1; y < sn; y++) {
  527.                             final double[] b = f[x][y] = new double[sk];
  528.                             initialize(b);
  529.                         }
  530.                     }
  531.                 }
  532.                 if (growK) {
  533.                     for (int x = 0; x < m1; x++) {
  534.                         for (int y = 0; y < n1; y++) {
  535.                             final double[] b = f[x][y] = Arrays.copyOf(f[x][y], sk);
  536.                             for (int z = k1; z < sk; z++) {
  537.                                 b[z] = UNSET;
  538.                             }
  539.                         }
  540.                     }
  541.                 }
  542.                 // Avoided an OutOfMemoryError. Cache for reuse.
  543.                 cacheF = new SoftReference<>(f);
  544.             }
  545.             return f;
  546.         } finally {
  547.             LOCK.unlock();
  548.         }
  549.     }

  550.     /**
  551.      * Initialize the array for f(m, n, x).
  552.      * Set value to 1 for x=0; otherwise {@link #UNSET}.
  553.      *
  554.      * @param fmn Array.
  555.      */
  556.     private static void initialize(double[] fmn) {
  557.         Arrays.fill(fmn, UNSET);
  558.         // f(m, n, 0) == 1 if m >= 0, n >= 0
  559.         fmn[0] = 1;
  560.     }

  561.     /**
  562.      * Compute f(m, n, k), the number of subsets of {0, 1, ..., n} with m elements such
  563.      * that the elements of this subset add up to k.
  564.      *
  565.      * <p>The function is computed recursively.
  566.      *
  567.      * @param f Tabulated values of f[m][n][k].
  568.      * @param m M
  569.      * @param n N
  570.      * @param k K
  571.      * @return f(m, n, k)
  572.      */
  573.     private static double fmnk(double[][][] f, int m, int n, int k) {
  574.         // Theorem 2.5:
  575.         // Omit conditions that will not be met: k > mn
  576.         // f(m, n, k) = 0 if k < 0, m < 0, n < 0
  577.         if ((k | m | n) < 0) {
  578.             return 0;
  579.         }
  580.         // Compute on demand
  581.         double fmnk = f[m][n][k];
  582.         if (fmnk < 0) {
  583.             // f(m, n, 0) == 1 if m >= 0, n >= 0
  584.             // This is already computed.

  585.             // Recursion from formula (3):
  586.             // f(m, n, k) = f(m-1, n, k-n) + f(m, n-1, k)
  587.             f[m][n][k] = fmnk = fmnk(f, m - 1, n, k - n) + fmnk(f, m, n - 1, k);
  588.         }
  589.         return fmnk;
  590.     }
  591. }