BinomialTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.inference;

  18. import java.util.Objects;
  19. import org.apache.commons.statistics.distribution.BinomialDistribution;

  20. /**
  21.  * Implements binomial test statistics.
  22.  *
  23.  * <p>Performs an exact test for the statistical significance of deviations from a
  24.  * theoretically expected distribution of observations into two categories.
  25.  *
  26.  * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a>
  27.  * @since 1.1
  28.  */
  29. public final class BinomialTest {
  30.     /** Default instance. */
  31.     private static final BinomialTest DEFAULT = new BinomialTest(AlternativeHypothesis.TWO_SIDED);

  32.     /** Alternative hypothesis. */
  33.     private final AlternativeHypothesis alternative;

  34.     /**
  35.      * @param alternative Alternative hypothesis.
  36.      */
  37.     private BinomialTest(AlternativeHypothesis alternative) {
  38.         this.alternative = alternative;
  39.     }

  40.     /**
  41.      * Return an instance using the default options.
  42.      *
  43.      * <ul>
  44.      * <li>{@link AlternativeHypothesis#TWO_SIDED}
  45.      * </ul>
  46.      *
  47.      * @return default instance
  48.      */
  49.     public static BinomialTest withDefaults() {
  50.         return DEFAULT;
  51.     }

  52.     /**
  53.      * Return an instance with the configured alternative hypothesis.
  54.      *
  55.      * @param v Value.
  56.      * @return an instance
  57.      */
  58.     public BinomialTest with(AlternativeHypothesis v) {
  59.         return new BinomialTest(Objects.requireNonNull(v));
  60.     }

  61.     /**
  62.      * Performs a binomial test about the probability of success \( \pi \).
  63.      *
  64.      * <p>The null hypothesis is \( H_0:\pi=\pi_0 \) where \( \pi_0 \) is between 0 and 1.
  65.      *
  66.      * <p>The probability of observing \( k \) successes from \( n \) trials with a given
  67.      * probability of success \( p \) is:
  68.      *
  69.      * <p>\[ \Pr(X=k)=\binom{n}{k}p^k(1-p)^{n-k} \]
  70.      *
  71.      * <p>The test is defined by the {@link AlternativeHypothesis}.
  72.      *
  73.      * <p>To test \( \pi &lt; \pi_0 \) (less than):
  74.      *
  75.      * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=0}^k\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
  76.      *
  77.      * <p>To test \( \pi &gt; \pi_0 \) (greater than):
  78.      *
  79.      * <p>\[ p = \sum_{i=0}^k\Pr(X=i)=\sum_{i=k}^n\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
  80.      *
  81.      * <p>To test \( \pi \ne \pi_0 \) (two-sided) requires finding all \( i \) such that
  82.      * \( \mathcal{I}=\{i:\Pr(X=i)\leq \Pr(X=k)\} \) and compute the sum:
  83.      *
  84.      * <p>\[ p = \sum_{i\in\mathcal{I}}\Pr(X=i)=\sum_{i\in\mathcal{I}}\binom{n}{i}\pi_0^i(1-\pi_0)^{n-i} \]
  85.      *
  86.      * <p>The two-sided p-value represents the likelihood of getting a result at least as
  87.      * extreme as the sample, given the provided {@code probability} of success on a
  88.      * single trial.
  89.      *
  90.      * <p>The test statistic is equal to the estimated proportion \( \frac{k}{n} \).
  91.      *
  92.      * @param numberOfTrials Number of trials performed.
  93.      * @param numberOfSuccesses Number of successes observed.
  94.      * @param probability Assumed probability of a single trial under the null
  95.      * hypothesis.
  96.      * @return test result
  97.      * @throws IllegalArgumentException if {@code numberOfTrials} or
  98.      * {@code numberOfSuccesses} is negative; {@code probability} is not between 0
  99.      * and 1; or if {@code numberOfTrials < numberOfSuccesses}
  100.      * @see #with(AlternativeHypothesis)
  101.      */
  102.     public SignificanceResult test(int numberOfTrials, int numberOfSuccesses, double probability) {
  103.         // Note: The distribution validates number of trials and probability.
  104.         // Here we only have to validate the number of successes.
  105.         Arguments.checkNonNegative(numberOfSuccesses);
  106.         if (numberOfTrials < numberOfSuccesses) {
  107.             throw new InferenceException(
  108.                 "must have n >= k for binomial coefficient (n, k), got n = %d, k = %d",
  109.                 numberOfSuccesses, numberOfTrials);
  110.         }

  111.         final BinomialDistribution distribution = BinomialDistribution.of(numberOfTrials, probability);
  112.         final double p;
  113.         if (alternative == AlternativeHypothesis.GREATER_THAN) {
  114.             p = distribution.survivalProbability(numberOfSuccesses - 1);
  115.         } else if (alternative == AlternativeHypothesis.LESS_THAN) {
  116.             p = distribution.cumulativeProbability(numberOfSuccesses);
  117.         } else {
  118.             p = twoSidedBinomialTest(numberOfTrials, numberOfSuccesses, probability, distribution);
  119.         }
  120.         return new BaseSignificanceResult((double) numberOfSuccesses / numberOfTrials, p);
  121.     }

  122.     /**
  123.      * Returns the <i>observed significance level</i>, or p-value, associated with a
  124.      * two-sided binomial test about the probability of success \( \pi \).
  125.      *
  126.      * @param n Number of trials performed.
  127.      * @param k Number of successes observed.
  128.      * @param probability Assumed probability of a single trial under the null
  129.      * hypothesis.
  130.      * @param distribution Binomial distribution.
  131.      * @return p-value
  132.      */
  133.     private static double twoSidedBinomialTest(int n, int k, double probability,
  134.                                                BinomialDistribution distribution) {
  135.         // Find all i where Pr(X = i) <= Pr(X = k) and sum them.
  136.         // Exploit the known unimodal distribution to increase the
  137.         // search speed. Note the search depends only on magnitude differences.
  138.         // The current BinomialDistribution is faster using log probability
  139.         // as it omits a call to Math.exp.

  140.         // Use the mode as the point of largest probability.
  141.         // The lower or upper mode is important for the search below.
  142.         final int m1 = (int) Math.ceil((n + 1.0) * probability) - 1;
  143.         final int m2 = (int) Math.floor((n + 1.0) * probability);
  144.         if (k < m1) {
  145.             final double pk = distribution.logProbability(k);
  146.             // Lower half = cdf(k)
  147.             // Find upper half. As k < lower mode i should never
  148.             // reach the lower mode based on the probability alone.
  149.             // Bracket with the upper mode.
  150.             final int i = Searches.searchDescending(m2, n, pk, distribution::logProbability);
  151.             return distribution.cumulativeProbability(k) +
  152.                    distribution.survivalProbability(i - 1);
  153.         } else if (k > m2) {
  154.             final double pk = distribution.logProbability(k);
  155.             // Upper half = sf(k - 1)
  156.             // Find lower half. As k > upper mode i should never
  157.             // reach the upper mode based on the probability alone.
  158.             // Bracket with the lower mode.
  159.             final int i = Searches.searchAscending(0, m1, pk, distribution::logProbability);
  160.             return distribution.cumulativeProbability(i) +
  161.                    distribution.survivalProbability(k - 1);
  162.         }
  163.         // k == mode
  164.         // Edge case where the sum of probabilities will be either
  165.         // 1 or 1 - Pr(X = mode) where mode != k
  166.         final double pk = distribution.probability(k);
  167.         final double pm = distribution.probability(k == m1 ? m2 : m1);
  168.         return pm > pk ? 1 - pm : 1;
  169.     }
  170. }