DiscreteDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.statistics.distribution;

  18. import java.util.stream.IntStream;
  19. import org.apache.commons.rng.UniformRandomProvider;

  20. /**
  21.  * Interface for distributions on the integers.
  22.  */
  23. public interface DiscreteDistribution {

  24.     /**
  25.      * For a random variable {@code X} whose values are distributed according
  26.      * to this distribution, this method returns {@code P(X = x)}.
  27.      * In other words, this method represents the probability mass function (PMF)
  28.      * for the distribution.
  29.      *
  30.      * @param x Point at which the PMF is evaluated.
  31.      * @return the value of the probability mass function at {@code x}.
  32.      */
  33.     double probability(int x);

  34.     /**
  35.      * For a random variable {@code X} whose values are distributed according
  36.      * to this distribution, this method returns {@code P(x0 < X <= x1)}.
  37.      * The default implementation uses the identity
  38.      * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
  39.      *
  40.      * <p>Special cases:
  41.      * <ul>
  42.      * <li>returns {@code 0.0} if {@code x0 == x1};
  43.      * <li>returns {@code probability(x1)} if {@code x0 + 1 == x1};
  44.      * </ul>
  45.      *
  46.      * @param x0 Lower bound (exclusive).
  47.      * @param x1 Upper bound (inclusive).
  48.      * @return the probability that a random variable with this distribution
  49.      * takes a value between {@code x0} and {@code x1},  excluding the lower
  50.      * and including the upper endpoint.
  51.      * @throws IllegalArgumentException if {@code x0 > x1}.
  52.      */
  53.     default double probability(int x0,
  54.                                int x1) {
  55.         if (x0 > x1) {
  56.             throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, x0, x1);
  57.         }
  58.         // Long addition avoids overflow
  59.         if (x0 + 1L >= x1) {
  60.             return x0 == x1 ? 0.0 : probability(x1);
  61.         }
  62.         return cumulativeProbability(x1) - cumulativeProbability(x0);
  63.     }

  64.     /**
  65.      * For a random variable {@code X} whose values are distributed according
  66.      * to this distribution, this method returns {@code log(P(X = x))}, where
  67.      * {@code log} is the natural logarithm.
  68.      *
  69.      * @param x Point at which the PMF is evaluated.
  70.      * @return the logarithm of the value of the probability mass function at
  71.      * {@code x}.
  72.      */
  73.     default double logProbability(int x) {
  74.         return Math.log(probability(x));
  75.     }

  76.     /**
  77.      * For a random variable {@code X} whose values are distributed according
  78.      * to this distribution, this method returns {@code P(X <= x)}.
  79.      * In other, words, this method represents the (cumulative) distribution
  80.      * function (CDF) for this distribution.
  81.      *
  82.      * @param x Point at which the CDF is evaluated.
  83.      * @return the probability that a random variable with this distribution
  84.      * takes a value less than or equal to {@code x}.
  85.      */
  86.     double cumulativeProbability(int x);

  87.     /**
  88.      * For a random variable {@code X} whose values are distributed according
  89.      * to this distribution, this method returns {@code P(X > x)}.
  90.      * In other words, this method represents the complementary cumulative
  91.      * distribution function.
  92.      *
  93.      * <p>By default, this is defined as {@code 1 - cumulativeProbability(x)}, but
  94.      * the specific implementation may be more accurate.
  95.      *
  96.      * @param x Point at which the survival function is evaluated.
  97.      * @return the probability that a random variable with this
  98.      * distribution takes a value greater than {@code x}.
  99.      */
  100.     default double survivalProbability(int x) {
  101.         return 1.0 - cumulativeProbability(x);
  102.     }

  103.     /**
  104.      * Computes the quantile function of this distribution.
  105.      * For a random variable {@code X} distributed according to this distribution,
  106.      * the returned value is:
  107.      *
  108.      * <p>\[ x = \begin{cases}
  109.      *       \inf \{ x \in \mathbb Z : P(X \le x) \ge p\}   &amp; \text{for } 0 \lt p \le 1 \\
  110.      *       \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \}  &amp; \text{for } p = 0
  111.      *       \end{cases} \]
  112.      *
  113.      * <p>If the result exceeds the range of the data type {@code int},
  114.      * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
  115.      * In this case the result of {@link #cumulativeProbability(int) cumulativeProbability(x)}
  116.      * called using the returned {@code p}-quantile may not compute the original {@code p}.
  117.      *
  118.      * @param p Cumulative probability.
  119.      * @return the smallest {@code p}-quantile of this distribution
  120.      * (largest 0-quantile for {@code p = 0}).
  121.      * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
  122.      */
  123.     int inverseCumulativeProbability(double p);

  124.     /**
  125.      * Computes the inverse survival probability function of this distribution.
  126.      * For a random variable {@code X} distributed according to this distribution,
  127.      * the returned value is:
  128.      *
  129.      * <p>\[ x = \begin{cases}
  130.      *       \inf \{ x \in \mathbb Z : P(X \gt x) \le p\}   &amp; \text{for } 0 \le p \lt 1 \\
  131.      *       \inf \{ x \in \mathbb Z : P(X \gt x) \lt 1 \}  &amp; \text{for } p = 1
  132.      *       \end{cases} \]
  133.      *
  134.      * <p>If the result exceeds the range of the data type {@code int},
  135.      * then {@link Integer#MIN_VALUE} or {@link Integer#MAX_VALUE} is returned.
  136.      * In this case the result of {@link #survivalProbability(int) survivalProbability(x)}
  137.      * called using the returned {@code (1-p)}-quantile may not compute the original {@code p}.
  138.      *
  139.      * <p>By default, this is defined as {@code inverseCumulativeProbability(1 - p)}, but
  140.      * the specific implementation may be more accurate.
  141.      *
  142.      * @param p Cumulative probability.
  143.      * @return the smallest {@code (1-p)}-quantile of this distribution
  144.      * (largest 0-quantile for {@code p = 1}).
  145.      * @throws IllegalArgumentException if {@code p < 0} or {@code p > 1}.
  146.      */
  147.     default int inverseSurvivalProbability(double p) {
  148.         return inverseCumulativeProbability(1 - p);
  149.     }

  150.     /**
  151.      * Gets the mean of this distribution.
  152.      *
  153.      * @return the mean.
  154.      */
  155.     double getMean();

  156.     /**
  157.      * Gets the variance of this distribution.
  158.      *
  159.      * @return the variance.
  160.      */
  161.     double getVariance();

  162.     /**
  163.      * Gets the lower bound of the support.
  164.      * This method must return the same value as
  165.      * {@code inverseCumulativeProbability(0)}, i.e.
  166.      * \( \inf \{ x \in \mathbb Z : P(X \le x) \gt 0 \} \).
  167.      * By convention, {@link Integer#MIN_VALUE} should be substituted
  168.      * for negative infinity.
  169.      *
  170.      * @return the lower bound of the support.
  171.      */
  172.     int getSupportLowerBound();

  173.     /**
  174.      * Gets the upper bound of the support.
  175.      * This method must return the same value as
  176.      * {@code inverseCumulativeProbability(1)}, i.e.
  177.      * \( \inf \{ x \in \mathbb Z : P(X \le x) = 1 \} \).
  178.      * By convention, {@link Integer#MAX_VALUE} should be substituted
  179.      * for positive infinity.
  180.      *
  181.      * @return the upper bound of the support.
  182.      */
  183.     int getSupportUpperBound();

  184.     /**
  185.      * Creates a sampler.
  186.      *
  187.      * @param rng Generator of uniformly distributed numbers.
  188.      * @return a sampler that produces random numbers according this
  189.      * distribution.
  190.      */
  191.     Sampler createSampler(UniformRandomProvider rng);

  192.     /**
  193.      * Distribution sampling functionality.
  194.      */
  195.     @FunctionalInterface
  196.     interface Sampler {
  197.         /**
  198.          * Generates a random value sampled from this distribution.
  199.          *
  200.          * @return a random value.
  201.          */
  202.         int sample();

  203.         /**
  204.          * Returns an effectively unlimited stream of {@code int} sample values.
  205.          *
  206.          * <p>The default implementation produces a sequential stream that repeatedly
  207.          * calls {@link #sample sample}().
  208.          *
  209.          * @return a stream of {@code int} values.
  210.          */
  211.         default IntStream samples() {
  212.             return IntStream.generate(this::sample).sequential();
  213.         }

  214.         /**
  215.          * Returns a stream producing the given {@code streamSize} number of {@code int}
  216.          * sample values.
  217.          *
  218.          * <p>The default implementation produces a sequential stream that repeatedly
  219.          * calls {@link #sample sample}(); the stream is limited to the given {@code streamSize}.
  220.          *
  221.          * @param streamSize Number of values to generate.
  222.          * @return a stream of {@code int} values.
  223.          */
  224.         default IntStream samples(long streamSize) {
  225.             return samples().limit(streamSize);
  226.         }
  227.     }
  228. }