EnumeratedIntegerDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.distribution;

  18. import java.util.ArrayList;
  19. import java.util.LinkedHashMap;
  20. import java.util.List;
  21. import java.util.Map;
  22. import java.util.Map.Entry;

  23. import org.apache.commons.statistics.distribution.DiscreteDistribution;
  24. import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
  25. import org.apache.commons.math4.legacy.exception.MathArithmeticException;
  26. import org.apache.commons.math4.legacy.exception.NotANumberException;
  27. import org.apache.commons.math4.legacy.exception.NotFiniteNumberException;
  28. import org.apache.commons.math4.legacy.exception.NotPositiveException;
  29. import org.apache.commons.rng.UniformRandomProvider;
  30. import org.apache.commons.math4.legacy.core.Pair;

  31. /**
  32.  * <p>Implementation of an integer-valued {@link EnumeratedDistribution}.</p>
  33.  *
  34.  * <p>Values with zero-probability are allowed but they do not extend the
  35.  * support.<br>
  36.  * Duplicate values are allowed. Probabilities of duplicate values are combined
  37.  * when computing cumulative probabilities and statistics.</p>
  38.  *
  39.  * @since 3.2
  40.  */
  41. public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {
  42.     /**
  43.      * {@link EnumeratedDistribution} instance (using the {@link Integer} wrapper)
  44.      * used to generate the pmf.
  45.      */
  46.     protected final EnumeratedDistribution<Integer> innerDistribution;

  47.     /**
  48.      * Create a discrete distribution.
  49.      *
  50.      * @param singletons array of random variable values.
  51.      * @param probabilities array of probabilities.
  52.      * @throws DimensionMismatchException if
  53.      * {@code singletons.length != probabilities.length}
  54.      * @throws NotPositiveException if any of the probabilities are negative.
  55.      * @throws NotFiniteNumberException if any of the probabilities are infinite.
  56.      * @throws NotANumberException if any of the probabilities are NaN.
  57.      * @throws MathArithmeticException all of the probabilities are 0.
  58.      */
  59.     public EnumeratedIntegerDistribution(final int[] singletons,
  60.                                          final double[] probabilities)
  61.         throws DimensionMismatchException,
  62.                NotPositiveException,
  63.                MathArithmeticException,
  64.                NotFiniteNumberException,
  65.                NotANumberException {
  66.         innerDistribution = new EnumeratedDistribution<>(createDistribution(singletons,
  67.                                                                             probabilities));
  68.     }

  69.     /**
  70.      * Create a discrete integer-valued distribution from the input data.
  71.      * Values are assigned mass based on their frequency.
  72.      *
  73.      * @param data input dataset
  74.      */
  75.     public EnumeratedIntegerDistribution(final int[] data) {
  76.         final Map<Integer, Integer> dataMap = new LinkedHashMap<>();
  77.         for (int value : data) {
  78.             dataMap.merge(value, 1, Integer::sum);
  79.         }
  80.         final int massPoints = dataMap.size();
  81.         final double denom = data.length;
  82.         final int[] values = new int[massPoints];
  83.         final double[] probabilities = new double[massPoints];
  84.         int index = 0;
  85.         for (Entry<Integer, Integer> entry : dataMap.entrySet()) {
  86.             values[index] = entry.getKey();
  87.             probabilities[index] = entry.getValue().intValue() / denom;
  88.             index++;
  89.         }
  90.         innerDistribution = new EnumeratedDistribution<>(createDistribution(values, probabilities));
  91.     }

  92.     /**
  93.      * Create the list of Pairs representing the distribution from singletons and probabilities.
  94.      *
  95.      * @param singletons values
  96.      * @param probabilities probabilities
  97.      * @return list of value/probability pairs
  98.      */
  99.     private static List<Pair<Integer, Double>>  createDistribution(int[] singletons, double[] probabilities) {
  100.         if (singletons.length != probabilities.length) {
  101.             throw new DimensionMismatchException(probabilities.length, singletons.length);
  102.         }

  103.         final List<Pair<Integer, Double>> samples = new ArrayList<>(singletons.length);

  104.         for (int i = 0; i < singletons.length; i++) {
  105.             samples.add(new Pair<>(singletons[i], probabilities[i]));
  106.         }
  107.         return samples;
  108.     }

  109.     /**
  110.      * {@inheritDoc}
  111.      */
  112.     @Override
  113.     public double probability(final int x) {
  114.         return innerDistribution.probability(x);
  115.     }

  116.     /**
  117.      * {@inheritDoc}
  118.      */
  119.     @Override
  120.     public double cumulativeProbability(final int x) {
  121.         double probability = 0;

  122.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  123.             if (sample.getKey() <= x) {
  124.                 probability += sample.getValue();
  125.             }
  126.         }

  127.         return probability;
  128.     }

  129.     /**
  130.      * {@inheritDoc}
  131.      *
  132.      * @return {@code sum(singletons[i] * probabilities[i])}
  133.      */
  134.     @Override
  135.     public double getMean() {
  136.         double mean = 0;

  137.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  138.             mean += sample.getValue() * sample.getKey();
  139.         }

  140.         return mean;
  141.     }

  142.     /**
  143.      * {@inheritDoc}
  144.      *
  145.      * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
  146.      */
  147.     @Override
  148.     public double getVariance() {
  149.         double mean = 0;
  150.         double meanOfSquares = 0;

  151.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  152.             mean += sample.getValue() * sample.getKey();
  153.             meanOfSquares += sample.getValue() * sample.getKey() * sample.getKey();
  154.         }

  155.         return meanOfSquares - mean * mean;
  156.     }

  157.     /**
  158.      * {@inheritDoc}
  159.      *
  160.      * Returns the lowest value with non-zero probability.
  161.      *
  162.      * @return the lowest value with non-zero probability.
  163.      */
  164.     @Override
  165.     public int getSupportLowerBound() {
  166.         int min = Integer.MAX_VALUE;
  167.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  168.             if (sample.getKey() < min && sample.getValue() > 0) {
  169.                 min = sample.getKey();
  170.             }
  171.         }

  172.         return min;
  173.     }

  174.     /**
  175.      * {@inheritDoc}
  176.      *
  177.      * Returns the highest value with non-zero probability.
  178.      *
  179.      * @return the highest value with non-zero probability.
  180.      */
  181.     @Override
  182.     public int getSupportUpperBound() {
  183.         int max = Integer.MIN_VALUE;
  184.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  185.             if (sample.getKey() > max && sample.getValue() > 0) {
  186.                 max = sample.getKey();
  187.             }
  188.         }

  189.         return max;
  190.     }

  191.     /**
  192.      * {@inheritDoc}
  193.      *
  194.      * Refer to {@link EnumeratedDistribution.Sampler} for implementation details.
  195.      */
  196.     @Override
  197.     public DiscreteDistribution.Sampler createSampler(final UniformRandomProvider rng) {
  198.         return innerDistribution.createSampler(rng)::sample;
  199.     }
  200. }