001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.distribution;
018
019import java.io.Serializable;
020
021import org.apache.commons.math3.analysis.UnivariateFunction;
022import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils;
023import org.apache.commons.math3.exception.NotStrictlyPositiveException;
024import org.apache.commons.math3.exception.NumberIsTooLargeException;
025import org.apache.commons.math3.exception.OutOfRangeException;
026import org.apache.commons.math3.exception.util.LocalizedFormats;
027import org.apache.commons.math3.random.RandomGenerator;
028import org.apache.commons.math3.util.FastMath;
029
030/**
031 * Base class for probability distributions on the reals.
032 * Default implementations are provided for some of the methods
033 * that do not vary from distribution to distribution.
034 *
035 * @since 3.0
036 */
037public abstract class AbstractRealDistribution
038implements RealDistribution, Serializable {
039    /** Default accuracy. */
040    public static final double SOLVER_DEFAULT_ABSOLUTE_ACCURACY = 1e-6;
041    /** Serializable version identifier */
042    private static final long serialVersionUID = -38038050983108802L;
043     /**
044      * RandomData instance used to generate samples from the distribution.
045      * @deprecated As of 3.1, to be removed in 4.0. Please use the
046      * {@link #random} instance variable instead.
047      */
048    @Deprecated
049    protected org.apache.commons.math3.random.RandomDataImpl randomData =
050        new org.apache.commons.math3.random.RandomDataImpl();
051
052    /**
053     * RNG instance used to generate samples from the distribution.
054     * @since 3.1
055     */
056    protected final RandomGenerator random;
057
058    /** Solver absolute accuracy for inverse cumulative computation */
059    private double solverAbsoluteAccuracy = SOLVER_DEFAULT_ABSOLUTE_ACCURACY;
060
061    /**
062     * @deprecated As of 3.1, to be removed in 4.0. Please use
063     * {@link #AbstractRealDistribution(RandomGenerator)} instead.
064     */
065    @Deprecated
066    protected AbstractRealDistribution() {
067        // Legacy users are only allowed to access the deprecated "randomData".
068        // New users are forbidden to use this constructor.
069        random = null;
070    }
071    /**
072     * @param rng Random number generator.
073     * @since 3.1
074     */
075    protected AbstractRealDistribution(RandomGenerator rng) {
076        random = rng;
077    }
078
079    /**
080     * {@inheritDoc}
081     *
082     * The default implementation uses the identity
083     * <p>{@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}</p>
084     *
085     * @deprecated As of 3.1 (to be removed in 4.0). Please use
086     * {@link #probability(double,double)} instead.
087     */
088    @Deprecated
089    public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException {
090        return probability(x0, x1);
091    }
092
093    /**
094     * For a random variable {@code X} whose values are distributed according
095     * to this distribution, this method returns {@code P(x0 < X <= x1)}.
096     *
097     * @param x0 Lower bound (excluded).
098     * @param x1 Upper bound (included).
099     * @return the probability that a random variable with this distribution
100     * takes a value between {@code x0} and {@code x1}, excluding the lower
101     * and including the upper endpoint.
102     * @throws NumberIsTooLargeException if {@code x0 > x1}.
103     *
104     * The default implementation uses the identity
105     * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
106     *
107     * @since 3.1
108     */
109    public double probability(double x0,
110                              double x1) {
111        if (x0 > x1) {
112            throw new NumberIsTooLargeException(LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT,
113                                                x0, x1, true);
114        }
115        return cumulativeProbability(x1) - cumulativeProbability(x0);
116    }
117
118    /**
119     * {@inheritDoc}
120     *
121     * The default implementation returns
122     * <ul>
123     * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li>
124     * <li>{@link #getSupportUpperBound()} for {@code p = 1}.</li>
125     * </ul>
126     */
127    public double inverseCumulativeProbability(final double p) throws OutOfRangeException {
128        /*
129         * IMPLEMENTATION NOTES
130         * --------------------
131         * Where applicable, use is made of the one-sided Chebyshev inequality
132         * to bracket the root. This inequality states that
133         * P(X - mu >= k * sig) <= 1 / (1 + k^2),
134         * mu: mean, sig: standard deviation. Equivalently
135         * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2),
136         * F(mu + k * sig) >= k^2 / (1 + k^2).
137         *
138         * For k = sqrt(p / (1 - p)), we find
139         * F(mu + k * sig) >= p,
140         * and (mu + k * sig) is an upper-bound for the root.
141         *
142         * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and
143         * P(Y >= -mu + k * sig) <= 1 / (1 + k^2),
144         * P(-X >= -mu + k * sig) <= 1 / (1 + k^2),
145         * P(X <= mu - k * sig) <= 1 / (1 + k^2),
146         * F(mu - k * sig) <= 1 / (1 + k^2).
147         *
148         * For k = sqrt((1 - p) / p), we find
149         * F(mu - k * sig) <= p,
150         * and (mu - k * sig) is a lower-bound for the root.
151         *
152         * In cases where the Chebyshev inequality does not apply, geometric
153         * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket
154         * the root.
155         */
156        if (p < 0.0 || p > 1.0) {
157            throw new OutOfRangeException(p, 0, 1);
158        }
159
160        double lowerBound = getSupportLowerBound();
161        if (p == 0.0) {
162            return lowerBound;
163        }
164
165        double upperBound = getSupportUpperBound();
166        if (p == 1.0) {
167            return upperBound;
168        }
169
170        final double mu = getNumericalMean();
171        final double sig = FastMath.sqrt(getNumericalVariance());
172        final boolean chebyshevApplies;
173        chebyshevApplies = !(Double.isInfinite(mu) || Double.isNaN(mu) ||
174                             Double.isInfinite(sig) || Double.isNaN(sig));
175
176        if (lowerBound == Double.NEGATIVE_INFINITY) {
177            if (chebyshevApplies) {
178                lowerBound = mu - sig * FastMath.sqrt((1. - p) / p);
179            } else {
180                lowerBound = -1.0;
181                while (cumulativeProbability(lowerBound) >= p) {
182                    lowerBound *= 2.0;
183                }
184            }
185        }
186
187        if (upperBound == Double.POSITIVE_INFINITY) {
188            if (chebyshevApplies) {
189                upperBound = mu + sig * FastMath.sqrt(p / (1. - p));
190            } else {
191                upperBound = 1.0;
192                while (cumulativeProbability(upperBound) < p) {
193                    upperBound *= 2.0;
194                }
195            }
196        }
197
198        final UnivariateFunction toSolve = new UnivariateFunction() {
199            /** {@inheritDoc} */
200            public double value(final double x) {
201                return cumulativeProbability(x) - p;
202            }
203        };
204
205        double x = UnivariateSolverUtils.solve(toSolve,
206                                                   lowerBound,
207                                                   upperBound,
208                                                   getSolverAbsoluteAccuracy());
209
210        if (!isSupportConnected()) {
211            /* Test for plateau. */
212            final double dx = getSolverAbsoluteAccuracy();
213            if (x - dx >= getSupportLowerBound()) {
214                double px = cumulativeProbability(x);
215                if (cumulativeProbability(x - dx) == px) {
216                    upperBound = x;
217                    while (upperBound - lowerBound > dx) {
218                        final double midPoint = 0.5 * (lowerBound + upperBound);
219                        if (cumulativeProbability(midPoint) < px) {
220                            lowerBound = midPoint;
221                        } else {
222                            upperBound = midPoint;
223                        }
224                    }
225                    return upperBound;
226                }
227            }
228        }
229        return x;
230    }
231
232    /**
233     * Returns the solver absolute accuracy for inverse cumulative computation.
234     * You can override this method in order to use a Brent solver with an
235     * absolute accuracy different from the default.
236     *
237     * @return the maximum absolute error in inverse cumulative probability estimates
238     */
239    protected double getSolverAbsoluteAccuracy() {
240        return solverAbsoluteAccuracy;
241    }
242
243    /** {@inheritDoc} */
244    public void reseedRandomGenerator(long seed) {
245        random.setSeed(seed);
246        randomData.reSeed(seed);
247    }
248
249    /**
250     * {@inheritDoc}
251     *
252     * The default implementation uses the
253     * <a href="http://en.wikipedia.org/wiki/Inverse_transform_sampling">
254     * inversion method.
255     * </a>
256     */
257    public double sample() {
258        return inverseCumulativeProbability(random.nextDouble());
259    }
260
261    /**
262     * {@inheritDoc}
263     *
264     * The default implementation generates the sample by calling
265     * {@link #sample()} in a loop.
266     */
267    public double[] sample(int sampleSize) {
268        if (sampleSize <= 0) {
269            throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
270                    sampleSize);
271        }
272        double[] out = new double[sampleSize];
273        for (int i = 0; i < sampleSize; i++) {
274            out[i] = sample();
275        }
276        return out;
277    }
278
279    /**
280     * {@inheritDoc}
281     *
282     * @return zero.
283     * @since 3.1
284     */
285    public double probability(double x) {
286        return 0d;
287    }
288
289    /**
290     * Returns the natural logarithm of the probability density function (PDF) of this distribution
291     * evaluated at the specified point {@code x}. In general, the PDF is the derivative of the
292     * {@link #cumulativeProbability(double) CDF}. If the derivative does not exist at {@code x},
293     * then an appropriate replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY},
294     * {@code Double.NaN}, or the limit inferior or limit superior of the difference quotient. Note
295     * that due to the floating point precision and under/overflow issues, this method will for some
296     * distributions be more precise and faster than computing the logarithm of
297     * {@link #density(double)}. The default implementation simply computes the logarithm of
298     * {@code density(x)}.
299     *
300     * @param x the point at which the PDF is evaluated
301     * @return the logarithm of the value of the probability density function at point {@code x}
302     */
303    public double logDensity(double x) {
304        return FastMath.log(density(x));
305    }
306}
307