001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.distribution;
018
019import java.io.Serializable;
020
021import org.apache.commons.math3.analysis.UnivariateFunction;
022import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils;
023import org.apache.commons.math3.exception.NotStrictlyPositiveException;
024import org.apache.commons.math3.exception.NumberIsTooLargeException;
025import org.apache.commons.math3.exception.OutOfRangeException;
026import org.apache.commons.math3.exception.util.LocalizedFormats;
027import org.apache.commons.math3.random.RandomGenerator;
028import org.apache.commons.math3.random.RandomDataImpl;
029import org.apache.commons.math3.util.FastMath;
030
031/**
032 * Base class for probability distributions on the reals.
033 * Default implementations are provided for some of the methods
034 * that do not vary from distribution to distribution.
035 *
036 * @version $Id: AbstractRealDistribution.java 1533974 2013-10-20 20:42:41Z psteitz $
037 * @since 3.0
038 */
039public abstract class AbstractRealDistribution
040implements RealDistribution, Serializable {
041    /** Default accuracy. */
042    public static final double SOLVER_DEFAULT_ABSOLUTE_ACCURACY = 1e-6;
043    /** Serializable version identifier */
044    private static final long serialVersionUID = -38038050983108802L;
045     /**
046      * RandomData instance used to generate samples from the distribution.
047      * @deprecated As of 3.1, to be removed in 4.0. Please use the
048      * {@link #random} instance variable instead.
049      */
050    @Deprecated
051    protected RandomDataImpl randomData = new RandomDataImpl();
052
053    /**
054     * RNG instance used to generate samples from the distribution.
055     * @since 3.1
056     */
057    protected final RandomGenerator random;
058
059    /** Solver absolute accuracy for inverse cumulative computation */
060    private double solverAbsoluteAccuracy = SOLVER_DEFAULT_ABSOLUTE_ACCURACY;
061
062    /**
063     * @deprecated As of 3.1, to be removed in 4.0. Please use
064     * {@link #AbstractRealDistribution(RandomGenerator)} instead.
065     */
066    @Deprecated
067    protected AbstractRealDistribution() {
068        // Legacy users are only allowed to access the deprecated "randomData".
069        // New users are forbidden to use this constructor.
070        random = null;
071    }
072    /**
073     * @param rng Random number generator.
074     * @since 3.1
075     */
076    protected AbstractRealDistribution(RandomGenerator rng) {
077        random = rng;
078    }
079
080    /**
081     * {@inheritDoc}
082     *
083     * The default implementation uses the identity
084     * <p>{@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}</p>
085     *
086     * @deprecated As of 3.1 (to be removed in 4.0). Please use
087     * {@link #probability(double,double)} instead.
088     */
089    @Deprecated
090    public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException {
091        return probability(x0, x1);
092    }
093
094    /**
095     * For a random variable {@code X} whose values are distributed according
096     * to this distribution, this method returns {@code P(x0 < X <= x1)}.
097     *
098     * @param x0 Lower bound (excluded).
099     * @param x1 Upper bound (included).
100     * @return the probability that a random variable with this distribution
101     * takes a value between {@code x0} and {@code x1}, excluding the lower
102     * and including the upper endpoint.
103     * @throws NumberIsTooLargeException if {@code x0 > x1}.
104     *
105     * The default implementation uses the identity
106     * {@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
107     *
108     * @since 3.1
109     */
110    public double probability(double x0,
111                              double x1) {
112        if (x0 > x1) {
113            throw new NumberIsTooLargeException(LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT,
114                                                x0, x1, true);
115        }
116        return cumulativeProbability(x1) - cumulativeProbability(x0);
117    }
118
119    /**
120     * {@inheritDoc}
121     *
122     * The default implementation returns
123     * <ul>
124     * <li>{@link #getSupportLowerBound()} for {@code p = 0},</li>
125     * <li>{@link #getSupportUpperBound()} for {@code p = 1}.</li>
126     * </ul>
127     */
128    public double inverseCumulativeProbability(final double p) throws OutOfRangeException {
129        /*
130         * IMPLEMENTATION NOTES
131         * --------------------
132         * Where applicable, use is made of the one-sided Chebyshev inequality
133         * to bracket the root. This inequality states that
134         * P(X - mu >= k * sig) <= 1 / (1 + k^2),
135         * mu: mean, sig: standard deviation. Equivalently
136         * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2),
137         * F(mu + k * sig) >= k^2 / (1 + k^2).
138         *
139         * For k = sqrt(p / (1 - p)), we find
140         * F(mu + k * sig) >= p,
141         * and (mu + k * sig) is an upper-bound for the root.
142         *
143         * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and
144         * P(Y >= -mu + k * sig) <= 1 / (1 + k^2),
145         * P(-X >= -mu + k * sig) <= 1 / (1 + k^2),
146         * P(X <= mu - k * sig) <= 1 / (1 + k^2),
147         * F(mu - k * sig) <= 1 / (1 + k^2).
148         *
149         * For k = sqrt((1 - p) / p), we find
150         * F(mu - k * sig) <= p,
151         * and (mu - k * sig) is a lower-bound for the root.
152         *
153         * In cases where the Chebyshev inequality does not apply, geometric
154         * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket
155         * the root.
156         */
157        if (p < 0.0 || p > 1.0) {
158            throw new OutOfRangeException(p, 0, 1);
159        }
160
161        double lowerBound = getSupportLowerBound();
162        if (p == 0.0) {
163            return lowerBound;
164        }
165
166        double upperBound = getSupportUpperBound();
167        if (p == 1.0) {
168            return upperBound;
169        }
170
171        final double mu = getNumericalMean();
172        final double sig = FastMath.sqrt(getNumericalVariance());
173        final boolean chebyshevApplies;
174        chebyshevApplies = !(Double.isInfinite(mu) || Double.isNaN(mu) ||
175                             Double.isInfinite(sig) || Double.isNaN(sig));
176
177        if (lowerBound == Double.NEGATIVE_INFINITY) {
178            if (chebyshevApplies) {
179                lowerBound = mu - sig * FastMath.sqrt((1. - p) / p);
180            } else {
181                lowerBound = -1.0;
182                while (cumulativeProbability(lowerBound) >= p) {
183                    lowerBound *= 2.0;
184                }
185            }
186        }
187
188        if (upperBound == Double.POSITIVE_INFINITY) {
189            if (chebyshevApplies) {
190                upperBound = mu + sig * FastMath.sqrt(p / (1. - p));
191            } else {
192                upperBound = 1.0;
193                while (cumulativeProbability(upperBound) < p) {
194                    upperBound *= 2.0;
195                }
196            }
197        }
198
199        final UnivariateFunction toSolve = new UnivariateFunction() {
200
201            public double value(final double x) {
202                return cumulativeProbability(x) - p;
203            }
204        };
205
206        double x = UnivariateSolverUtils.solve(toSolve,
207                                                   lowerBound,
208                                                   upperBound,
209                                                   getSolverAbsoluteAccuracy());
210
211        if (!isSupportConnected()) {
212            /* Test for plateau. */
213            final double dx = getSolverAbsoluteAccuracy();
214            if (x - dx >= getSupportLowerBound()) {
215                double px = cumulativeProbability(x);
216                if (cumulativeProbability(x - dx) == px) {
217                    upperBound = x;
218                    while (upperBound - lowerBound > dx) {
219                        final double midPoint = 0.5 * (lowerBound + upperBound);
220                        if (cumulativeProbability(midPoint) < px) {
221                            lowerBound = midPoint;
222                        } else {
223                            upperBound = midPoint;
224                        }
225                    }
226                    return upperBound;
227                }
228            }
229        }
230        return x;
231    }
232
233    /**
234     * Returns the solver absolute accuracy for inverse cumulative computation.
235     * You can override this method in order to use a Brent solver with an
236     * absolute accuracy different from the default.
237     *
238     * @return the maximum absolute error in inverse cumulative probability estimates
239     */
240    protected double getSolverAbsoluteAccuracy() {
241        return solverAbsoluteAccuracy;
242    }
243
244    /** {@inheritDoc} */
245    public void reseedRandomGenerator(long seed) {
246        random.setSeed(seed);
247        randomData.reSeed(seed);
248    }
249
250    /**
251     * {@inheritDoc}
252     *
253     * The default implementation uses the
254     * <a href="http://en.wikipedia.org/wiki/Inverse_transform_sampling">
255     * inversion method.
256     * </a>
257     */
258    public double sample() {
259        return inverseCumulativeProbability(random.nextDouble());
260    }
261
262    /**
263     * {@inheritDoc}
264     *
265     * The default implementation generates the sample by calling
266     * {@link #sample()} in a loop.
267     */
268    public double[] sample(int sampleSize) {
269        if (sampleSize <= 0) {
270            throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
271                    sampleSize);
272        }
273        double[] out = new double[sampleSize];
274        for (int i = 0; i < sampleSize; i++) {
275            out[i] = sample();
276        }
277        return out;
278    }
279
280    /**
281     * {@inheritDoc}
282     *
283     * @return zero.
284     * @since 3.1
285     */
286    public double probability(double x) {
287        return 0d;
288    }
289
290    /**
291     * Returns the natural logarithm of the probability density function (PDF) of this distribution
292     * evaluated at the specified point {@code x}. In general, the PDF is the derivative of the
293     * {@link #cumulativeProbability(double) CDF}. If the derivative does not exist at {@code x},
294     * then an appropriate replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY},
295     * {@code Double.NaN}, or the limit inferior or limit superior of the difference quotient. Note
296     * that due to the floating point precision and under/overflow issues, this method will for some
297     * distributions be more precise and faster than computing the logarithm of
298     * {@link #density(double)}. The default implementation simply computes the logarithm of
299     * {@code density(x)}.
300     *
301     * @param x the point at which the PDF is evaluated
302     * @return the logarithm of the value of the probability density function at point {@code x}
303     */
304    public double logDensity(double x) {
305        return FastMath.log(density(x));
306    }
307}
308