001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.exception.NotStrictlyPositiveException;
023    import org.apache.commons.math.exception.util.LocalizedFormats;
024    import org.apache.commons.math.util.FastMath;
025    
026    /**
027     * Implementation for the {@link ZipfDistribution}.
028     *
029     * @version $Id: ZipfDistributionImpl.java 1131229 2011-06-03 20:49:25Z luc $
030     */
031    public class ZipfDistributionImpl extends AbstractIntegerDistribution
032        implements ZipfDistribution, Serializable {
033        /** Serializable version identifier. */
034        private static final long serialVersionUID = -140627372283420404L;
035        /** Number of elements. */
036        private final int numberOfElements;
037        /** Exponent parameter of the distribution. */
038        private final double exponent;
039    
040        /**
041         * Create a new Zipf distribution with the given number of elements and
042         * exponent.
043         *
044         * @param numberOfElements Number of elements.
045         * @param exponent Exponent.
046         * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
047         * or {@code exponent <= 0}.
048         */
049        public ZipfDistributionImpl(final int numberOfElements,
050                                    final double exponent) {
051            if (numberOfElements <= 0) {
052                throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
053                                                       numberOfElements);
054            }
055            if (exponent <= 0) {
056                throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
057                                                       exponent);
058            }
059    
060            this.numberOfElements = numberOfElements;
061            this.exponent = exponent;
062        }
063    
064        /**
065         * {@inheritDoc}
066         */
067        public int getNumberOfElements() {
068            return numberOfElements;
069        }
070    
071        /**
072         * {@inheritDoc}
073         */
074        public double getExponent() {
075            return exponent;
076        }
077    
078        /**
079         * The probability mass function {@code P(X = x)} for a Zipf distribution.
080         *
081         * @param x Value at which the probability density function is evaluated.
082         * @return the value of the probability mass function at {@code x}.
083         */
084        public double probability(final int x) {
085            if (x <= 0 || x > numberOfElements) {
086                return 0.0;
087            }
088    
089            return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
090        }
091    
092        /**
093         * The probability distribution function {@code P(X <= x)} for a
094         * Zipf distribution.
095         *
096         * @param x Value at which the PDF is evaluated.
097         * @return Zipf distribution function evaluated at {@code x}.
098         */
099        @Override
100        public double cumulativeProbability(final int x) {
101            if (x <= 0) {
102                return 0.0;
103            } else if (x >= numberOfElements) {
104                return 1.0;
105            }
106    
107            return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
108        }
109    
110        /**
111         * Access the domain value lower bound, based on {@code p}, used to
112         * bracket a PDF root.
113         *
114         * @param p Desired probability for the critical value.
115         * @return the domain value lower bound, i.e. {@code P(X < 'lower bound') < p}.
116         */
117        @Override
118        protected int getDomainLowerBound(final double p) {
119            return 0;
120        }
121    
122        /**
123         * Access the domain value upper bound, based on {@code p}, used to
124         * bracket a PDF root.
125         *
126         * @param p Desired probability for the critical value
127         * @return the domain value upper bound, i.e. {@code P(X < 'upper bound') > p}.
128         */
129        @Override
130        protected int getDomainUpperBound(final double p) {
131            return numberOfElements;
132        }
133    
134        /**
135         * Calculates the Nth generalized harmonic number. See
136         * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
137         * Series</a>.
138         *
139         * @param n Term in the series to calculate (must be larger than 1)
140         * @param m Exponent (special case {@code m = 1} is the harmonic series).
141         * @return the n<sup>th</sup> generalized harmonic number.
142         */
143        private double generalizedHarmonic(final int n, final double m) {
144            double value = 0;
145            for (int k = n; k > 0; --k) {
146                value += 1.0 / FastMath.pow(k, m);
147            }
148            return value;
149        }
150    
151        /**
152         * {@inheritDoc}
153         *
154         * The lower bound of the support is always 1 no matter the parameters.
155         *
156         * @return lower bound of the support (always 1)
157         */
158        @Override
159        public int getSupportLowerBound() {
160            return 1;
161        }
162    
163        /**
164         * {@inheritDoc}
165         *
166         * The upper bound of the support is the number of elements
167         *
168         * @return upper bound of the support
169         */
170        @Override
171        public int getSupportUpperBound() {
172            return getNumberOfElements();
173        }
174    
175        /**
176         * {@inheritDoc}
177         *
178         * For number of elements N and exponent s, the mean is
179         * <code>Hs1 / Hs</code> where
180         * <ul>
181         *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
182         *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
183         * </ul>
184         *
185         * @return {@inheritDoc}
186         */
187        @Override
188        protected double calculateNumericalMean() {
189            final int N = getNumberOfElements();
190            final double s = getExponent();
191    
192            final double Hs1 = generalizedHarmonic(N, s - 1);
193            final double Hs = generalizedHarmonic(N, s);
194    
195            return Hs1 / Hs;
196        }
197    
198        /**
199         * {@inheritDoc}
200         *
201         * For number of elements N and exponent s, the mean is
202         * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
203         * <ul>
204         *  <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
205         *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
206         *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
207         * </ul>
208         *
209         * @return {@inheritDoc}
210         */
211        @Override
212        protected double calculateNumericalVariance() {
213            final int N = getNumberOfElements();
214            final double s = getExponent();
215    
216            final double Hs2 = generalizedHarmonic(N, s - 2);
217            final double Hs1 = generalizedHarmonic(N, s - 1);
218            final double Hs = generalizedHarmonic(N, s);
219    
220            return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
221        }
222    }