001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.random;
019
020 import java.io.IOException;
021 import java.io.File;
022 import java.net.URL;
023 import java.util.List;
024
025 import org.apache.commons.math.exception.NullArgumentException;
026 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
027 import org.apache.commons.math.stat.descriptive.SummaryStatistics;
028
029 /**
030 * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html">
031 * empirical probability distribution</a> -- a probability distribution derived
032 * from observed data without making any assumptions about the functional form
033 * of the population distribution that the data come from.<p>
034 * Implementations of this interface maintain data structures, called
035 * <i>distribution digests</i>, that describe empirical distributions and
036 * support the following operations: <ul>
037 * <li>loading the distribution from a file of observed data values</li>
038 * <li>dividing the input data into "bin ranges" and reporting bin frequency
039 * counts (data for histogram)</li>
040 * <li>reporting univariate statistics describing the full set of data values
041 * as well as the observations within each bin</li>
042 * <li>generating random values from the distribution</li>
043 * </ul>
044 * Applications can use <code>EmpiricalDistribution</code> implementations to
045 * build grouped frequency histograms representing the input data or to
046 * generate random values "like" those in the input file -- i.e., the values
047 * generated will follow the distribution of the values in the file.</p>
048 *
049 * @version $Id: EmpiricalDistribution.java 1132432 2011-06-05 14:59:29Z luc $
050 */
051 public interface EmpiricalDistribution {
052
053 /**
054 * Computes the empirical distribution from the provided
055 * array of numbers.
056 *
057 * @param dataArray the data array
058 */
059 void load(double[] dataArray);
060
061 /**
062 * Computes the empirical distribution from the input file.
063 *
064 * @param file the input file
065 * @throws IOException if an IO error occurs
066 * @throws NullArgumentException if file is null
067 */
068 void load(File file) throws IOException;
069
070 /**
071 * Computes the empirical distribution using data read from a URL.
072 *
073 * @param url url of the input file
074 * @throws IOException if an IO error occurs
075 * @throws NullArgumentException if url is null
076 */
077 void load(URL url) throws IOException, NullArgumentException;
078
079 /**
080 * Generates a random value from this distribution.
081 * <strong>Preconditions:</strong><ul>
082 * <li>the distribution must be loaded before invoking this method</li></ul>
083 * @return the random value.
084 *
085 * @throws IllegalStateException if the distribution has not been loaded
086 */
087 double getNextValue() throws IllegalStateException;
088
089
090 /**
091 * Returns a
092 * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary}
093 * describing this distribution.
094 * <strong>Preconditions:</strong><ul>
095 * <li>the distribution must be loaded before invoking this method</li>
096 * </ul>
097 *
098 * @return the sample statistics
099 * @throws IllegalStateException if the distribution has not been loaded
100 */
101 StatisticalSummary getSampleStats() throws IllegalStateException;
102
103 /**
104 * Property indicating whether or not the distribution has been loaded.
105 *
106 * @return true if the distribution has been loaded
107 */
108 boolean isLoaded();
109
110 /**
111 * Returns the number of bins.
112 *
113 * @return the number of bins
114 */
115 int getBinCount();
116
117 /**
118 * Returns a list of
119 * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics}
120 * containing statistics describing the values in each of the bins. The
121 * List is indexed on the bin number.
122 *
123 * @return List of bin statistics
124 */
125 List<SummaryStatistics> getBinStats();
126
127 /**
128 * Returns the array of upper bounds for the bins. Bins are: <br/>
129 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],...,
130 * (upperBounds[binCount-2], upperBounds[binCount-1] = max].
131 *
132 * @return array of bin upper bounds
133 */
134 double[] getUpperBounds();
135
136 }