001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.random; 019 020import java.io.BufferedReader; 021import java.io.File; 022import java.io.FileInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.InputStreamReader; 026import java.net.URL; 027import java.nio.charset.Charset; 028import java.util.ArrayList; 029import java.util.List; 030 031import org.apache.commons.math3.distribution.AbstractRealDistribution; 032import org.apache.commons.math3.distribution.ConstantRealDistribution; 033import org.apache.commons.math3.distribution.NormalDistribution; 034import org.apache.commons.math3.distribution.RealDistribution; 035import org.apache.commons.math3.exception.MathIllegalStateException; 036import org.apache.commons.math3.exception.MathInternalError; 037import org.apache.commons.math3.exception.NullArgumentException; 038import org.apache.commons.math3.exception.OutOfRangeException; 039import org.apache.commons.math3.exception.ZeroException; 040import org.apache.commons.math3.exception.NotStrictlyPositiveException; 041import org.apache.commons.math3.exception.util.LocalizedFormats; 042import org.apache.commons.math3.stat.descriptive.StatisticalSummary; 043import org.apache.commons.math3.stat.descriptive.SummaryStatistics; 044import org.apache.commons.math3.util.FastMath; 045import org.apache.commons.math3.util.MathUtils; 046 047/** 048 * <p>Represents an <a href="http://http://en.wikipedia.org/wiki/Empirical_distribution_function"> 049 * empirical probability distribution</a> -- a probability distribution derived 050 * from observed data without making any assumptions about the functional form 051 * of the population distribution that the data come from.</p> 052 * 053 * <p>An <code>EmpiricalDistribution</code> maintains data structures, called 054 * <i>distribution digests</i>, that describe empirical distributions and 055 * support the following operations: <ul> 056 * <li>loading the distribution from a file of observed data values</li> 057 * <li>dividing the input data into "bin ranges" and reporting bin frequency 058 * counts (data for histogram)</li> 059 * <li>reporting univariate statistics describing the full set of data values 060 * as well as the observations within each bin</li> 061 * <li>generating random values from the distribution</li> 062 * </ul> 063 * Applications can use <code>EmpiricalDistribution</code> to build grouped 064 * frequency histograms representing the input data or to generate random values 065 * "like" those in the input file -- i.e., the values generated will follow the 066 * distribution of the values in the file.</p> 067 * 068 * <p>The implementation uses what amounts to the 069 * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html"> 070 * Variable Kernel Method</a> with Gaussian smoothing:<p> 071 * <strong>Digesting the input file</strong> 072 * <ol><li>Pass the file once to compute min and max.</li> 073 * <li>Divide the range from min-max into <code>binCount</code> "bins."</li> 074 * <li>Pass the data file again, computing bin counts and univariate 075 * statistics (mean, std dev.) for each of the bins </li> 076 * <li>Divide the interval (0,1) into subintervals associated with the bins, 077 * with the length of a bin's subinterval proportional to its count.</li></ol> 078 * <strong>Generating random values from the distribution</strong><ol> 079 * <li>Generate a uniformly distributed value in (0,1) </li> 080 * <li>Select the subinterval to which the value belongs. 081 * <li>Generate a random Gaussian value with mean = mean of the associated 082 * bin and std dev = std dev of associated bin.</li></ol></p> 083 * 084 * <p>EmpiricalDistribution implements the {@link RealDistribution} interface 085 * as follows. Given x within the range of values in the dataset, let B 086 * be the bin containing x and let K be the within-bin kernel for B. Let P(B-) 087 * be the sum of the probabilities of the bins below B and let K(B) be the 088 * mass of B under K (i.e., the integral of the kernel density over B). Then 089 * set P(X < x) = P(B-) + P(B) * K(x) / K(B) where K(x) is the kernel distribution 090 * evaluated at x. This results in a cdf that matches the grouped frequency 091 * distribution at the bin endpoints and interpolates within bins using 092 * within-bin kernels.</p> 093 * 094 *<strong>USAGE NOTES:</strong><ul> 095 *<li>The <code>binCount</code> is set by default to 1000. A good rule of thumb 096 * is to set the bin count to approximately the length of the input file divided 097 * by 10. </li> 098 *<li>The input file <i>must</i> be a plain text file containing one valid numeric 099 * entry per line.</li> 100 * </ul></p> 101 * 102 */ 103public class EmpiricalDistribution extends AbstractRealDistribution { 104 105 /** Default bin count */ 106 public static final int DEFAULT_BIN_COUNT = 1000; 107 108 /** Character set for file input */ 109 private static final String FILE_CHARSET = "US-ASCII"; 110 111 /** Serializable version identifier */ 112 private static final long serialVersionUID = 5729073523949762654L; 113 114 /** RandomDataGenerator instance to use in repeated calls to getNext() */ 115 protected final RandomDataGenerator randomData; 116 117 /** List of SummaryStatistics objects characterizing the bins */ 118 private final List<SummaryStatistics> binStats; 119 120 /** Sample statistics */ 121 private SummaryStatistics sampleStats = null; 122 123 /** Max loaded value */ 124 private double max = Double.NEGATIVE_INFINITY; 125 126 /** Min loaded value */ 127 private double min = Double.POSITIVE_INFINITY; 128 129 /** Grid size */ 130 private double delta = 0d; 131 132 /** number of bins */ 133 private final int binCount; 134 135 /** is the distribution loaded? */ 136 private boolean loaded = false; 137 138 /** upper bounds of subintervals in (0,1) "belonging" to the bins */ 139 private double[] upperBounds = null; 140 141 /** 142 * Creates a new EmpiricalDistribution with the default bin count. 143 */ 144 public EmpiricalDistribution() { 145 this(DEFAULT_BIN_COUNT); 146 } 147 148 /** 149 * Creates a new EmpiricalDistribution with the specified bin count. 150 * 151 * @param binCount number of bins. Must be strictly positive. 152 * @throws NotStrictlyPositiveException if {@code binCount <= 0}. 153 */ 154 public EmpiricalDistribution(int binCount) { 155 this(binCount, new RandomDataGenerator()); 156 } 157 158 /** 159 * Creates a new EmpiricalDistribution with the specified bin count using the 160 * provided {@link RandomGenerator} as the source of random data. 161 * 162 * @param binCount number of bins. Must be strictly positive. 163 * @param generator random data generator (may be null, resulting in default JDK generator) 164 * @throws NotStrictlyPositiveException if {@code binCount <= 0}. 165 * @since 3.0 166 */ 167 public EmpiricalDistribution(int binCount, RandomGenerator generator) { 168 this(binCount, new RandomDataGenerator(generator)); 169 } 170 171 /** 172 * Creates a new EmpiricalDistribution with default bin count using the 173 * provided {@link RandomGenerator} as the source of random data. 174 * 175 * @param generator random data generator (may be null, resulting in default JDK generator) 176 * @since 3.0 177 */ 178 public EmpiricalDistribution(RandomGenerator generator) { 179 this(DEFAULT_BIN_COUNT, generator); 180 } 181 182 /** 183 * Creates a new EmpiricalDistribution with the specified bin count using the 184 * provided {@link RandomDataImpl} instance as the source of random data. 185 * 186 * @param binCount number of bins 187 * @param randomData random data generator (may be null, resulting in default JDK generator) 188 * @since 3.0 189 * @deprecated As of 3.1. Please use {@link #EmpiricalDistribution(int,RandomGenerator)} instead. 190 */ 191 @Deprecated 192 public EmpiricalDistribution(int binCount, RandomDataImpl randomData) { 193 this(binCount, randomData.getDelegate()); 194 } 195 196 /** 197 * Creates a new EmpiricalDistribution with default bin count using the 198 * provided {@link RandomDataImpl} as the source of random data. 199 * 200 * @param randomData random data generator (may be null, resulting in default JDK generator) 201 * @since 3.0 202 * @deprecated As of 3.1. Please use {@link #EmpiricalDistribution(RandomGenerator)} instead. 203 */ 204 @Deprecated 205 public EmpiricalDistribution(RandomDataImpl randomData) { 206 this(DEFAULT_BIN_COUNT, randomData); 207 } 208 209 /** 210 * Private constructor to allow lazy initialisation of the RNG contained 211 * in the {@link #randomData} instance variable. 212 * 213 * @param binCount number of bins. Must be strictly positive. 214 * @param randomData Random data generator. 215 * @throws NotStrictlyPositiveException if {@code binCount <= 0}. 216 */ 217 private EmpiricalDistribution(int binCount, 218 RandomDataGenerator randomData) { 219 super(randomData.getRandomGenerator()); 220 if (binCount <= 0) { 221 throw new NotStrictlyPositiveException(binCount); 222 } 223 this.binCount = binCount; 224 this.randomData = randomData; 225 binStats = new ArrayList<SummaryStatistics>(); 226 } 227 228 /** 229 * Computes the empirical distribution from the provided 230 * array of numbers. 231 * 232 * @param in the input data array 233 * @exception NullArgumentException if in is null 234 */ 235 public void load(double[] in) throws NullArgumentException { 236 DataAdapter da = new ArrayDataAdapter(in); 237 try { 238 da.computeStats(); 239 // new adapter for the second pass 240 fillBinStats(new ArrayDataAdapter(in)); 241 } catch (IOException ex) { 242 // Can't happen 243 throw new MathInternalError(); 244 } 245 loaded = true; 246 247 } 248 249 /** 250 * Computes the empirical distribution using data read from a URL. 251 * 252 * <p>The input file <i>must</i> be an ASCII text file containing one 253 * valid numeric entry per line.</p> 254 * 255 * @param url url of the input file 256 * 257 * @throws IOException if an IO error occurs 258 * @throws NullArgumentException if url is null 259 * @throws ZeroException if URL contains no data 260 */ 261 public void load(URL url) throws IOException, NullArgumentException, ZeroException { 262 MathUtils.checkNotNull(url); 263 Charset charset = Charset.forName(FILE_CHARSET); 264 BufferedReader in = 265 new BufferedReader(new InputStreamReader(url.openStream(), charset)); 266 try { 267 DataAdapter da = new StreamDataAdapter(in); 268 da.computeStats(); 269 if (sampleStats.getN() == 0) { 270 throw new ZeroException(LocalizedFormats.URL_CONTAINS_NO_DATA, url); 271 } 272 // new adapter for the second pass 273 in = new BufferedReader(new InputStreamReader(url.openStream(), charset)); 274 fillBinStats(new StreamDataAdapter(in)); 275 loaded = true; 276 } finally { 277 try { 278 in.close(); 279 } catch (IOException ex) { //NOPMD 280 // ignore 281 } 282 } 283 } 284 285 /** 286 * Computes the empirical distribution from the input file. 287 * 288 * <p>The input file <i>must</i> be an ASCII text file containing one 289 * valid numeric entry per line.</p> 290 * 291 * @param file the input file 292 * @throws IOException if an IO error occurs 293 * @throws NullArgumentException if file is null 294 */ 295 public void load(File file) throws IOException, NullArgumentException { 296 MathUtils.checkNotNull(file); 297 Charset charset = Charset.forName(FILE_CHARSET); 298 InputStream is = new FileInputStream(file); 299 BufferedReader in = new BufferedReader(new InputStreamReader(is, charset)); 300 try { 301 DataAdapter da = new StreamDataAdapter(in); 302 da.computeStats(); 303 // new adapter for second pass 304 is = new FileInputStream(file); 305 in = new BufferedReader(new InputStreamReader(is, charset)); 306 fillBinStats(new StreamDataAdapter(in)); 307 loaded = true; 308 } finally { 309 try { 310 in.close(); 311 } catch (IOException ex) { //NOPMD 312 // ignore 313 } 314 } 315 } 316 317 /** 318 * Provides methods for computing <code>sampleStats</code> and 319 * <code>beanStats</code> abstracting the source of data. 320 */ 321 private abstract class DataAdapter{ 322 323 /** 324 * Compute bin stats. 325 * 326 * @throws IOException if an error occurs computing bin stats 327 */ 328 public abstract void computeBinStats() throws IOException; 329 330 /** 331 * Compute sample statistics. 332 * 333 * @throws IOException if an error occurs computing sample stats 334 */ 335 public abstract void computeStats() throws IOException; 336 337 } 338 339 /** 340 * <code>DataAdapter</code> for data provided through some input stream 341 */ 342 private class StreamDataAdapter extends DataAdapter{ 343 344 /** Input stream providing access to the data */ 345 private BufferedReader inputStream; 346 347 /** 348 * Create a StreamDataAdapter from a BufferedReader 349 * 350 * @param in BufferedReader input stream 351 */ 352 StreamDataAdapter(BufferedReader in){ 353 super(); 354 inputStream = in; 355 } 356 357 /** {@inheritDoc} */ 358 @Override 359 public void computeBinStats() throws IOException { 360 String str = null; 361 double val = 0.0d; 362 while ((str = inputStream.readLine()) != null) { 363 val = Double.parseDouble(str); 364 SummaryStatistics stats = binStats.get(findBin(val)); 365 stats.addValue(val); 366 } 367 368 inputStream.close(); 369 inputStream = null; 370 } 371 372 /** {@inheritDoc} */ 373 @Override 374 public void computeStats() throws IOException { 375 String str = null; 376 double val = 0.0; 377 sampleStats = new SummaryStatistics(); 378 while ((str = inputStream.readLine()) != null) { 379 val = Double.parseDouble(str); 380 sampleStats.addValue(val); 381 } 382 inputStream.close(); 383 inputStream = null; 384 } 385 } 386 387 /** 388 * <code>DataAdapter</code> for data provided as array of doubles. 389 */ 390 private class ArrayDataAdapter extends DataAdapter { 391 392 /** Array of input data values */ 393 private double[] inputArray; 394 395 /** 396 * Construct an ArrayDataAdapter from a double[] array 397 * 398 * @param in double[] array holding the data 399 * @throws NullArgumentException if in is null 400 */ 401 ArrayDataAdapter(double[] in) throws NullArgumentException { 402 super(); 403 MathUtils.checkNotNull(in); 404 inputArray = in; 405 } 406 407 /** {@inheritDoc} */ 408 @Override 409 public void computeStats() throws IOException { 410 sampleStats = new SummaryStatistics(); 411 for (int i = 0; i < inputArray.length; i++) { 412 sampleStats.addValue(inputArray[i]); 413 } 414 } 415 416 /** {@inheritDoc} */ 417 @Override 418 public void computeBinStats() throws IOException { 419 for (int i = 0; i < inputArray.length; i++) { 420 SummaryStatistics stats = 421 binStats.get(findBin(inputArray[i])); 422 stats.addValue(inputArray[i]); 423 } 424 } 425 } 426 427 /** 428 * Fills binStats array (second pass through data file). 429 * 430 * @param da object providing access to the data 431 * @throws IOException if an IO error occurs 432 */ 433 private void fillBinStats(final DataAdapter da) 434 throws IOException { 435 // Set up grid 436 min = sampleStats.getMin(); 437 max = sampleStats.getMax(); 438 delta = (max - min)/((double) binCount); 439 440 // Initialize binStats ArrayList 441 if (!binStats.isEmpty()) { 442 binStats.clear(); 443 } 444 for (int i = 0; i < binCount; i++) { 445 SummaryStatistics stats = new SummaryStatistics(); 446 binStats.add(i,stats); 447 } 448 449 // Filling data in binStats Array 450 da.computeBinStats(); 451 452 // Assign upperBounds based on bin counts 453 upperBounds = new double[binCount]; 454 upperBounds[0] = 455 ((double) binStats.get(0).getN()) / (double) sampleStats.getN(); 456 for (int i = 1; i < binCount-1; i++) { 457 upperBounds[i] = upperBounds[i-1] + 458 ((double) binStats.get(i).getN()) / (double) sampleStats.getN(); 459 } 460 upperBounds[binCount-1] = 1.0d; 461 } 462 463 /** 464 * Returns the index of the bin to which the given value belongs 465 * 466 * @param value the value whose bin we are trying to find 467 * @return the index of the bin containing the value 468 */ 469 private int findBin(double value) { 470 return FastMath.min( 471 FastMath.max((int) FastMath.ceil((value - min) / delta) - 1, 0), 472 binCount - 1); 473 } 474 475 /** 476 * Generates a random value from this distribution. 477 * <strong>Preconditions:</strong><ul> 478 * <li>the distribution must be loaded before invoking this method</li></ul> 479 * @return the random value. 480 * @throws MathIllegalStateException if the distribution has not been loaded 481 */ 482 public double getNextValue() throws MathIllegalStateException { 483 484 if (!loaded) { 485 throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED); 486 } 487 488 return sample(); 489 } 490 491 /** 492 * Returns a {@link StatisticalSummary} describing this distribution. 493 * <strong>Preconditions:</strong><ul> 494 * <li>the distribution must be loaded before invoking this method</li></ul> 495 * 496 * @return the sample statistics 497 * @throws IllegalStateException if the distribution has not been loaded 498 */ 499 public StatisticalSummary getSampleStats() { 500 return sampleStats; 501 } 502 503 /** 504 * Returns the number of bins. 505 * 506 * @return the number of bins. 507 */ 508 public int getBinCount() { 509 return binCount; 510 } 511 512 /** 513 * Returns a List of {@link SummaryStatistics} instances containing 514 * statistics describing the values in each of the bins. The list is 515 * indexed on the bin number. 516 * 517 * @return List of bin statistics. 518 */ 519 public List<SummaryStatistics> getBinStats() { 520 return binStats; 521 } 522 523 /** 524 * <p>Returns a fresh copy of the array of upper bounds for the bins. 525 * Bins are: <br/> 526 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., 527 * (upperBounds[binCount-2], upperBounds[binCount-1] = max].</p> 528 * 529 * <p>Note: In versions 1.0-2.0 of commons-math, this method 530 * incorrectly returned the array of probability generator upper 531 * bounds now returned by {@link #getGeneratorUpperBounds()}.</p> 532 * 533 * @return array of bin upper bounds 534 * @since 2.1 535 */ 536 public double[] getUpperBounds() { 537 double[] binUpperBounds = new double[binCount]; 538 for (int i = 0; i < binCount - 1; i++) { 539 binUpperBounds[i] = min + delta * (i + 1); 540 } 541 binUpperBounds[binCount - 1] = max; 542 return binUpperBounds; 543 } 544 545 /** 546 * <p>Returns a fresh copy of the array of upper bounds of the subintervals 547 * of [0,1] used in generating data from the empirical distribution. 548 * Subintervals correspond to bins with lengths proportional to bin counts.</p> 549 * 550 * <strong>Preconditions:</strong><ul> 551 * <li>the distribution must be loaded before invoking this method</li></ul> 552 * 553 * <p>In versions 1.0-2.0 of commons-math, this array was (incorrectly) returned 554 * by {@link #getUpperBounds()}.</p> 555 * 556 * @since 2.1 557 * @return array of upper bounds of subintervals used in data generation 558 * @throws NullPointerException unless a {@code load} method has been 559 * called beforehand. 560 */ 561 public double[] getGeneratorUpperBounds() { 562 int len = upperBounds.length; 563 double[] out = new double[len]; 564 System.arraycopy(upperBounds, 0, out, 0, len); 565 return out; 566 } 567 568 /** 569 * Property indicating whether or not the distribution has been loaded. 570 * 571 * @return true if the distribution has been loaded 572 */ 573 public boolean isLoaded() { 574 return loaded; 575 } 576 577 /** 578 * Reseeds the random number generator used by {@link #getNextValue()}. 579 * 580 * @param seed random generator seed 581 * @since 3.0 582 */ 583 public void reSeed(long seed) { 584 randomData.reSeed(seed); 585 } 586 587 // Distribution methods --------------------------- 588 589 /** 590 * {@inheritDoc} 591 * @since 3.1 592 */ 593 @Override 594 public double probability(double x) { 595 return 0; 596 } 597 598 /** 599 * {@inheritDoc} 600 * 601 * <p>Returns the kernel density normalized so that its integral over each bin 602 * equals the bin mass.</p> 603 * 604 * <p>Algorithm description: <ol> 605 * <li>Find the bin B that x belongs to.</li> 606 * <li>Compute K(B) = the mass of B with respect to the within-bin kernel (i.e., the 607 * integral of the kernel density over B).</li> 608 * <li>Return k(x) * P(B) / K(B), where k is the within-bin kernel density 609 * and P(B) is the mass of B.</li></ol></p> 610 * @since 3.1 611 */ 612 public double density(double x) { 613 if (x < min || x > max) { 614 return 0d; 615 } 616 final int binIndex = findBin(x); 617 final RealDistribution kernel = getKernel(binStats.get(binIndex)); 618 return kernel.density(x) * pB(binIndex) / kB(binIndex); 619 } 620 621 /** 622 * {@inheritDoc} 623 * 624 * <p>Algorithm description:<ol> 625 * <li>Find the bin B that x belongs to.</li> 626 * <li>Compute P(B) = the mass of B and P(B-) = the combined mass of the bins below B.</li> 627 * <li>Compute K(B) = the probability mass of B with respect to the within-bin kernel 628 * and K(B-) = the kernel distribution evaluated at the lower endpoint of B</li> 629 * <li>Return P(B-) + P(B) * [K(x) - K(B-)] / K(B) where 630 * K(x) is the within-bin kernel distribution function evaluated at x.</li></ol> 631 * If K is a constant distribution, we return P(B-) + P(B) (counting the full 632 * mass of B).</p> 633 * 634 * @since 3.1 635 */ 636 public double cumulativeProbability(double x) { 637 if (x < min) { 638 return 0d; 639 } else if (x >= max) { 640 return 1d; 641 } 642 final int binIndex = findBin(x); 643 final double pBminus = pBminus(binIndex); 644 final double pB = pB(binIndex); 645 final RealDistribution kernel = k(x); 646 if (kernel instanceof ConstantRealDistribution) { 647 if (x < kernel.getNumericalMean()) { 648 return pBminus; 649 } else { 650 return pBminus + pB; 651 } 652 } 653 final double[] binBounds = getUpperBounds(); 654 final double kB = kB(binIndex); 655 final double lower = binIndex == 0 ? min : binBounds[binIndex - 1]; 656 final double withinBinCum = 657 (kernel.cumulativeProbability(x) - kernel.cumulativeProbability(lower)) / kB; 658 return pBminus + pB * withinBinCum; 659 } 660 661 /** 662 * {@inheritDoc} 663 * 664 * <p>Algorithm description:<ol> 665 * <li>Find the smallest i such that the sum of the masses of the bins 666 * through i is at least p.</li> 667 * <li> 668 * Let K be the within-bin kernel distribution for bin i.</br> 669 * Let K(B) be the mass of B under K. <br/> 670 * Let K(B-) be K evaluated at the lower endpoint of B (the combined 671 * mass of the bins below B under K).<br/> 672 * Let P(B) be the probability of bin i.<br/> 673 * Let P(B-) be the sum of the bin masses below bin i. <br/> 674 * Let pCrit = p - P(B-)<br/> 675 * <li>Return the inverse of K evaluated at <br/> 676 * K(B-) + pCrit * K(B) / P(B) </li> 677 * </ol></p> 678 * 679 * @since 3.1 680 */ 681 @Override 682 public double inverseCumulativeProbability(final double p) throws OutOfRangeException { 683 if (p < 0.0 || p > 1.0) { 684 throw new OutOfRangeException(p, 0, 1); 685 } 686 687 if (p == 0.0) { 688 return getSupportLowerBound(); 689 } 690 691 if (p == 1.0) { 692 return getSupportUpperBound(); 693 } 694 695 int i = 0; 696 while (cumBinP(i) < p) { 697 i++; 698 } 699 700 final RealDistribution kernel = getKernel(binStats.get(i)); 701 final double kB = kB(i); 702 final double[] binBounds = getUpperBounds(); 703 final double lower = i == 0 ? min : binBounds[i - 1]; 704 final double kBminus = kernel.cumulativeProbability(lower); 705 final double pB = pB(i); 706 final double pBminus = pBminus(i); 707 final double pCrit = p - pBminus; 708 if (pCrit <= 0) { 709 return lower; 710 } 711 return kernel.inverseCumulativeProbability(kBminus + pCrit * kB / pB); 712 } 713 714 /** 715 * {@inheritDoc} 716 * @since 3.1 717 */ 718 public double getNumericalMean() { 719 return sampleStats.getMean(); 720 } 721 722 /** 723 * {@inheritDoc} 724 * @since 3.1 725 */ 726 public double getNumericalVariance() { 727 return sampleStats.getVariance(); 728 } 729 730 /** 731 * {@inheritDoc} 732 * @since 3.1 733 */ 734 public double getSupportLowerBound() { 735 return min; 736 } 737 738 /** 739 * {@inheritDoc} 740 * @since 3.1 741 */ 742 public double getSupportUpperBound() { 743 return max; 744 } 745 746 /** 747 * {@inheritDoc} 748 * @since 3.1 749 */ 750 public boolean isSupportLowerBoundInclusive() { 751 return true; 752 } 753 754 /** 755 * {@inheritDoc} 756 * @since 3.1 757 */ 758 public boolean isSupportUpperBoundInclusive() { 759 return true; 760 } 761 762 /** 763 * {@inheritDoc} 764 * @since 3.1 765 */ 766 public boolean isSupportConnected() { 767 return true; 768 } 769 770 /** 771 * {@inheritDoc} 772 * @since 3.1 773 */ 774 @Override 775 public void reseedRandomGenerator(long seed) { 776 randomData.reSeed(seed); 777 } 778 779 /** 780 * The probability of bin i. 781 * 782 * @param i the index of the bin 783 * @return the probability that selection begins in bin i 784 */ 785 private double pB(int i) { 786 return i == 0 ? upperBounds[0] : 787 upperBounds[i] - upperBounds[i - 1]; 788 } 789 790 /** 791 * The combined probability of the bins up to but not including bin i. 792 * 793 * @param i the index of the bin 794 * @return the probability that selection begins in a bin below bin i. 795 */ 796 private double pBminus(int i) { 797 return i == 0 ? 0 : upperBounds[i - 1]; 798 } 799 800 /** 801 * Mass of bin i under the within-bin kernel of the bin. 802 * 803 * @param i index of the bin 804 * @return the difference in the within-bin kernel cdf between the 805 * upper and lower endpoints of bin i 806 */ 807 @SuppressWarnings("deprecation") 808 private double kB(int i) { 809 final double[] binBounds = getUpperBounds(); 810 final RealDistribution kernel = getKernel(binStats.get(i)); 811 return i == 0 ? kernel.cumulativeProbability(min, binBounds[0]) : 812 kernel.cumulativeProbability(binBounds[i - 1], binBounds[i]); 813 } 814 815 /** 816 * The within-bin kernel of the bin that x belongs to. 817 * 818 * @param x the value to locate within a bin 819 * @return the within-bin kernel of the bin containing x 820 */ 821 private RealDistribution k(double x) { 822 final int binIndex = findBin(x); 823 return getKernel(binStats.get(binIndex)); 824 } 825 826 /** 827 * The combined probability of the bins up to and including binIndex. 828 * 829 * @param binIndex maximum bin index 830 * @return sum of the probabilities of bins through binIndex 831 */ 832 private double cumBinP(int binIndex) { 833 return upperBounds[binIndex]; 834 } 835 836 /** 837 * The within-bin smoothing kernel. Returns a Gaussian distribution 838 * parameterized by {@code bStats}, unless the bin contains only one 839 * observation, in which case a constant distribution is returned. 840 * 841 * @param bStats summary statistics for the bin 842 * @return within-bin kernel parameterized by bStats 843 */ 844 protected RealDistribution getKernel(SummaryStatistics bStats) { 845 if (bStats.getN() == 1 || bStats.getVariance() == 0) { 846 return new ConstantRealDistribution(bStats.getMean()); 847 } else { 848 return new NormalDistribution(randomData.getRandomGenerator(), 849 bStats.getMean(), bStats.getStandardDeviation(), 850 NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY); 851 } 852 } 853}