001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math4.legacy.stat; 018 019import java.util.List; 020 021import org.apache.commons.math4.legacy.exception.DimensionMismatchException; 022import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 023import org.apache.commons.math4.legacy.exception.NoDataException; 024import org.apache.commons.math4.legacy.exception.NotPositiveException; 025import org.apache.commons.math4.legacy.exception.NullArgumentException; 026import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException; 027import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 028import org.apache.commons.math4.legacy.stat.descriptive.DescriptiveStatistics; 029import org.apache.commons.math4.legacy.stat.descriptive.UnivariateStatistic; 030import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean; 031import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean; 032import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance; 033import org.apache.commons.math4.legacy.stat.descriptive.rank.Max; 034import org.apache.commons.math4.legacy.stat.descriptive.rank.Min; 035import 
org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile; 036import org.apache.commons.math4.legacy.stat.descriptive.summary.Product; 037import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum; 038import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs; 039import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares; 040 041/** 042 * StatUtils provides static methods for computing statistics based on data 043 * stored in double[] arrays. 044 */ 045public final class StatUtils { 046 047 /** sum. */ 048 private static final UnivariateStatistic SUM = new Sum(); 049 050 /** sumSq. */ 051 private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares(); 052 053 /** prod. */ 054 private static final UnivariateStatistic PRODUCT = new Product(); 055 056 /** sumLog. */ 057 private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs(); 058 059 /** min. */ 060 private static final UnivariateStatistic MIN = new Min(); 061 062 /** max. */ 063 private static final UnivariateStatistic MAX = new Max(); 064 065 /** mean. */ 066 private static final UnivariateStatistic MEAN = new Mean(); 067 068 /** variance. */ 069 private static final Variance VARIANCE = new Variance(); 070 071 /** percentile. */ 072 private static final Percentile PERCENTILE = new Percentile(); 073 074 /** geometric mean. */ 075 private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean(); 076 077 /** 078 * Private Constructor. 079 */ 080 private StatUtils() { 081 } 082 083 /** 084 * Returns the sum of the values in the input array, or 085 * <code>Double.NaN</code> if the array is empty. 086 * <p> 087 * Throws <code>IllegalArgumentException</code> if the input array is null. 
088 * 089 * @param values array of values to sum 090 * @return the sum of the values or <code>Double.NaN</code> if the array is empty 091 * @throws MathIllegalArgumentException if the array is null 092 */ 093 public static double sum(final double[] values) throws MathIllegalArgumentException { 094 return SUM.evaluate(values); 095 } 096 097 /** 098 * Returns the sum of the entries in the specified portion of 099 * the input array, or <code>Double.NaN</code> if the designated subarray is empty. 100 * <p> 101 * Throws <code>IllegalArgumentException</code> if the array is null. 102 * 103 * @param values the input array 104 * @param begin index of the first array element to include 105 * @param length the number of elements to include 106 * @return the sum of the values or Double.NaN if length = 0 107 * @throws MathIllegalArgumentException if the array is null or the array index 108 * parameters are not valid 109 */ 110 public static double sum(final double[] values, final int begin, final int length) 111 throws MathIllegalArgumentException { 112 return SUM.evaluate(values, begin, length); 113 } 114 115 /** 116 * Returns the sum of the squares of the entries in the input array, or 117 * <code>Double.NaN</code> if the array is empty. 118 * <p> 119 * Throws <code>IllegalArgumentException</code> if the array is null. 120 * 121 * @param values input array 122 * @return the sum of the squared values or <code>Double.NaN</code> if the array is empty 123 * @throws MathIllegalArgumentException if the array is null 124 */ 125 public static double sumSq(final double[] values) throws MathIllegalArgumentException { 126 return SUM_OF_SQUARES.evaluate(values); 127 } 128 129 /** 130 * Returns the sum of the squares of the entries in the specified portion of 131 * the input array, or <code>Double.NaN</code> if the designated subarray 132 * is empty. 133 * <p> 134 * Throws <code>IllegalArgumentException</code> if the array is null. 
135 * 136 * @param values the input array 137 * @param begin index of the first array element to include 138 * @param length the number of elements to include 139 * @return the sum of the squares of the values or Double.NaN if length = 0 140 * @throws MathIllegalArgumentException if the array is null or the array index 141 * parameters are not valid 142 */ 143 public static double sumSq(final double[] values, final int begin, final int length) 144 throws MathIllegalArgumentException { 145 return SUM_OF_SQUARES.evaluate(values, begin, length); 146 } 147 148 /** 149 * Returns the product of the entries in the input array, or 150 * <code>Double.NaN</code> if the array is empty. 151 * <p> 152 * Throws <code>IllegalArgumentException</code> if the array is null. 153 * 154 * @param values the input array 155 * @return the product of the values or Double.NaN if the array is empty 156 * @throws MathIllegalArgumentException if the array is null 157 */ 158 public static double product(final double[] values) throws MathIllegalArgumentException { 159 return PRODUCT.evaluate(values); 160 } 161 162 /** 163 * Returns the product of the entries in the specified portion of 164 * the input array, or <code>Double.NaN</code> if the designated subarray 165 * is empty. 166 * <p> 167 * Throws <code>IllegalArgumentException</code> if the array is null. 
168 * 169 * @param values the input array 170 * @param begin index of the first array element to include 171 * @param length the number of elements to include 172 * @return the product of the values or Double.NaN if length = 0 173 * @throws MathIllegalArgumentException if the array is null or the array index 174 * parameters are not valid 175 */ 176 public static double product(final double[] values, final int begin, final int length) 177 throws MathIllegalArgumentException { 178 return PRODUCT.evaluate(values, begin, length); 179 } 180 181 /** 182 * Returns the sum of the natural logs of the entries in the input array, or 183 * <code>Double.NaN</code> if the array is empty. 184 * <p> 185 * Throws <code>IllegalArgumentException</code> if the array is null. 186 * <p> 187 * See {@link org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs}. 188 * 189 * @param values the input array 190 * @return the sum of the natural logs of the values or Double.NaN if the array is empty 191 * @throws MathIllegalArgumentException if the array is null 192 */ 193 public static double sumLog(final double[] values) throws MathIllegalArgumentException { 194 return SUM_OF_LOGS.evaluate(values); 195 } 196 197 /** 198 * Returns the sum of the natural logs of the entries in the specified portion of 199 * the input array, or <code>Double.NaN</code> if the designated subarray is empty. 200 * <p> 201 * Throws <code>IllegalArgumentException</code> if the array is null. 202 * <p> 203 * See {@link org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs}. 
204 * 205 * @param values the input array 206 * @param begin index of the first array element to include 207 * @param length the number of elements to include 208 * @return the sum of the natural logs of the values or Double.NaN if 209 * length = 0 210 * @throws MathIllegalArgumentException if the array is null or the array index 211 * parameters are not valid 212 */ 213 public static double sumLog(final double[] values, final int begin, final int length) 214 throws MathIllegalArgumentException { 215 return SUM_OF_LOGS.evaluate(values, begin, length); 216 } 217 218 /** 219 * Returns the arithmetic mean of the entries in the input array, or 220 * <code>Double.NaN</code> if the array is empty. 221 * <p> 222 * Throws <code>IllegalArgumentException</code> if the array is null. 223 * <p> 224 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Mean} for 225 * details on the computing algorithm. 226 * 227 * @param values the input array 228 * @return the mean of the values or Double.NaN if the array is empty 229 * @throws MathIllegalArgumentException if the array is null 230 */ 231 public static double mean(final double[] values) throws MathIllegalArgumentException { 232 return MEAN.evaluate(values); 233 } 234 235 /** 236 * Returns the arithmetic mean of the entries in the specified portion of 237 * the input array, or <code>Double.NaN</code> if the designated subarray 238 * is empty. 239 * <p> 240 * Throws <code>IllegalArgumentException</code> if the array is null. 241 * <p> 242 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Mean Mean} for 243 * details on the computing algorithm. 
244 * 245 * @param values the input array 246 * @param begin index of the first array element to include 247 * @param length the number of elements to include 248 * @return the mean of the values or Double.NaN if length = 0 249 * @throws MathIllegalArgumentException if the array is null or the array index 250 * parameters are not valid 251 */ 252 public static double mean(final double[] values, final int begin, final int length) 253 throws MathIllegalArgumentException { 254 return MEAN.evaluate(values, begin, length); 255 } 256 257 /** 258 * Returns the geometric mean of the entries in the input array, or 259 * <code>Double.NaN</code> if the array is empty. 260 * <p> 261 * Throws <code>IllegalArgumentException</code> if the array is null. 262 * <p> 263 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean GeometricMean} 264 * for details on the computing algorithm. 265 * 266 * @param values the input array 267 * @return the geometric mean of the values or Double.NaN if the array is empty 268 * @throws MathIllegalArgumentException if the array is null 269 */ 270 public static double geometricMean(final double[] values) throws MathIllegalArgumentException { 271 return GEOMETRIC_MEAN.evaluate(values); 272 } 273 274 /** 275 * Returns the geometric mean of the entries in the specified portion of 276 * the input array, or <code>Double.NaN</code> if the designated subarray 277 * is empty. 278 * <p> 279 * Throws <code>IllegalArgumentException</code> if the array is null. 280 * <p> 281 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean GeometricMean} 282 * for details on the computing algorithm. 
283 * 284 * @param values the input array 285 * @param begin index of the first array element to include 286 * @param length the number of elements to include 287 * @return the geometric mean of the values or Double.NaN if length = 0 288 * @throws MathIllegalArgumentException if the array is null or the array index 289 * parameters are not valid 290 */ 291 public static double geometricMean(final double[] values, final int begin, final int length) 292 throws MathIllegalArgumentException { 293 return GEOMETRIC_MEAN.evaluate(values, begin, length); 294 } 295 296 /** 297 * Returns the variance of the entries in the input array, or 298 * <code>Double.NaN</code> if the array is empty. 299 * <p> 300 * This method returns the bias-corrected sample variance (using {@code n - 1} in 301 * the denominator). Use {@link #populationVariance(double[])} for the non-bias-corrected 302 * population variance. 303 * <p> 304 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 305 * details on the computing algorithm. 306 * <p> 307 * Returns 0 for a single-value (i.e. length = 1) sample. 308 * <p> 309 * Throws <code>MathIllegalArgumentException</code> if the array is null. 310 * 311 * @param values the input array 312 * @return the variance of the values or Double.NaN if the array is empty 313 * @throws MathIllegalArgumentException if the array is null 314 */ 315 public static double variance(final double[] values) throws MathIllegalArgumentException { 316 return VARIANCE.evaluate(values); 317 } 318 319 /** 320 * Returns the variance of the entries in the specified portion of 321 * the input array, or <code>Double.NaN</code> if the designated subarray 322 * is empty. 323 * <p> 324 * This method returns the bias-corrected sample variance (using {@code n - 1} in 325 * the denominator). Use {@link #populationVariance(double[], int, int)} for the non-bias-corrected 326 * population variance. 
327 * <p> 328 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 329 * details on the computing algorithm. 330 * <p> 331 * Returns 0 for a single-value (i.e. length = 1) sample. 332 * <p> 333 * Throws <code>MathIllegalArgumentException</code> if the array is null or the 334 * array index parameters are not valid. 335 * 336 * @param values the input array 337 * @param begin index of the first array element to include 338 * @param length the number of elements to include 339 * @return the variance of the values or Double.NaN if length = 0 340 * @throws MathIllegalArgumentException if the array is null or the array index 341 * parameters are not valid 342 */ 343 public static double variance(final double[] values, final int begin, final int length) 344 throws MathIllegalArgumentException { 345 return VARIANCE.evaluate(values, begin, length); 346 } 347 348 /** 349 * Returns the variance of the entries in the specified portion of 350 * the input array, using the precomputed mean value. Returns 351 * <code>Double.NaN</code> if the designated subarray is empty. 352 * <p> 353 * This method returns the bias-corrected sample variance (using {@code n - 1} in 354 * the denominator). Use {@link #populationVariance(double[], double, int, int)} for 355 * the non-bias-corrected population variance. 356 * <p> 357 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 358 * details on the computing algorithm. 359 * <p> 360 * The formula used assumes that the supplied mean value is the arithmetic 361 * mean of the sample data, not a known population parameter. This method 362 * is supplied only to save computation when the mean has already been 363 * computed. 364 * <p> 365 * Returns 0 for a single-value (i.e. length = 1) sample. 366 * <p> 367 * Throws <code>MathIllegalArgumentException</code> if the array is null or the 368 * array index parameters are not valid. 
369 * 370 * @param values the input array 371 * @param mean the precomputed mean value 372 * @param begin index of the first array element to include 373 * @param length the number of elements to include 374 * @return the variance of the values or Double.NaN if length = 0 375 * @throws MathIllegalArgumentException if the array is null or the array index 376 * parameters are not valid 377 */ 378 public static double variance(final double[] values, final double mean, final int begin, final int length) 379 throws MathIllegalArgumentException { 380 return VARIANCE.evaluate(values, mean, begin, length); 381 } 382 383 /** 384 * Returns the variance of the entries in the input array, using the 385 * precomputed mean value. Returns <code>Double.NaN</code> if the array 386 * is empty. 387 * <p> 388 * This method returns the bias-corrected sample variance (using {@code n - 1} in 389 * the denominator). Use {@link #populationVariance(double[], double)} for the 390 * non-bias-corrected population variance. 391 * <p> 392 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 393 * details on the computing algorithm. 394 * <p> 395 * The formula used assumes that the supplied mean value is the arithmetic 396 * mean of the sample data, not a known population parameter. This method 397 * is supplied only to save computation when the mean has already been 398 * computed. 399 * <p> 400 * Returns 0 for a single-value (i.e. length = 1) sample. 401 * <p> 402 * Throws <code>MathIllegalArgumentException</code> if the array is null. 
403 * 404 * @param values the input array 405 * @param mean the precomputed mean value 406 * @return the variance of the values or Double.NaN if the array is empty 407 * @throws MathIllegalArgumentException if the array is null 408 */ 409 public static double variance(final double[] values, final double mean) throws MathIllegalArgumentException { 410 return VARIANCE.evaluate(values, mean); 411 } 412 413 /** 414 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> 415 * population variance</a> of the entries in the input array, or 416 * <code>Double.NaN</code> if the array is empty. 417 * <p> 418 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 419 * details on the formula and computing algorithm. 420 * <p> 421 * Returns 0 for a single-value (i.e. length = 1) sample. 422 * <p> 423 * Throws <code>MathIllegalArgumentException</code> if the array is null. 424 * 425 * @param values the input array 426 * @return the population variance of the values or Double.NaN if the array is empty 427 * @throws MathIllegalArgumentException if the array is null 428 */ 429 public static double populationVariance(final double[] values) throws MathIllegalArgumentException { 430 return new Variance(false).evaluate(values); 431 } 432 433 /** 434 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> 435 * population variance</a> of the entries in the specified portion of 436 * the input array, or <code>Double.NaN</code> if the designated subarray 437 * is empty. 438 * <p> 439 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 440 * details on the computing algorithm. 441 * <p> 442 * Returns 0 for a single-value (i.e. length = 1) sample. 443 * <p> 444 * Throws <code>MathIllegalArgumentException</code> if the array is null or the 445 * array index parameters are not valid. 
446 * 447 * @param values the input array 448 * @param begin index of the first array element to include 449 * @param length the number of elements to include 450 * @return the population variance of the values or Double.NaN if length = 0 451 * @throws MathIllegalArgumentException if the array is null or the array index 452 * parameters are not valid 453 */ 454 public static double populationVariance(final double[] values, final int begin, final int length) 455 throws MathIllegalArgumentException { 456 return new Variance(false).evaluate(values, begin, length); 457 } 458 459 /** 460 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> 461 * population variance</a> of the entries in the specified portion of 462 * the input array, using the precomputed mean value. Returns 463 * <code>Double.NaN</code> if the designated subarray is empty. 464 * <p> 465 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 466 * details on the computing algorithm. 467 * <p> 468 * The formula used assumes that the supplied mean value is the arithmetic 469 * mean of the sample data, not a known population parameter. This method 470 * is supplied only to save computation when the mean has already been 471 * computed. 472 * <p> 473 * Returns 0 for a single-value (i.e. length = 1) sample. 474 * <p> 475 * Throws <code>MathIllegalArgumentException</code> if the array is null or the 476 * array index parameters are not valid. 
477 * 478 * @param values the input array 479 * @param mean the precomputed mean value 480 * @param begin index of the first array element to include 481 * @param length the number of elements to include 482 * @return the population variance of the values or Double.NaN if length = 0 483 * @throws MathIllegalArgumentException if the array is null or the array index 484 * parameters are not valid 485 */ 486 public static double populationVariance(final double[] values, final double mean, 487 final int begin, final int length) 488 throws MathIllegalArgumentException { 489 return new Variance(false).evaluate(values, mean, begin, length); 490 } 491 492 /** 493 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> 494 * population variance</a> of the entries in the input array, using the precomputed 495 * mean value. Returns <code>Double.NaN</code> if the array is empty. 496 * <p> 497 * See {@link org.apache.commons.math4.legacy.stat.descriptive.moment.Variance Variance} for 498 * details on the computing algorithm. 499 * <p> 500 * The formula used assumes that the supplied mean value is the arithmetic 501 * mean of the sample data, not a known population parameter. This method is 502 * supplied only to save computation when the mean has already been computed. 503 * <p> 504 * Returns 0 for a single-value (i.e. length = 1) sample. 505 * <p> 506 * Throws <code>MathIllegalArgumentException</code> if the array is null. 
507 * 508 * @param values the input array 509 * @param mean the precomputed mean value 510 * @return the population variance of the values or Double.NaN if the array is empty 511 * @throws MathIllegalArgumentException if the array is null 512 */ 513 public static double populationVariance(final double[] values, final double mean) 514 throws MathIllegalArgumentException { 515 return new Variance(false).evaluate(values, mean); 516 } 517 518 /** 519 * Returns the maximum of the entries in the input array, or 520 * <code>Double.NaN</code> if the array is empty. 521 * <p> 522 * Throws <code>MathIllegalArgumentException</code> if the array is null. 523 * <ul> 524 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 525 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 526 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 527 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 528 * </ul> 529 * 530 * @param values the input array 531 * @return the maximum of the values or Double.NaN if the array is empty 532 * @throws MathIllegalArgumentException if the array is null 533 */ 534 public static double max(final double[] values) throws MathIllegalArgumentException { 535 return MAX.evaluate(values); 536 } 537 538 /** 539 * Returns the maximum of the entries in the specified portion of the input array, 540 * or <code>Double.NaN</code> if the designated subarray is empty. 541 * <p> 542 * Throws <code>MathIllegalArgumentException</code> if the array is null or 543 * the array index parameters are not valid. 544 * <ul> 545 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 546 * (i.e. 
<code>NaN</code> values have no impact on the value of the statistic).</li> 547 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 548 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 549 * </ul> 550 * 551 * @param values the input array 552 * @param begin index of the first array element to include 553 * @param length the number of elements to include 554 * @return the maximum of the values or Double.NaN if length = 0 555 * @throws MathIllegalArgumentException if the array is null or the array index 556 * parameters are not valid 557 */ 558 public static double max(final double[] values, final int begin, final int length) 559 throws MathIllegalArgumentException { 560 return MAX.evaluate(values, begin, length); 561 } 562 563 /** 564 * Returns the minimum of the entries in the input array, or 565 * <code>Double.NaN</code> if the array is empty. 566 * <p> 567 * Throws <code>MathIllegalArgumentException</code> if the array is null. 568 * <ul> 569 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 570 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 571 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 572 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 573 * </ul> 574 * 575 * @param values the input array 576 * @return the minimum of the values or Double.NaN if the array is empty 577 * @throws MathIllegalArgumentException if the array is null 578 */ 579 public static double min(final double[] values) throws MathIllegalArgumentException { 580 return MIN.evaluate(values); 581 } 582 583 /** 584 * Returns the minimum of the entries in the specified portion of the input array, 585 * or <code>Double.NaN</code> if the designated subarray is empty. 586 * <p> 587 * Throws <code>MathIllegalArgumentException</code> if the array is null or 588 * the array index parameters are not valid. 
589 * <ul> 590 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 591 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 592 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 593 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 594 * </ul> 595 * 596 * @param values the input array 597 * @param begin index of the first array element to include 598 * @param length the number of elements to include 599 * @return the minimum of the values or Double.NaN if length = 0 600 * @throws MathIllegalArgumentException if the array is null or the array index 601 * parameters are not valid 602 */ 603 public static double min(final double[] values, final int begin, final int length) 604 throws MathIllegalArgumentException { 605 return MIN.evaluate(values, begin, length); 606 } 607 608 /** 609 * Returns an estimate of the <code>p</code>th percentile of the values 610 * in the <code>values</code> array. 611 * <ul> 612 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 613 * <code>0</code></li> 614 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 615 * if <code>values</code> has length <code>1</code></li> 616 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 617 * is null or p is not a valid quantile value (p must be greater than 0 618 * and less than or equal to 100)</li> 619 * </ul> 620 * <p> 621 * See {@link org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile Percentile} 622 * for a description of the percentile estimation algorithm used. 
623 * 624 * @param values input array of values 625 * @param p the percentile value to compute 626 * @return the percentile value or Double.NaN if the array is empty 627 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid 628 */ 629 public static double percentile(final double[] values, final double p) throws MathIllegalArgumentException { 630 return PERCENTILE.evaluate(values,p); 631 } 632 633 /** 634 * Returns an estimate of the <code>p</code>th percentile of the values 635 * in the <code>values</code> array, starting with the element in (0-based) 636 * position <code>begin</code> in the array and including <code>length</code> 637 * values. 638 * <ul> 639 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 640 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 641 * if <code>length = 1 </code></li> 642 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 643 * is null, <code>begin</code> or <code>length</code> is invalid, or 644 * <code>p</code> is not a valid quantile value (p must be greater than 0 645 * and less than or equal to 100)</li> 646 * </ul> 647 * <p> 648 * See {@link org.apache.commons.math4.legacy.stat.descriptive.rank.Percentile Percentile} 649 * for a description of the percentile estimation algorithm used. 
650 * 651 * @param values array of input values 652 * @param p the percentile to compute 653 * @param begin the first (0-based) element to include in the computation 654 * @param length the number of array elements to include 655 * @return the percentile value 656 * @throws MathIllegalArgumentException if the parameters are not valid or the input array is null 657 */ 658 public static double percentile(final double[] values, final int begin, final int length, final double p) 659 throws MathIllegalArgumentException { 660 return PERCENTILE.evaluate(values, begin, length, p); 661 } 662 663 /** 664 * Returns the sum of the (signed) differences between corresponding elements of the 665 * input arrays -- i.e., sum(sample1[i] - sample2[i]). 666 * 667 * @param sample1 the first array 668 * @param sample2 the second array 669 * @return sum of paired differences 670 * @throws DimensionMismatchException if the arrays do not have the same (positive) length. 671 * @throws NoDataException if the sample arrays are empty. 672 */ 673 public static double sumDifference(final double[] sample1, final double[] sample2) 674 throws DimensionMismatchException, NoDataException { 675 676 int n = sample1.length; 677 if (n != sample2.length) { 678 throw new DimensionMismatchException(n, sample2.length); 679 } 680 if (n <= 0) { 681 throw new NoDataException(LocalizedFormats.INSUFFICIENT_DIMENSION); 682 } 683 double result = 0; 684 for (int i = 0; i < n; i++) { 685 result += sample1[i] - sample2[i]; 686 } 687 return result; 688 } 689 690 /** 691 * Returns the mean of the (signed) differences between corresponding elements of the 692 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length. 693 * 694 * @param sample1 the first array 695 * @param sample2 the second array 696 * @return mean of paired differences 697 * @throws DimensionMismatchException if the arrays do not have the same (positive) length. 698 * @throws NoDataException if the sample arrays are empty. 
699 */ 700 public static double meanDifference(final double[] sample1, final double[] sample2) 701 throws DimensionMismatchException, NoDataException { 702 return sumDifference(sample1, sample2) / sample1.length; 703 } 704 705 /** 706 * Returns the variance of the (signed) differences between corresponding elements of the 707 * input arrays -- i.e., var(sample1[i] - sample2[i]). 708 * 709 * @param sample1 the first array 710 * @param sample2 the second array 711 * @param meanDifference the mean difference between corresponding entries 712 * @return variance of paired differences 713 * @throws DimensionMismatchException if the arrays do not have the same length. 714 * @throws NumberIsTooSmallException if the arrays length is less than 2. 715 * @see #meanDifference(double[],double[]) 716 */ 717 public static double varianceDifference(final double[] sample1, final double[] sample2, double meanDifference) 718 throws DimensionMismatchException, NumberIsTooSmallException { 719 720 double sum1 = 0d; 721 double sum2 = 0d; 722 double diff = 0d; 723 int n = sample1.length; 724 if (n != sample2.length) { 725 throw new DimensionMismatchException(n, sample2.length); 726 } 727 if (n < 2) { 728 throw new NumberIsTooSmallException(n, 2, true); 729 } 730 for (int i = 0; i < n; i++) { 731 diff = sample1[i] - sample2[i]; 732 sum1 += (diff - meanDifference) *(diff - meanDifference); 733 sum2 += diff - meanDifference; 734 } 735 return (sum1 - (sum2 * sum2 / n)) / (n - 1); 736 } 737 738 /** 739 * Normalize (standardize) the sample, so it is has a mean of 0 and a standard deviation of 1. 740 * 741 * @param sample Sample to normalize. 742 * @return normalized (standardized) sample. 
743 * @since 2.2 744 */ 745 public static double[] normalize(final double[] sample) { 746 DescriptiveStatistics stats = new DescriptiveStatistics(); 747 748 // Add the data from the series to stats 749 for (int i = 0; i < sample.length; i++) { 750 stats.addValue(sample[i]); 751 } 752 753 // Compute mean and standard deviation 754 double mean = stats.getMean(); 755 double standardDeviation = stats.getStandardDeviation(); 756 757 // initialize the standardizedSample, which has the same length as the sample 758 double[] standardizedSample = new double[sample.length]; 759 760 for (int i = 0; i < sample.length; i++) { 761 // z = (x- mean)/standardDeviation 762 standardizedSample[i] = (sample[i] - mean) / standardDeviation; 763 } 764 return standardizedSample; 765 } 766 767 /** 768 * Returns the sample mode(s). 769 * <p> 770 * The mode is the most frequently occurring value in the sample. 771 * If there is a unique value with maximum frequency, this value is returned 772 * as the only element of the output array. Otherwise, the returned array 773 * contains the maximum frequency elements in increasing order. 774 * <p> 775 * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17}, 776 * the returned array will have length two, with 0 in the first element and 777 * 5 in the second. 778 * <p> 779 * NaN values are ignored when computing the mode - i.e., NaNs will never 780 * appear in the output array. If the sample includes only NaNs or has 781 * length 0, an empty array is returned. 782 * 783 * @param sample input data 784 * @return array of array of the most frequently occurring element(s) sorted in ascending order. 
785 * @throws MathIllegalArgumentException if the indices are invalid or the array is null 786 * @since 3.3 787 */ 788 public static double[] mode(double[] sample) throws MathIllegalArgumentException { 789 if (sample == null) { 790 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); 791 } 792 return getMode(sample, 0, sample.length); 793 } 794 795 /** 796 * Returns the sample mode(s). 797 * <p> 798 * The mode is the most frequently occurring value in the sample. 799 * If there is a unique value with maximum frequency, this value is returned 800 * as the only element of the output array. Otherwise, the returned array 801 * contains the maximum frequency elements in increasing order. 802 * <p> 803 * For example, if {@code sample} is {0, 12, 5, 6, 0, 13, 5, 17}, 804 * the returned array will have length two, with 0 in the first element and 805 * 5 in the second. 806 * <p> 807 * NaN values are ignored when computing the mode - i.e., NaNs will never 808 * appear in the output array. If the sample includes only NaNs or has 809 * length 0, an empty array is returned. 810 * 811 * @param sample input data 812 * @param begin index (0-based) of the first array element to include 813 * @param length the number of elements to include 814 * @return array of array of the most frequently occurring element(s) sorted in ascending order. 815 * @throws MathIllegalArgumentException if the indices are invalid or the array is null 816 * @since 3.3 817 */ 818 public static double[] mode(double[] sample, final int begin, final int length) { 819 if (sample == null) { 820 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); 821 } 822 823 if (begin < 0) { 824 throw new NotPositiveException(LocalizedFormats.START_POSITION, Integer.valueOf(begin)); 825 } 826 827 if (length < 0) { 828 throw new NotPositiveException(LocalizedFormats.LENGTH, Integer.valueOf(length)); 829 } 830 831 return getMode(sample, begin, length); 832 } 833 834 /** 835 * Private helper method. 
* Assumes parameters have been validated.
     * @param values input data
     * @param begin index (0-based) of the first array element to include
     * @param length the number of elements to include
     * @return array of the most frequently occurring element(s) sorted in ascending order.
     */
    private static double[] getMode(double[] values, final int begin, final int length) {
        // Tally every non-NaN value of the selected range in a frequency table.
        final Frequency<Double> counts = new Frequency<>();
        final int end = begin + length;
        for (int index = begin; index < end; index++) {
            final double candidate = values[index];
            if (Double.isNaN(candidate)) {
                continue;
            }
            counts.addValue(Double.valueOf(candidate));
        }

        // Unbox the reported mode(s) into a primitive array, preserving list order.
        final List<Double> modes = counts.getMode();
        final double[] unboxed = new double[modes.size()];
        int slot = 0;
        for (final Double mode : modes) {
            unboxed[slot++] = mode.doubleValue();
        }
        return unboxed;
    }
}