1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.stat.descriptive.moment; 18 19 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 20 import org.apache.commons.math4.legacy.exception.NullArgumentException; 21 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 22 import org.apache.commons.math4.legacy.stat.descriptive.AbstractStorelessUnivariateStatistic; 23 import org.apache.commons.math4.legacy.stat.descriptive.WeightedEvaluation; 24 import org.apache.commons.math4.legacy.core.MathArrays; 25 26 /** 27 * Computes the variance of the available values. By default, the unbiased 28 * "sample variance" definitional formula is used: 29 * <p> 30 * variance = sum((x_i - mean)^2) / (n - 1) </p> 31 * <p> 32 * where mean is the {@link Mean} and <code>n</code> is the number 33 * of sample observations.</p> 34 * <p> 35 * The definitional formula does not have good numerical properties, so 36 * this implementation does not compute the statistic using the definitional 37 * formula. <ul> 38 * <li> The <code>getResult</code> method computes the variance using 39 * updating formulas based on West's algorithm, as described in 40 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and 41 * J. G. Lewis 1979, <i>Communications of the ACM</i>, 42 * vol. 22 no. 9, pp. 526-531.</a></li> 43 * <li> The <code>evaluate</code> methods leverage the fact that they have the 44 * full array of values in memory to execute a two-pass algorithm. 45 * Specifically, these methods use the "corrected two-pass algorithm" from 46 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, 47 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul> 48 * Note that adding values using <code>increment</code> or 49 * <code>incrementAll</code> and then executing <code>getResult</code> will 50 * sometimes give a different, less accurate, result than executing 51 * <code>evaluate</code> with the full array of values. The former approach 52 * should only be used when the full array of values is not available. 53 * <p> 54 * The "population variance" ( sum((x_i - mean)^2) / n ) can also 55 * be computed using this statistic. The <code>isBiasCorrected</code> 56 * property determines whether the "population" or "sample" value is 57 * returned by the <code>evaluate</code> and <code>getResult</code> methods. 58 * To compute population variances, set this property to <code>false.</code> 59 * </p> 60 * <p> 61 * <strong>Note that this implementation is not synchronized.</strong> If 62 * multiple threads access an instance of this class concurrently, and at least 63 * one of the threads invokes the <code>increment()</code> or 64 * <code>clear()</code> method, it must be synchronized externally.</p> 65 */ 66 public class Variance extends AbstractStorelessUnivariateStatistic implements WeightedEvaluation { 67 /** SecondMoment is used in incremental calculation of Variance. */ 68 protected SecondMoment moment; 69 70 /** 71 * Whether or not {@link #increment(double)} should increment 72 * the internal second moment. When a Variance is constructed with an 73 * external SecondMoment as a constructor parameter, this property is 74 * set to false and increments must be applied to the second moment 75 * directly. 76 */ 77 protected boolean incMoment = true; 78 79 /** 80 * Whether or not bias correction is applied when computing the 81 * value of the statistic. True means that bias is corrected. See 82 * {@link Variance} for details on the formula. 83 */ 84 private boolean isBiasCorrected = true; 85 86 /** 87 * Constructs a Variance with default (true) <code>isBiasCorrected</code> 88 * property. 89 */ 90 public Variance() { 91 moment = new SecondMoment(); 92 } 93 94 /** 95 * Constructs a Variance based on an external second moment. 96 * <p> 97 * When this constructor is used, the statistic may only be 98 * incremented via the moment, i.e., {@link #increment(double)} 99 * does nothing; whereas {@code m2.increment(value)} increments 100 * both {@code m2} and the Variance instance constructed from it. 101 * 102 * @param m2 the SecondMoment (Third or Fourth moments work here as well.) 103 */ 104 public Variance(final SecondMoment m2) { 105 incMoment = false; 106 this.moment = m2; 107 } 108 109 /** 110 * Constructs a Variance with the specified <code>isBiasCorrected</code> 111 * property. 112 * 113 * @param isBiasCorrected setting for bias correction - true means 114 * bias will be corrected and is equivalent to using the "no arg" 115 * constructor 116 */ 117 public Variance(boolean isBiasCorrected) { 118 moment = new SecondMoment(); 119 this.isBiasCorrected = isBiasCorrected; 120 } 121 122 /** 123 * Constructs a Variance with the specified <code>isBiasCorrected</code> 124 * property and the supplied external second moment. 125 * 126 * @param isBiasCorrected setting for bias correction - true means 127 * bias will be corrected 128 * @param m2 the SecondMoment (Third or Fourth moments work 129 * here as well.) 130 */ 131 public Variance(boolean isBiasCorrected, SecondMoment m2) { 132 incMoment = false; 133 this.moment = m2; 134 this.isBiasCorrected = isBiasCorrected; 135 } 136 137 /** 138 * Copy constructor, creates a new {@code Variance} identical 139 * to the {@code original}. 140 * 141 * @param original the {@code Variance} instance to copy 142 * @throws NullArgumentException if original is null 143 */ 144 public Variance(Variance original) throws NullArgumentException { 145 copy(original, this); 146 } 147 148 /** 149 * {@inheritDoc} 150 * <p>If all values are available, it is more accurate to use 151 * {@link #evaluate(double[])} rather than adding values one at a time 152 * using this method and then executing {@link #getResult}, since 153 * <code>evaluate</code> leverages the fact that is has the full 154 * list of values together to execute a two-pass algorithm. 155 * See {@link Variance}.</p> 156 * 157 * <p>Note also that when {@link #Variance(SecondMoment)} is used to 158 * create a Variance, this method does nothing. In that case, the 159 * SecondMoment should be incremented directly.</p> 160 */ 161 @Override 162 public void increment(final double d) { 163 if (incMoment) { 164 moment.increment(d); 165 } 166 } 167 168 /** 169 * {@inheritDoc} 170 */ 171 @Override 172 public double getResult() { 173 if (moment.n == 0) { 174 return Double.NaN; 175 } else if (moment.n == 1) { 176 return 0d; 177 } else { 178 if (isBiasCorrected) { 179 return moment.m2 / (moment.n - 1d); 180 } else { 181 return moment.m2 / (moment.n); 182 } 183 } 184 } 185 186 /** 187 * {@inheritDoc} 188 */ 189 @Override 190 public long getN() { 191 return moment.getN(); 192 } 193 194 /** 195 * {@inheritDoc} 196 */ 197 @Override 198 public void clear() { 199 if (incMoment) { 200 moment.clear(); 201 } 202 } 203 204 /** 205 * Returns the variance of the entries in the input array, or 206 * <code>Double.NaN</code> if the array is empty. 207 * <p> 208 * See {@link Variance} for details on the computing algorithm.</p> 209 * <p> 210 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 211 * <p> 212 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> 213 * <p> 214 * Does not change the internal state of the statistic.</p> 215 * 216 * @param values the input array 217 * @return the variance of the values or Double.NaN if length = 0 218 * @throws MathIllegalArgumentException if the array is null 219 */ 220 @Override 221 public double evaluate(final double[] values) throws MathIllegalArgumentException { 222 if (values == null) { 223 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); 224 } 225 return evaluate(values, 0, values.length); 226 } 227 228 /** 229 * Returns the variance of the entries in the specified portion of 230 * the input array, or <code>Double.NaN</code> if the designated subarray 231 * is empty. Note that Double.NaN may also be returned if the input 232 * includes NaN and / or infinite values. 233 * <p> 234 * See {@link Variance} for details on the computing algorithm.</p> 235 * <p> 236 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 237 * <p> 238 * Does not change the internal state of the statistic.</p> 239 * <p> 240 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> 241 * 242 * @param values the input array 243 * @param begin index of the first array element to include 244 * @param length the number of elements to include 245 * @return the variance of the values or Double.NaN if length = 0 246 * @throws MathIllegalArgumentException if the array is null or the array index 247 * parameters are not valid 248 */ 249 @Override 250 public double evaluate(final double[] values, final int begin, final int length) 251 throws MathIllegalArgumentException { 252 253 double var = Double.NaN; 254 255 if (MathArrays.verifyValues(values, begin, length)) { 256 if (length == 1) { 257 var = 0.0; 258 } else if (length > 1) { 259 Mean mean = new Mean(); 260 double m = mean.evaluate(values, begin, length); 261 var = evaluate(values, m, begin, length); 262 } 263 } 264 return var; 265 } 266 267 /** 268 * <p>Returns the weighted variance of the entries in the specified portion of 269 * the input array, or <code>Double.NaN</code> if the designated subarray 270 * is empty.</p> 271 * <p> 272 * Uses the formula <div style="white-space: pre"><code> 273 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 274 * </code></div> 275 * where weightedMean is the weighted mean 276 * <p> 277 * This formula will not return the same result as the unweighted variance when all 278 * weights are equal, unless all weights are equal to 1. The formula assumes that 279 * weights are to be treated as "expansion values," as will be the case if for example 280 * the weights represent frequency counts. To normalize weights so that the denominator 281 * in the variance computation equals the length of the input vector minus one, use <pre> 282 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length)); </code> 283 * </pre> 284 * <p> 285 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 286 * <p> 287 * Throws <code>IllegalArgumentException</code> if any of the following are true: 288 * <ul><li>the values array is null</li> 289 * <li>the weights array is null</li> 290 * <li>the weights array does not have the same length as the values array</li> 291 * <li>the weights array contains one or more infinite values</li> 292 * <li>the weights array contains one or more NaN values</li> 293 * <li>the weights array contains negative values</li> 294 * <li>the weights array does not contain at least one non-zero value (applies when length is non zero)</li> 295 * <li>the start and length arguments do not determine a valid array</li> 296 * </ul> 297 * <p> 298 * Does not change the internal state of the statistic.</p> 299 * <p> 300 * Throws <code>MathIllegalArgumentException</code> if either array is null.</p> 301 * 302 * @param values the input array 303 * @param weights the weights array 304 * @param begin index of the first array element to include 305 * @param length the number of elements to include 306 * @return the weighted variance of the values or Double.NaN if length = 0 307 * @throws MathIllegalArgumentException if the parameters are not valid 308 * @since 2.1 309 */ 310 @Override 311 public double evaluate(final double[] values, final double[] weights, 312 final int begin, final int length) throws MathIllegalArgumentException { 313 314 double var = Double.NaN; 315 316 if (MathArrays.verifyValues(values, weights, begin, length)) { 317 if (length == 1) { 318 var = 0.0; 319 } else if (length > 1) { 320 Mean mean = new Mean(); 321 double m = mean.evaluate(values, weights, begin, length); 322 var = evaluate(values, weights, m, begin, length); 323 } 324 } 325 return var; 326 } 327 328 /** 329 * <p> 330 * Returns the weighted variance of the entries in the input array.</p> 331 * <p> 332 * Uses the formula <div style="white-space:pre"><code> 333 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 334 * </code></div> 335 * where weightedMean is the weighted mean 336 * <p> 337 * This formula will not return the same result as the unweighted variance when all 338 * weights are equal, unless all weights are equal to 1. The formula assumes that 339 * weights are to be treated as "expansion values," as will be the case if for example 340 * the weights represent frequency counts. To normalize weights so that the denominator 341 * in the variance computation equals the length of the input vector minus one, use <pre> 342 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length)); </code> 343 * </pre> 344 * <p> 345 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 346 * <p> 347 * Throws <code>MathIllegalArgumentException</code> if any of the following are true: 348 * <ul><li>the values array is null</li> 349 * <li>the weights array is null</li> 350 * <li>the weights array does not have the same length as the values array</li> 351 * <li>the weights array contains one or more infinite values</li> 352 * <li>the weights array contains one or more NaN values</li> 353 * <li>the weights array contains negative values</li> 354 * <li>the weights array does not contain at least one non-zero value (applies when length is non zero)</li> 355 * </ul> 356 * <p> 357 * Does not change the internal state of the statistic.</p> 358 * <p> 359 * Throws <code>MathIllegalArgumentException</code> if either array is null.</p> 360 * 361 * @param values the input array 362 * @param weights the weights array 363 * @return the weighted variance of the values 364 * @throws MathIllegalArgumentException if the parameters are not valid 365 * @since 2.1 366 */ 367 @Override 368 public double evaluate(final double[] values, final double[] weights) 369 throws MathIllegalArgumentException { 370 return evaluate(values, weights, 0, values.length); 371 } 372 373 /** 374 * Returns the variance of the entries in the specified portion of 375 * the input array, using the precomputed mean value. Returns 376 * <code>Double.NaN</code> if the designated subarray is empty. 377 * <p> 378 * See {@link Variance} for details on the computing algorithm.</p> 379 * <p> 380 * The formula used assumes that the supplied mean value is the arithmetic 381 * mean of the sample data, not a known population parameter. This method 382 * is supplied only to save computation when the mean has already been 383 * computed.</p> 384 * <p> 385 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 386 * <p> 387 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> 388 * <p> 389 * Does not change the internal state of the statistic.</p> 390 * 391 * @param values the input array 392 * @param mean the precomputed mean value 393 * @param begin index of the first array element to include 394 * @param length the number of elements to include 395 * @return the variance of the values or Double.NaN if length = 0 396 * @throws MathIllegalArgumentException if the array is null or the array index 397 * parameters are not valid 398 */ 399 public double evaluate(final double[] values, final double mean, 400 final int begin, final int length) throws MathIllegalArgumentException { 401 402 double var = Double.NaN; 403 404 if (MathArrays.verifyValues(values, begin, length)) { 405 if (length == 1) { 406 var = 0.0; 407 } else if (length > 1) { 408 double accum = 0.0; 409 double dev = 0.0; 410 double accum2 = 0.0; 411 for (int i = begin; i < begin + length; i++) { 412 dev = values[i] - mean; 413 accum += dev * dev; 414 accum2 += dev; 415 } 416 double len = length; 417 if (isBiasCorrected) { 418 var = (accum - (accum2 * accum2 / len)) / (len - 1.0); 419 } else { 420 var = (accum - (accum2 * accum2 / len)) / len; 421 } 422 } 423 } 424 return var; 425 } 426 427 /** 428 * Returns the variance of the entries in the input array, using the 429 * precomputed mean value. Returns <code>Double.NaN</code> if the array 430 * is empty. 431 * <p> 432 * See {@link Variance} for details on the computing algorithm.</p> 433 * <p> 434 * If <code>isBiasCorrected</code> is <code>true</code> the formula used 435 * assumes that the supplied mean value is the arithmetic mean of the 436 * sample data, not a known population parameter. If the mean is a known 437 * population parameter, or if the "population" version of the variance is 438 * desired, set <code>isBiasCorrected</code> to <code>false</code> before 439 * invoking this method.</p> 440 * <p> 441 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 442 * <p> 443 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> 444 * <p> 445 * Does not change the internal state of the statistic.</p> 446 * 447 * @param values the input array 448 * @param mean the precomputed mean value 449 * @return the variance of the values or Double.NaN if the array is empty 450 * @throws MathIllegalArgumentException if the array is null 451 */ 452 public double evaluate(final double[] values, final double mean) throws MathIllegalArgumentException { 453 return evaluate(values, mean, 0, values.length); 454 } 455 456 /** 457 * Returns the weighted variance of the entries in the specified portion of 458 * the input array, using the precomputed weighted mean value. Returns 459 * <code>Double.NaN</code> if the designated subarray is empty. 460 * <p> 461 * Uses the formula <div style="white-space:pre"><code> 462 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 463 * </code></div> 464 * <p> 465 * The formula used assumes that the supplied mean value is the weighted arithmetic 466 * mean of the sample data, not a known population parameter. This method 467 * is supplied only to save computation when the mean has already been 468 * computed.</p> 469 * <p> 470 * This formula will not return the same result as the unweighted variance when all 471 * weights are equal, unless all weights are equal to 1. The formula assumes that 472 * weights are to be treated as "expansion values," as will be the case if for example 473 * the weights represent frequency counts. To normalize weights so that the denominator 474 * in the variance computation equals the length of the input vector minus one, use <pre> 475 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean); </code> 476 * </pre> 477 * <p> 478 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 479 * <p> 480 * Throws <code>MathIllegalArgumentException</code> if any of the following are true: 481 * <ul><li>the values array is null</li> 482 * <li>the weights array is null</li> 483 * <li>the weights array does not have the same length as the values array</li> 484 * <li>the weights array contains one or more infinite values</li> 485 * <li>the weights array contains one or more NaN values</li> 486 * <li>the weights array contains negative values</li> 487 * <li>the weights array does not contain at least one non-zero value (applies when length is non zero)</li> 488 * <li>the start and length arguments do not determine a valid array</li> 489 * </ul> 490 * <p> 491 * Does not change the internal state of the statistic.</p> 492 * 493 * @param values the input array 494 * @param weights the weights array 495 * @param mean the precomputed weighted mean value 496 * @param begin index of the first array element to include 497 * @param length the number of elements to include 498 * @return the variance of the values or Double.NaN if length = 0 499 * @throws MathIllegalArgumentException if the parameters are not valid 500 * @since 2.1 501 */ 502 public double evaluate(final double[] values, final double[] weights, 503 final double mean, final int begin, final int length) 504 throws MathIllegalArgumentException { 505 506 double var = Double.NaN; 507 508 if (MathArrays.verifyValues(values, weights, begin, length)) { 509 if (length == 1) { 510 var = 0.0; 511 } else if (length > 1) { 512 double accum = 0.0; 513 double dev = 0.0; 514 double accum2 = 0.0; 515 for (int i = begin; i < begin + length; i++) { 516 dev = values[i] - mean; 517 accum += weights[i] * (dev * dev); 518 accum2 += weights[i] * dev; 519 } 520 521 double sumWts = 0; 522 for (int i = begin; i < begin + length; i++) { 523 sumWts += weights[i]; 524 } 525 526 if (isBiasCorrected) { 527 // Note: For this to be valid the weights should correspond to counts 528 // of each observation where the weights are positive integers; the 529 // sum of the weights is the total number of observations and should 530 // be at least 2. 531 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); 532 } else { 533 var = (accum - (accum2 * accum2 / sumWts)) / sumWts; 534 } 535 } 536 } 537 return var; 538 } 539 540 /** 541 * <p>Returns the weighted variance of the values in the input array, using 542 * the precomputed weighted mean value.</p> 543 * <p> 544 * Uses the formula <div style="white-space:pre"><code> 545 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 546 * </code></div> 547 * <p> 548 * The formula used assumes that the supplied mean value is the weighted arithmetic 549 * mean of the sample data, not a known population parameter. This method 550 * is supplied only to save computation when the mean has already been 551 * computed.</p> 552 * <p> 553 * This formula will not return the same result as the unweighted variance when all 554 * weights are equal, unless all weights are equal to 1. The formula assumes that 555 * weights are to be treated as "expansion values," as will be the case if for example 556 * the weights represent frequency counts. To normalize weights so that the denominator 557 * in the variance computation equals the length of the input vector minus one, use <pre> 558 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean); </code> 559 * </pre> 560 * <p> 561 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 562 * <p> 563 * Throws <code>MathIllegalArgumentException</code> if any of the following are true: 564 * <ul><li>the values array is null</li> 565 * <li>the weights array is null</li> 566 * <li>the weights array does not have the same length as the values array</li> 567 * <li>the weights array contains one or more infinite values</li> 568 * <li>the weights array contains one or more NaN values</li> 569 * <li>the weights array contains negative values</li> 570 * <li>the weights array does not contain at least one non-zero value (applies when length is non zero)</li> 571 * </ul> 572 * <p> 573 * Does not change the internal state of the statistic.</p> 574 * 575 * @param values the input array 576 * @param weights the weights array 577 * @param mean the precomputed weighted mean value 578 * @return the variance of the values or Double.NaN if length = 0 579 * @throws MathIllegalArgumentException if the parameters are not valid 580 * @since 2.1 581 */ 582 public double evaluate(final double[] values, final double[] weights, final double mean) 583 throws MathIllegalArgumentException { 584 return evaluate(values, weights, mean, 0, values.length); 585 } 586 587 /** 588 * @return Returns the isBiasCorrected. 589 */ 590 public boolean isBiasCorrected() { 591 return isBiasCorrected; 592 } 593 594 /** 595 * @param biasCorrected The isBiasCorrected to set. 596 */ 597 public void setBiasCorrected(boolean biasCorrected) { 598 this.isBiasCorrected = biasCorrected; 599 } 600 601 /** 602 * {@inheritDoc} 603 */ 604 @Override 605 public Variance copy() { 606 Variance result = new Variance(); 607 // No try-catch or advertised exception because parameters are guaranteed non-null 608 copy(this, result); 609 return result; 610 } 611 612 /** 613 * Copies source to dest. 614 * <p>Neither source nor dest can be null.</p> 615 * 616 * @param source Variance to copy 617 * @param dest Variance to copy to 618 * @throws NullArgumentException if either source or dest is null 619 */ 620 public static void copy(Variance source, Variance dest) 621 throws NullArgumentException { 622 NullArgumentException.check(source); 623 NullArgumentException.check(dest); 624 dest.moment = source.moment.copy(); 625 dest.isBiasCorrected = source.isBiasCorrected; 626 dest.incMoment = source.incMoment; 627 } 628 }