1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.stat.correlation; 18 19 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException; 20 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 21 22 /** 23 * Bivariate Covariance implementation that does not require input data to be 24 * stored in memory. 25 * 26 * <p>This class is based on a paper written by Philippe Pébay: 27 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf"> 28 * Formulas for Robust, One-Pass Parallel Computation of Covariances and 29 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, 30 * Sandia National Laboratories. It computes the covariance for a pair of variables. 31 * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p> 32 * 33 * <p>Note: This class is package private as it is only used internally in 34 * the {@link StorelessCovariance} class.</p> 35 * 36 * @since 3.0 37 */ 38 class StorelessBivariateCovariance { 39 40 /** the mean of variable x. */ 41 private double meanX; 42 43 /** the mean of variable y. */ 44 private double meanY; 45 46 /** number of observations. */ 47 private double n; 48 49 /** the running covariance estimate. */ 50 private double covarianceNumerator; 51 52 /** flag for bias correction. */ 53 private boolean biasCorrected; 54 55 /** 56 * Create an empty {@link StorelessBivariateCovariance} instance with 57 * bias correction. 58 */ 59 StorelessBivariateCovariance() { 60 this(true); 61 } 62 63 /** 64 * Create an empty {@link StorelessBivariateCovariance} instance. 65 * 66 * @param biasCorrection if <code>true</code> the covariance estimate is corrected 67 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, 68 * i.e. n in the denominator. 69 */ 70 StorelessBivariateCovariance(final boolean biasCorrection) { 71 meanX = meanY = 0.0; 72 n = 0; 73 covarianceNumerator = 0.0; 74 biasCorrected = biasCorrection; 75 } 76 77 /** 78 * Update the covariance estimation with a pair of variables (x, y). 79 * 80 * @param x the x value 81 * @param y the y value 82 */ 83 public void increment(final double x, final double y) { 84 n++; 85 final double deltaX = x - meanX; 86 final double deltaY = y - meanY; 87 meanX += deltaX / n; 88 meanY += deltaY / n; 89 covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY; 90 } 91 92 /** 93 * Appends another bivariate covariance calculation to this. 94 * After this operation, statistics returned should be close to what would 95 * have been obtained by by performing all of the {@link #increment(double, double)} 96 * operations in {@code cov} directly on this. 97 * 98 * @param cov StorelessBivariateCovariance instance to append. 99 */ 100 public void append(StorelessBivariateCovariance cov) { 101 double oldN = n; 102 n += cov.n; 103 final double deltaX = cov.meanX - meanX; 104 final double deltaY = cov.meanY - meanY; 105 meanX += deltaX * cov.n / n; 106 meanY += deltaY * cov.n / n; 107 covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY; 108 } 109 110 /** 111 * Returns the number of observations. 112 * 113 * @return number of observations 114 */ 115 public double getN() { 116 return n; 117 } 118 119 /** 120 * Return the current covariance estimate. 121 * 122 * @return the current covariance 123 * @throws NumberIsTooSmallException if the number of observations 124 * is < 2 125 */ 126 public double getResult() throws NumberIsTooSmallException { 127 if (n < 2) { 128 throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION, 129 n, 2, true); 130 } 131 if (biasCorrected) { 132 return covarianceNumerator / (n - 1d); 133 } else { 134 return covarianceNumerator / n; 135 } 136 } 137 } 138