View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.correlation;
18  
19  import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
20  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
21  
22  /**
23   * Bivariate Covariance implementation that does not require input data to be
24   * stored in memory.
25   *
26   * <p>This class is based on a paper written by Philippe P&eacute;bay:
27   * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
28   * Formulas for Robust, One-Pass Parallel Computation of Covariances and
29   * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
30   * Sandia National Laboratories. It computes the covariance for a pair of variables.
31   * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
32   *
33   * <p>Note: This class is package private as it is only used internally in
34   * the {@link StorelessCovariance} class.</p>
35   *
36   * @since 3.0
37   */
38  class StorelessBivariateCovariance {
39  
40      /** the mean of variable x. */
41      private double meanX;
42  
43      /** the mean of variable y. */
44      private double meanY;
45  
46      /** number of observations. */
47      private double n;
48  
49      /** the running covariance estimate. */
50      private double covarianceNumerator;
51  
52      /** flag for bias correction. */
53      private boolean biasCorrected;
54  
55      /**
56       * Create an empty {@link StorelessBivariateCovariance} instance with
57       * bias correction.
58       */
59      StorelessBivariateCovariance() {
60          this(true);
61      }
62  
63      /**
64       * Create an empty {@link StorelessBivariateCovariance} instance.
65       *
66       * @param biasCorrection if <code>true</code> the covariance estimate is corrected
67       * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
68       * i.e. n in the denominator.
69       */
70      StorelessBivariateCovariance(final boolean biasCorrection) {
71          meanX = meanY = 0.0;
72          n = 0;
73          covarianceNumerator = 0.0;
74          biasCorrected = biasCorrection;
75      }
76  
77      /**
78       * Update the covariance estimation with a pair of variables (x, y).
79       *
80       * @param x the x value
81       * @param y the y value
82       */
83      public void increment(final double x, final double y) {
84          n++;
85          final double deltaX = x - meanX;
86          final double deltaY = y - meanY;
87          meanX += deltaX / n;
88          meanY += deltaY / n;
89          covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
90      }
91  
92      /**
93       * Appends another bivariate covariance calculation to this.
94       * After this operation, statistics returned should be close to what would
95       * have been obtained by by performing all of the {@link #increment(double, double)}
96       * operations in {@code cov} directly on this.
97       *
98       * @param cov StorelessBivariateCovariance instance to append.
99       */
100     public void append(StorelessBivariateCovariance cov) {
101         double oldN = n;
102         n += cov.n;
103         final double deltaX = cov.meanX - meanX;
104         final double deltaY = cov.meanY - meanY;
105         meanX += deltaX * cov.n / n;
106         meanY += deltaY * cov.n / n;
107         covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
108     }
109 
110     /**
111      * Returns the number of observations.
112      *
113      * @return number of observations
114      */
115     public double getN() {
116         return n;
117     }
118 
119     /**
120      * Return the current covariance estimate.
121      *
122      * @return the current covariance
123      * @throws NumberIsTooSmallException if the number of observations
124      * is &lt; 2
125      */
126     public double getResult() throws NumberIsTooSmallException {
127         if (n < 2) {
128             throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION,
129                                                 n, 2, true);
130         }
131         if (biasCorrected) {
132             return covarianceNumerator / (n - 1d);
133         } else {
134             return covarianceNumerator / n;
135         }
136     }
137 }
138