1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.correlation;
18
19 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
20 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
21
22 /**
23 * Bivariate Covariance implementation that does not require input data to be
24 * stored in memory.
25 *
26 * <p>This class is based on a paper written by Philippe Pébay:
27 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
28 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
29 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
30 * Sandia National Laboratories. It computes the covariance for a pair of variables.
31 * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
32 *
33 * <p>Note: This class is package private as it is only used internally in
34 * the {@link StorelessCovariance} class.</p>
35 *
36 * @since 3.0
37 */
38 class StorelessBivariateCovariance {
39
40 /** the mean of variable x. */
41 private double meanX;
42
43 /** the mean of variable y. */
44 private double meanY;
45
46 /** number of observations. */
47 private double n;
48
49 /** the running covariance estimate. */
50 private double covarianceNumerator;
51
52 /** flag for bias correction. */
53 private boolean biasCorrected;
54
55 /**
56 * Create an empty {@link StorelessBivariateCovariance} instance with
57 * bias correction.
58 */
59 StorelessBivariateCovariance() {
60 this(true);
61 }
62
63 /**
64 * Create an empty {@link StorelessBivariateCovariance} instance.
65 *
66 * @param biasCorrection if <code>true</code> the covariance estimate is corrected
67 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
68 * i.e. n in the denominator.
69 */
70 StorelessBivariateCovariance(final boolean biasCorrection) {
71 meanX = meanY = 0.0;
72 n = 0;
73 covarianceNumerator = 0.0;
74 biasCorrected = biasCorrection;
75 }
76
77 /**
78 * Update the covariance estimation with a pair of variables (x, y).
79 *
80 * @param x the x value
81 * @param y the y value
82 */
83 public void increment(final double x, final double y) {
84 n++;
85 final double deltaX = x - meanX;
86 final double deltaY = y - meanY;
87 meanX += deltaX / n;
88 meanY += deltaY / n;
89 covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
90 }
91
92 /**
93 * Appends another bivariate covariance calculation to this.
94 * After this operation, statistics returned should be close to what would
95 * have been obtained by by performing all of the {@link #increment(double, double)}
96 * operations in {@code cov} directly on this.
97 *
98 * @param cov StorelessBivariateCovariance instance to append.
99 */
100 public void append(StorelessBivariateCovariance cov) {
101 double oldN = n;
102 n += cov.n;
103 final double deltaX = cov.meanX - meanX;
104 final double deltaY = cov.meanY - meanY;
105 meanX += deltaX * cov.n / n;
106 meanY += deltaY * cov.n / n;
107 covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
108 }
109
110 /**
111 * Returns the number of observations.
112 *
113 * @return number of observations
114 */
115 public double getN() {
116 return n;
117 }
118
119 /**
120 * Return the current covariance estimate.
121 *
122 * @return the current covariance
123 * @throws NumberIsTooSmallException if the number of observations
124 * is < 2
125 */
126 public double getResult() throws NumberIsTooSmallException {
127 if (n < 2) {
128 throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION,
129 n, 2, true);
130 }
131 if (biasCorrected) {
132 return covarianceNumerator / (n - 1d);
133 } else {
134 return covarianceNumerator / n;
135 }
136 }
137 }
138