1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.correlation;
18
19 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
20 import org.apache.commons.math4.legacy.exception.MathUnsupportedOperationException;
21 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
22 import org.apache.commons.math4.legacy.linear.MatrixUtils;
23 import org.apache.commons.math4.legacy.linear.RealMatrix;
24
25 /**
26 * Covariance implementation that does not require input data to be
27 * stored in memory. The size of the covariance matrix is specified in the
28 * constructor. Specific elements of the matrix are incrementally updated with
29 * calls to incrementRow() or increment Covariance().
30 *
31 * <p>This class is based on a paper written by Philippe Pébay:
32 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
33 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
34 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
35 * Sandia National Laboratories.</p>
36 *
37 * <p>Note: the underlying covariance matrix is symmetric, thus only the
38 * upper triangular part of the matrix is stored and updated each increment.</p>
39 *
40 * @since 3.0
41 */
42 public class StorelessCovariance extends Covariance {
43
44 /** the square covariance matrix (upper triangular part). */
45 private StorelessBivariateCovariance[] covMatrix;
46
47 /** dimension of the square covariance matrix. */
48 private int dimension;
49
50 /**
51 * Create a bias corrected covariance matrix with a given dimension.
52 *
53 * @param dim the dimension of the square covariance matrix
54 */
55 public StorelessCovariance(final int dim) {
56 this(dim, true);
57 }
58
59 /**
60 * Create a covariance matrix with a given number of rows and columns and the
61 * indicated bias correction.
62 *
63 * @param dim the dimension of the covariance matrix
64 * @param biasCorrected if <code>true</code> the covariance estimate is corrected
65 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
66 * i.e. n in the denominator.
67 */
68 public StorelessCovariance(final int dim, final boolean biasCorrected) {
69 dimension = dim;
70 covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
71 initializeMatrix(biasCorrected);
72 }
73
74 /**
75 * Initialize the internal two-dimensional array of
76 * {@link StorelessBivariateCovariance} instances.
77 *
78 * @param biasCorrected if the covariance estimate shall be corrected for bias
79 */
80 private void initializeMatrix(final boolean biasCorrected) {
81 for(int i = 0; i < dimension; i++){
82 for(int j = 0; j < dimension; j++){
83 setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
84 }
85 }
86 }
87
88 /**
89 * Returns the index (i, j) translated into the one-dimensional
90 * array used to store the upper triangular part of the symmetric
91 * covariance matrix.
92 *
93 * @param i the row index
94 * @param j the column index
95 * @return the corresponding index in the matrix array
96 */
97 private int indexOf(final int i, final int j) {
98 return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
99 }
100
101 /**
102 * Gets the element at index (i, j) from the covariance matrix.
103 * @param i the row index
104 * @param j the column index
105 * @return the {@link StorelessBivariateCovariance} element at the given index
106 */
107 private StorelessBivariateCovariance getElement(final int i, final int j) {
108 return covMatrix[indexOf(i, j)];
109 }
110
111 /**
112 * Sets the covariance element at index (i, j) in the covariance matrix.
113 * @param i the row index
114 * @param j the column index
115 * @param cov the {@link StorelessBivariateCovariance} element to be set
116 */
117 private void setElement(final int i, final int j,
118 final StorelessBivariateCovariance cov) {
119 covMatrix[indexOf(i, j)] = cov;
120 }
121
122 /**
123 * Get the covariance for an individual element of the covariance matrix.
124 *
125 * @param xIndex row index in the covariance matrix
126 * @param yIndex column index in the covariance matrix
127 * @return the covariance of the given element
128 * @throws NumberIsTooSmallException if the number of observations
129 * in the cell is < 2
130 */
131 public double getCovariance(final int xIndex,
132 final int yIndex)
133 throws NumberIsTooSmallException {
134
135 return getElement(xIndex, yIndex).getResult();
136 }
137
138 /**
139 * Increment the covariance matrix with one row of data.
140 *
141 * @param data array representing one row of data.
142 * @throws DimensionMismatchException if the length of <code>rowData</code>
143 * does not match with the covariance matrix
144 */
145 public void increment(final double[] data)
146 throws DimensionMismatchException {
147
148 int length = data.length;
149 if (length != dimension) {
150 throw new DimensionMismatchException(length, dimension);
151 }
152
153 // only update the upper triangular part of the covariance matrix
154 // as only these parts are actually stored
155 for (int i = 0; i < length; i++){
156 for (int j = i; j < length; j++){
157 getElement(i, j).increment(data[i], data[j]);
158 }
159 }
160 }
161
162 /**
163 * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
164 * with this. After invoking this method, covariances returned should be close
165 * to what would have been obtained by performing all of the {@link #increment(double[])}
166 * operations in {@code sc} directly on this.
167 *
168 * @param sc externally computed StorelessCovariance to add to this
169 * @throws DimensionMismatchException if the dimension of sc does not match this
170 * @since 3.3
171 */
172 public void append(StorelessCovariance sc) throws DimensionMismatchException {
173 if (sc.dimension != dimension) {
174 throw new DimensionMismatchException(sc.dimension, dimension);
175 }
176
177 // only update the upper triangular part of the covariance matrix
178 // as only these parts are actually stored
179 for (int i = 0; i < dimension; i++) {
180 for (int j = i; j < dimension; j++) {
181 getElement(i, j).append(sc.getElement(i, j));
182 }
183 }
184 }
185
186 /**
187 * {@inheritDoc}
188 * @throws NumberIsTooSmallException if the number of observations
189 * in a cell is < 2
190 */
191 @Override
192 public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
193 return MatrixUtils.createRealMatrix(getData());
194 }
195
196 /**
197 * Return the covariance matrix as two-dimensional array.
198 *
199 * @return a two-dimensional double array of covariance values
200 * @throws NumberIsTooSmallException if the number of observations
201 * for a cell is < 2
202 */
203 public double[][] getData() throws NumberIsTooSmallException {
204 final double[][] data = new double[dimension][dimension];
205 for (int i = 0; i < dimension; i++) {
206 for (int j = 0; j < dimension; j++) {
207 data[i][j] = getElement(i, j).getResult();
208 }
209 }
210 return data;
211 }
212
213 /**
214 * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
215 * since the number of bivariate observations does not have to be the same for different
216 * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
217 *
218 * @return nothing as this implementation always throws a
219 * {@link MathUnsupportedOperationException}
220 * @throws MathUnsupportedOperationException in all cases
221 */
222 @Override
223 public int getN()
224 throws MathUnsupportedOperationException {
225 throw new MathUnsupportedOperationException();
226 }
227 }