001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.correlation;
018
019 import org.apache.commons.math3.exception.MathIllegalArgumentException;
020 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
021 import org.apache.commons.math3.exception.util.LocalizedFormats;
022 import org.apache.commons.math3.linear.RealMatrix;
023 import org.apache.commons.math3.linear.BlockRealMatrix;
024 import org.apache.commons.math3.stat.descriptive.moment.Mean;
025 import org.apache.commons.math3.stat.descriptive.moment.Variance;
026
027 /**
028 * Computes covariances for pairs of arrays or columns of a matrix.
029 *
030 * <p>The constructors that take <code>RealMatrix</code> or
031 * <code>double[][]</code> arguments generate covariance matrices. The
032 * columns of the input matrices are assumed to represent variable values.</p>
033 *
034 * <p>The constructor argument <code>biasCorrected</code> determines whether or
035 * not computed covariances are bias-corrected.</p>
036 *
037 * <p>Unbiased covariances are given by the formula</p>
038 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
039 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
040 * is the mean of the <code>Y</code> values.
041 *
042 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
043 *
044 * @version $Id: Covariance.java 1453271 2013-03-06 10:29:51Z luc $
045 * @since 2.0
046 */
047 public class Covariance {
048
049 /** covariance matrix */
050 private final RealMatrix covarianceMatrix;
051
052 /**
053 * Create an empty covariance matrix.
054 */
055 /** Number of observations (length of covariate vectors) */
056 private final int n;
057
058 /**
059 * Create a Covariance with no data
060 */
061 public Covariance() {
062 super();
063 covarianceMatrix = null;
064 n = 0;
065 }
066
067 /**
068 * Create a Covariance matrix from a rectangular array
069 * whose columns represent covariates.
070 *
071 * <p>The <code>biasCorrected</code> parameter determines whether or not
072 * covariance estimates are bias-corrected.</p>
073 *
074 * <p>The input array must be rectangular with at least one column
075 * and two rows.</p>
076 *
077 * @param data rectangular array with columns representing covariates
078 * @param biasCorrected true means covariances are bias-corrected
079 * @throws MathIllegalArgumentException if the input data array is not
080 * rectangular with at least two rows and one column.
081 * @throws NotStrictlyPositiveException if the input data array is not
082 * rectangular with at least one row and one column.
083 */
084 public Covariance(double[][] data, boolean biasCorrected)
085 throws MathIllegalArgumentException, NotStrictlyPositiveException {
086 this(new BlockRealMatrix(data), biasCorrected);
087 }
088
089 /**
090 * Create a Covariance matrix from a rectangular array
091 * whose columns represent covariates.
092 *
093 * <p>The input array must be rectangular with at least one column
094 * and two rows</p>
095 *
096 * @param data rectangular array with columns representing covariates
097 * @throws MathIllegalArgumentException if the input data array is not
098 * rectangular with at least two rows and one column.
099 * @throws NotStrictlyPositiveException if the input data array is not
100 * rectangular with at least one row and one column.
101 */
102 public Covariance(double[][] data)
103 throws MathIllegalArgumentException, NotStrictlyPositiveException {
104 this(data, true);
105 }
106
107 /**
108 * Create a covariance matrix from a matrix whose columns
109 * represent covariates.
110 *
111 * <p>The <code>biasCorrected</code> parameter determines whether or not
112 * covariance estimates are bias-corrected.</p>
113 *
114 * <p>The matrix must have at least one column and two rows</p>
115 *
116 * @param matrix matrix with columns representing covariates
117 * @param biasCorrected true means covariances are bias-corrected
118 * @throws MathIllegalArgumentException if the input matrix does not have
119 * at least two rows and one column
120 */
121 public Covariance(RealMatrix matrix, boolean biasCorrected)
122 throws MathIllegalArgumentException {
123 checkSufficientData(matrix);
124 n = matrix.getRowDimension();
125 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
126 }
127
128 /**
129 * Create a covariance matrix from a matrix whose columns
130 * represent covariates.
131 *
132 * <p>The matrix must have at least one column and two rows</p>
133 *
134 * @param matrix matrix with columns representing covariates
135 * @throws MathIllegalArgumentException if the input matrix does not have
136 * at least two rows and one column
137 */
138 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
139 this(matrix, true);
140 }
141
142 /**
143 * Returns the covariance matrix
144 *
145 * @return covariance matrix
146 */
147 public RealMatrix getCovarianceMatrix() {
148 return covarianceMatrix;
149 }
150
151 /**
152 * Returns the number of observations (length of covariate vectors)
153 *
154 * @return number of observations
155 */
156 public int getN() {
157 return n;
158 }
159
160 /**
161 * Compute a covariance matrix from a matrix whose columns represent
162 * covariates.
163 * @param matrix input matrix (must have at least one column and two rows)
164 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
165 * @return covariance matrix
166 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
167 */
168 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
169 throws MathIllegalArgumentException {
170 int dimension = matrix.getColumnDimension();
171 Variance variance = new Variance(biasCorrected);
172 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
173 for (int i = 0; i < dimension; i++) {
174 for (int j = 0; j < i; j++) {
175 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
176 outMatrix.setEntry(i, j, cov);
177 outMatrix.setEntry(j, i, cov);
178 }
179 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
180 }
181 return outMatrix;
182 }
183
184 /**
185 * Create a covariance matrix from a matrix whose columns represent
186 * covariates. Covariances are computed using the bias-corrected formula.
187 * @param matrix input matrix (must have at least one column and two rows)
188 * @return covariance matrix
189 * @throws MathIllegalArgumentException if matrix does not contain sufficient data
190 * @see #Covariance
191 */
192 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
193 throws MathIllegalArgumentException {
194 return computeCovarianceMatrix(matrix, true);
195 }
196
197 /**
198 * Compute a covariance matrix from a rectangular array whose columns represent
199 * covariates.
200 * @param data input array (must have at least one column and two rows)
201 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
202 * @return covariance matrix
203 * @throws MathIllegalArgumentException if the data array does not contain sufficient
204 * data
205 * @throws NotStrictlyPositiveException if the input data array is not
206 * rectangular with at least one row and one column.
207 */
208 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
209 throws MathIllegalArgumentException, NotStrictlyPositiveException {
210 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
211 }
212
213 /**
214 * Create a covariance matrix from a rectangular array whose columns represent
215 * covariates. Covariances are computed using the bias-corrected formula.
216 * @param data input array (must have at least one column and two rows)
217 * @return covariance matrix
218 * @throws MathIllegalArgumentException if the data array does not contain sufficient data
219 * @throws NotStrictlyPositiveException if the input data array is not
220 * rectangular with at least one row and one column.
221 * @see #Covariance
222 */
223 protected RealMatrix computeCovarianceMatrix(double[][] data)
224 throws MathIllegalArgumentException, NotStrictlyPositiveException {
225 return computeCovarianceMatrix(data, true);
226 }
227
228 /**
229 * Computes the covariance between the two arrays.
230 *
231 * <p>Array lengths must match and the common length must be at least 2.</p>
232 *
233 * @param xArray first data array
234 * @param yArray second data array
235 * @param biasCorrected if true, returned value will be bias-corrected
236 * @return returns the covariance for the two arrays
237 * @throws MathIllegalArgumentException if the arrays lengths do not match or
238 * there is insufficient data
239 */
240 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
241 throws MathIllegalArgumentException {
242 Mean mean = new Mean();
243 double result = 0d;
244 int length = xArray.length;
245 if (length != yArray.length) {
246 throw new MathIllegalArgumentException(
247 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
248 } else if (length < 2) {
249 throw new MathIllegalArgumentException(
250 LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
251 } else {
252 double xMean = mean.evaluate(xArray);
253 double yMean = mean.evaluate(yArray);
254 for (int i = 0; i < length; i++) {
255 double xDev = xArray[i] - xMean;
256 double yDev = yArray[i] - yMean;
257 result += (xDev * yDev - result) / (i + 1);
258 }
259 }
260 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
261 }
262
263 /**
264 * Computes the covariance between the two arrays, using the bias-corrected
265 * formula.
266 *
267 * <p>Array lengths must match and the common length must be at least 2.</p>
268 *
269 * @param xArray first data array
270 * @param yArray second data array
271 * @return returns the covariance for the two arrays
272 * @throws MathIllegalArgumentException if the arrays lengths do not match or
273 * there is insufficient data
274 */
275 public double covariance(final double[] xArray, final double[] yArray)
276 throws MathIllegalArgumentException {
277 return covariance(xArray, yArray, true);
278 }
279
280 /**
281 * Throws MathIllegalArgumentException if the matrix does not have at least
282 * one column and two rows.
283 * @param matrix matrix to check
284 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
285 * to compute covariance
286 */
287 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
288 int nRows = matrix.getRowDimension();
289 int nCols = matrix.getColumnDimension();
290 if (nRows < 2 || nCols < 1) {
291 throw new MathIllegalArgumentException(
292 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
293 nRows, nCols);
294 }
295 }
296 }