1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.stat.correlation; 18 19 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException; 20 import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException; 21 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 22 import org.apache.commons.math4.legacy.linear.BlockRealMatrix; 23 import org.apache.commons.math4.legacy.linear.RealMatrix; 24 import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean; 25 import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance; 26 27 /** 28 * Computes covariances for pairs of arrays or columns of a matrix. 29 * 30 * <p>The constructors that take <code>RealMatrix</code> or 31 * <code>double[][]</code> arguments generate covariance matrices. The 32 * columns of the input matrices are assumed to represent variable values.</p> 33 * 34 * <p>The constructor argument <code>biasCorrected</code> determines whether or 35 * not computed covariances are bias-corrected.</p> 36 * 37 * <p>Unbiased covariances are given by the formula</p> 38 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> 39 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code> 40 * is the mean of the <code>Y</code> values. 41 * 42 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code> 43 * 44 * @since 2.0 45 */ 46 public class Covariance { 47 /** covariance matrix. */ 48 private final RealMatrix covarianceMatrix; 49 /** Number of observations (length of covariate vectors). */ 50 private final int n; 51 52 /** 53 * Create a Covariance with no data. 54 */ 55 public Covariance() { 56 super(); 57 covarianceMatrix = null; 58 n = 0; 59 } 60 61 /** 62 * Create a Covariance matrix from a rectangular array 63 * whose columns represent covariates. 64 * 65 * <p>The <code>biasCorrected</code> parameter determines whether or not 66 * covariance estimates are bias-corrected.</p> 67 * 68 * <p>The input array must be rectangular with at least one column 69 * and two rows.</p> 70 * 71 * @param data rectangular array with columns representing covariates 72 * @param biasCorrected true means covariances are bias-corrected 73 * @throws MathIllegalArgumentException if the input data array is not 74 * rectangular with at least two rows and one column. 75 * @throws NotStrictlyPositiveException if the input data array is not 76 * rectangular with at least one row and one column. 77 */ 78 public Covariance(double[][] data, boolean biasCorrected) 79 throws MathIllegalArgumentException, NotStrictlyPositiveException { 80 this(new BlockRealMatrix(data), biasCorrected); 81 } 82 83 /** 84 * Create a Covariance matrix from a rectangular array 85 * whose columns represent covariates. 86 * 87 * <p>The input array must be rectangular with at least one column 88 * and two rows</p> 89 * 90 * @param data rectangular array with columns representing covariates 91 * @throws MathIllegalArgumentException if the input data array is not 92 * rectangular with at least two rows and one column. 93 * @throws NotStrictlyPositiveException if the input data array is not 94 * rectangular with at least one row and one column. 95 */ 96 public Covariance(double[][] data) 97 throws MathIllegalArgumentException, NotStrictlyPositiveException { 98 this(data, true); 99 } 100 101 /** 102 * Create a covariance matrix from a matrix whose columns 103 * represent covariates. 104 * 105 * <p>The <code>biasCorrected</code> parameter determines whether or not 106 * covariance estimates are bias-corrected.</p> 107 * 108 * <p>The matrix must have at least one column and two rows</p> 109 * 110 * @param matrix matrix with columns representing covariates 111 * @param biasCorrected true means covariances are bias-corrected 112 * @throws MathIllegalArgumentException if the input matrix does not have 113 * at least two rows and one column 114 */ 115 public Covariance(RealMatrix matrix, boolean biasCorrected) 116 throws MathIllegalArgumentException { 117 checkSufficientData(matrix); 118 n = matrix.getRowDimension(); 119 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); 120 } 121 122 /** 123 * Create a covariance matrix from a matrix whose columns 124 * represent covariates. 125 * 126 * <p>The matrix must have at least one column and two rows</p> 127 * 128 * @param matrix matrix with columns representing covariates 129 * @throws MathIllegalArgumentException if the input matrix does not have 130 * at least two rows and one column 131 */ 132 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException { 133 this(matrix, true); 134 } 135 136 /** 137 * Returns the covariance matrix. 138 * 139 * @return covariance matrix 140 */ 141 public RealMatrix getCovarianceMatrix() { 142 return covarianceMatrix; 143 } 144 145 /** 146 * Returns the number of observations (length of covariate vectors). 147 * 148 * @return number of observations 149 */ 150 public int getN() { 151 return n; 152 } 153 154 /** 155 * Compute a covariance matrix from a matrix whose columns represent 156 * covariates. 157 * @param matrix input matrix (must have at least one column and two rows) 158 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 159 * @return covariance matrix 160 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 161 */ 162 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) 163 throws MathIllegalArgumentException { 164 int dimension = matrix.getColumnDimension(); 165 Variance variance = new Variance(biasCorrected); 166 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); 167 for (int i = 0; i < dimension; i++) { 168 for (int j = 0; j < i; j++) { 169 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); 170 outMatrix.setEntry(i, j, cov); 171 outMatrix.setEntry(j, i, cov); 172 } 173 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); 174 } 175 return outMatrix; 176 } 177 178 /** 179 * Create a covariance matrix from a matrix whose columns represent 180 * covariates. Covariances are computed using the bias-corrected formula. 181 * @param matrix input matrix (must have at least one column and two rows) 182 * @return covariance matrix 183 * @throws MathIllegalArgumentException if matrix does not contain sufficient data 184 * @see #Covariance 185 */ 186 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) 187 throws MathIllegalArgumentException { 188 return computeCovarianceMatrix(matrix, true); 189 } 190 191 /** 192 * Compute a covariance matrix from a rectangular array whose columns represent 193 * covariates. 194 * @param data input array (must have at least one column and two rows) 195 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 196 * @return covariance matrix 197 * @throws MathIllegalArgumentException if the data array does not contain sufficient 198 * data 199 * @throws NotStrictlyPositiveException if the input data array is not 200 * rectangular with at least one row and one column. 201 */ 202 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) 203 throws MathIllegalArgumentException, NotStrictlyPositiveException { 204 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); 205 } 206 207 /** 208 * Create a covariance matrix from a rectangular array whose columns represent 209 * covariates. Covariances are computed using the bias-corrected formula. 210 * @param data input array (must have at least one column and two rows) 211 * @return covariance matrix 212 * @throws MathIllegalArgumentException if the data array does not contain sufficient data 213 * @throws NotStrictlyPositiveException if the input data array is not 214 * rectangular with at least one row and one column. 215 * @see #Covariance 216 */ 217 protected RealMatrix computeCovarianceMatrix(double[][] data) 218 throws MathIllegalArgumentException, NotStrictlyPositiveException { 219 return computeCovarianceMatrix(data, true); 220 } 221 222 /** 223 * Computes the covariance between the two arrays. 224 * 225 * <p>Array lengths must match and the common length must be at least 2.</p> 226 * 227 * @param xArray first data array 228 * @param yArray second data array 229 * @param biasCorrected if true, returned value will be bias-corrected 230 * @return returns the covariance for the two arrays 231 * @throws MathIllegalArgumentException if the arrays lengths do not match or 232 * there is insufficient data 233 */ 234 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 235 throws MathIllegalArgumentException { 236 Mean mean = new Mean(); 237 double result = 0d; 238 int length = xArray.length; 239 if (length != yArray.length) { 240 throw new MathIllegalArgumentException( 241 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length); 242 } else if (length < 2) { 243 throw new MathIllegalArgumentException( 244 LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2); 245 } else { 246 double xMean = mean.evaluate(xArray); 247 double yMean = mean.evaluate(yArray); 248 for (int i = 0; i < length; i++) { 249 double xDev = xArray[i] - xMean; 250 double yDev = yArray[i] - yMean; 251 result += (xDev * yDev - result) / (i + 1); 252 } 253 } 254 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; 255 } 256 257 /** 258 * Computes the covariance between the two arrays, using the bias-corrected 259 * formula. 260 * 261 * <p>Array lengths must match and the common length must be at least 2.</p> 262 * 263 * @param xArray first data array 264 * @param yArray second data array 265 * @return returns the covariance for the two arrays 266 * @throws MathIllegalArgumentException if the arrays lengths do not match or 267 * there is insufficient data 268 */ 269 public double covariance(final double[] xArray, final double[] yArray) 270 throws MathIllegalArgumentException { 271 return covariance(xArray, yArray, true); 272 } 273 274 /** 275 * Throws MathIllegalArgumentException if the matrix does not have at least 276 * one column and two rows. 277 * @param matrix matrix to check 278 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 279 * to compute covariance 280 */ 281 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException { 282 int nRows = matrix.getRowDimension(); 283 int nCols = matrix.getColumnDimension(); 284 if (nRows < 2 || nCols < 1) { 285 throw new MathIllegalArgumentException( 286 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, 287 nRows, nCols); 288 } 289 } 290 }