View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.correlation;
18  
19  import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
20  import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
21  import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
22  import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
23  import org.apache.commons.math4.legacy.linear.RealMatrix;
24  import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
25  import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;
26  
27  /**
28   * Computes covariances for pairs of arrays or columns of a matrix.
29   *
30   * <p>The constructors that take <code>RealMatrix</code> or
31   * <code>double[][]</code> arguments generate covariance matrices.  The
32   * columns of the input matrices are assumed to represent variable values.</p>
33   *
34   * <p>The constructor argument <code>biasCorrected</code> determines whether or
35   * not computed covariances are bias-corrected.</p>
36   *
37   * <p>Unbiased covariances are given by the formula</p>
38   * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
39   * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
40   * is the mean of the <code>Y</code> values.
41   *
42   * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
43   *
44   * @since 2.0
45   */
46  public class Covariance {
47      /** covariance matrix. */
48      private final RealMatrix covarianceMatrix;
49      /** Number of observations (length of covariate vectors). */
50      private final int n;
51  
52      /**
53       * Create a Covariance with no data.
54       */
55      public Covariance() {
56          super();
57          covarianceMatrix = null;
58          n = 0;
59      }
60  
61      /**
62       * Create a Covariance matrix from a rectangular array
63       * whose columns represent covariates.
64       *
65       * <p>The <code>biasCorrected</code> parameter determines whether or not
66       * covariance estimates are bias-corrected.</p>
67       *
68       * <p>The input array must be rectangular with at least one column
69       * and two rows.</p>
70       *
71       * @param data rectangular array with columns representing covariates
72       * @param biasCorrected true means covariances are bias-corrected
73       * @throws MathIllegalArgumentException if the input data array is not
74       * rectangular with at least two rows and one column.
75       * @throws NotStrictlyPositiveException if the input data array is not
76       * rectangular with at least one row and one column.
77       */
78      public Covariance(double[][] data, boolean biasCorrected)
79      throws MathIllegalArgumentException, NotStrictlyPositiveException {
80          this(new BlockRealMatrix(data), biasCorrected);
81      }
82  
83      /**
84       * Create a Covariance matrix from a rectangular array
85       * whose columns represent covariates.
86       *
87       * <p>The input array must be rectangular with at least one column
88       * and two rows</p>
89       *
90       * @param data rectangular array with columns representing covariates
91       * @throws MathIllegalArgumentException if the input data array is not
92       * rectangular with at least two rows and one column.
93       * @throws NotStrictlyPositiveException if the input data array is not
94       * rectangular with at least one row and one column.
95       */
96      public Covariance(double[][] data)
97      throws MathIllegalArgumentException, NotStrictlyPositiveException {
98          this(data, true);
99      }
100 
101     /**
102      * Create a covariance matrix from a matrix whose columns
103      * represent covariates.
104      *
105      * <p>The <code>biasCorrected</code> parameter determines whether or not
106      * covariance estimates are bias-corrected.</p>
107      *
108      * <p>The matrix must have at least one column and two rows</p>
109      *
110      * @param matrix matrix with columns representing covariates
111      * @param biasCorrected true means covariances are bias-corrected
112      * @throws MathIllegalArgumentException if the input matrix does not have
113      * at least two rows and one column
114      */
115     public Covariance(RealMatrix matrix, boolean biasCorrected)
116     throws MathIllegalArgumentException {
117        checkSufficientData(matrix);
118        n = matrix.getRowDimension();
119        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
120     }
121 
122     /**
123      * Create a covariance matrix from a matrix whose columns
124      * represent covariates.
125      *
126      * <p>The matrix must have at least one column and two rows</p>
127      *
128      * @param matrix matrix with columns representing covariates
129      * @throws MathIllegalArgumentException if the input matrix does not have
130      * at least two rows and one column
131      */
132     public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
133         this(matrix, true);
134     }
135 
136     /**
137      * Returns the covariance matrix.
138      *
139      * @return covariance matrix
140      */
141     public RealMatrix getCovarianceMatrix() {
142         return covarianceMatrix;
143     }
144 
145     /**
146      * Returns the number of observations (length of covariate vectors).
147      *
148      * @return number of observations
149      */
150     public int getN() {
151         return n;
152     }
153 
154     /**
155      * Compute a covariance matrix from a matrix whose columns represent
156      * covariates.
157      * @param matrix input matrix (must have at least one column and two rows)
158      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
159      * @return covariance matrix
160      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
161      */
162     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
163     throws MathIllegalArgumentException {
164         int dimension = matrix.getColumnDimension();
165         Variance variance = new Variance(biasCorrected);
166         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
167         for (int i = 0; i < dimension; i++) {
168             for (int j = 0; j < i; j++) {
169               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
170               outMatrix.setEntry(i, j, cov);
171               outMatrix.setEntry(j, i, cov);
172             }
173             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
174         }
175         return outMatrix;
176     }
177 
178     /**
179      * Create a covariance matrix from a matrix whose columns represent
180      * covariates. Covariances are computed using the bias-corrected formula.
181      * @param matrix input matrix (must have at least one column and two rows)
182      * @return covariance matrix
183      * @throws MathIllegalArgumentException if matrix does not contain sufficient data
184      * @see #Covariance
185      */
186     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
187     throws MathIllegalArgumentException {
188         return computeCovarianceMatrix(matrix, true);
189     }
190 
191     /**
192      * Compute a covariance matrix from a rectangular array whose columns represent
193      * covariates.
194      * @param data input array (must have at least one column and two rows)
195      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
196      * @return covariance matrix
197      * @throws MathIllegalArgumentException if the data array does not contain sufficient
198      * data
199      * @throws NotStrictlyPositiveException if the input data array is not
200      * rectangular with at least one row and one column.
201      */
202     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
203     throws MathIllegalArgumentException, NotStrictlyPositiveException {
204         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
205     }
206 
207     /**
208      * Create a covariance matrix from a rectangular array whose columns represent
209      * covariates. Covariances are computed using the bias-corrected formula.
210      * @param data input array (must have at least one column and two rows)
211      * @return covariance matrix
212      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
213      * @throws NotStrictlyPositiveException if the input data array is not
214      * rectangular with at least one row and one column.
215      * @see #Covariance
216      */
217     protected RealMatrix computeCovarianceMatrix(double[][] data)
218     throws MathIllegalArgumentException, NotStrictlyPositiveException {
219         return computeCovarianceMatrix(data, true);
220     }
221 
222     /**
223      * Computes the covariance between the two arrays.
224      *
225      * <p>Array lengths must match and the common length must be at least 2.</p>
226      *
227      * @param xArray first data array
228      * @param yArray second data array
229      * @param biasCorrected if true, returned value will be bias-corrected
230      * @return returns the covariance for the two arrays
231      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
232      * there is insufficient data
233      */
234     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
235         throws MathIllegalArgumentException {
236         Mean mean = new Mean();
237         double result = 0d;
238         int length = xArray.length;
239         if (length != yArray.length) {
240             throw new MathIllegalArgumentException(
241                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
242         } else if (length < 2) {
243             throw new MathIllegalArgumentException(
244                   LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
245         } else {
246             double xMean = mean.evaluate(xArray);
247             double yMean = mean.evaluate(yArray);
248             for (int i = 0; i < length; i++) {
249                 double xDev = xArray[i] - xMean;
250                 double yDev = yArray[i] - yMean;
251                 result += (xDev * yDev - result) / (i + 1);
252             }
253         }
254         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
255     }
256 
257     /**
258      * Computes the covariance between the two arrays, using the bias-corrected
259      * formula.
260      *
261      * <p>Array lengths must match and the common length must be at least 2.</p>
262      *
263      * @param xArray first data array
264      * @param yArray second data array
265      * @return returns the covariance for the two arrays
266      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
267      * there is insufficient data
268      */
269     public double covariance(final double[] xArray, final double[] yArray)
270         throws MathIllegalArgumentException {
271         return covariance(xArray, yArray, true);
272     }
273 
274     /**
275      * Throws MathIllegalArgumentException if the matrix does not have at least
276      * one column and two rows.
277      * @param matrix matrix to check
278      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
279      * to compute covariance
280      */
281     private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
282         int nRows = matrix.getRowDimension();
283         int nCols = matrix.getColumnDimension();
284         if (nRows < 2 || nCols < 1) {
285             throw new MathIllegalArgumentException(
286                     LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
287                     nRows, nCols);
288         }
289     }
290 }