001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.correlation;
018
019import org.apache.commons.math3.exception.MathIllegalArgumentException;
020import org.apache.commons.math3.exception.NotStrictlyPositiveException;
021import org.apache.commons.math3.exception.util.LocalizedFormats;
022import org.apache.commons.math3.linear.RealMatrix;
023import org.apache.commons.math3.linear.BlockRealMatrix;
024import org.apache.commons.math3.stat.descriptive.moment.Mean;
025import org.apache.commons.math3.stat.descriptive.moment.Variance;
026
027/**
028 * Computes covariances for pairs of arrays or columns of a matrix.
029 *
030 * <p>The constructors that take <code>RealMatrix</code> or
031 * <code>double[][]</code> arguments generate covariance matrices.  The
032 * columns of the input matrices are assumed to represent variable values.</p>
033 *
034 * <p>The constructor argument <code>biasCorrected</code> determines whether or
035 * not computed covariances are bias-corrected.</p>
036 *
037 * <p>Unbiased covariances are given by the formula</p>
038 * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
039 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
040 * is the mean of the <code>Y</code> values.
041 *
 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
043 *
044 * @since 2.0
045 */
046public class Covariance {
047
048    /** covariance matrix */
049    private final RealMatrix covarianceMatrix;
050
051    /**
052     * Create an empty covariance matrix.
053     */
054    /** Number of observations (length of covariate vectors) */
055    private final int n;
056
057    /**
058     * Create a Covariance with no data
059     */
060    public Covariance() {
061        super();
062        covarianceMatrix = null;
063        n = 0;
064    }
065
066    /**
067     * Create a Covariance matrix from a rectangular array
068     * whose columns represent covariates.
069     *
070     * <p>The <code>biasCorrected</code> parameter determines whether or not
071     * covariance estimates are bias-corrected.</p>
072     *
073     * <p>The input array must be rectangular with at least one column
074     * and two rows.</p>
075     *
076     * @param data rectangular array with columns representing covariates
077     * @param biasCorrected true means covariances are bias-corrected
078     * @throws MathIllegalArgumentException if the input data array is not
079     * rectangular with at least two rows and one column.
080     * @throws NotStrictlyPositiveException if the input data array is not
081     * rectangular with at least one row and one column.
082     */
083    public Covariance(double[][] data, boolean biasCorrected)
084    throws MathIllegalArgumentException, NotStrictlyPositiveException {
085        this(new BlockRealMatrix(data), biasCorrected);
086    }
087
088    /**
089     * Create a Covariance matrix from a rectangular array
090     * whose columns represent covariates.
091     *
092     * <p>The input array must be rectangular with at least one column
093     * and two rows</p>
094     *
095     * @param data rectangular array with columns representing covariates
096     * @throws MathIllegalArgumentException if the input data array is not
097     * rectangular with at least two rows and one column.
098     * @throws NotStrictlyPositiveException if the input data array is not
099     * rectangular with at least one row and one column.
100     */
101    public Covariance(double[][] data)
102    throws MathIllegalArgumentException, NotStrictlyPositiveException {
103        this(data, true);
104    }
105
106    /**
107     * Create a covariance matrix from a matrix whose columns
108     * represent covariates.
109     *
110     * <p>The <code>biasCorrected</code> parameter determines whether or not
111     * covariance estimates are bias-corrected.</p>
112     *
113     * <p>The matrix must have at least one column and two rows</p>
114     *
115     * @param matrix matrix with columns representing covariates
116     * @param biasCorrected true means covariances are bias-corrected
117     * @throws MathIllegalArgumentException if the input matrix does not have
118     * at least two rows and one column
119     */
120    public Covariance(RealMatrix matrix, boolean biasCorrected)
121    throws MathIllegalArgumentException {
122       checkSufficientData(matrix);
123       n = matrix.getRowDimension();
124       covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
125    }
126
127    /**
128     * Create a covariance matrix from a matrix whose columns
129     * represent covariates.
130     *
131     * <p>The matrix must have at least one column and two rows</p>
132     *
133     * @param matrix matrix with columns representing covariates
134     * @throws MathIllegalArgumentException if the input matrix does not have
135     * at least two rows and one column
136     */
137    public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
138        this(matrix, true);
139    }
140
141    /**
142     * Returns the covariance matrix
143     *
144     * @return covariance matrix
145     */
146    public RealMatrix getCovarianceMatrix() {
147        return covarianceMatrix;
148    }
149
150    /**
151     * Returns the number of observations (length of covariate vectors)
152     *
153     * @return number of observations
154     */
155    public int getN() {
156        return n;
157    }
158
159    /**
160     * Compute a covariance matrix from a matrix whose columns represent
161     * covariates.
162     * @param matrix input matrix (must have at least one column and two rows)
163     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
164     * @return covariance matrix
165     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
166     */
167    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
168    throws MathIllegalArgumentException {
169        int dimension = matrix.getColumnDimension();
170        Variance variance = new Variance(biasCorrected);
171        RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
172        for (int i = 0; i < dimension; i++) {
173            for (int j = 0; j < i; j++) {
174              double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
175              outMatrix.setEntry(i, j, cov);
176              outMatrix.setEntry(j, i, cov);
177            }
178            outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
179        }
180        return outMatrix;
181    }
182
183    /**
184     * Create a covariance matrix from a matrix whose columns represent
185     * covariates. Covariances are computed using the bias-corrected formula.
186     * @param matrix input matrix (must have at least one column and two rows)
187     * @return covariance matrix
188     * @throws MathIllegalArgumentException if matrix does not contain sufficient data
189     * @see #Covariance
190     */
191    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
192    throws MathIllegalArgumentException {
193        return computeCovarianceMatrix(matrix, true);
194    }
195
196    /**
197     * Compute a covariance matrix from a rectangular array whose columns represent
198     * covariates.
199     * @param data input array (must have at least one column and two rows)
200     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
201     * @return covariance matrix
202     * @throws MathIllegalArgumentException if the data array does not contain sufficient
203     * data
204     * @throws NotStrictlyPositiveException if the input data array is not
205     * rectangular with at least one row and one column.
206     */
207    protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
208    throws MathIllegalArgumentException, NotStrictlyPositiveException {
209        return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
210    }
211
212    /**
213     * Create a covariance matrix from a rectangular array whose columns represent
214     * covariates. Covariances are computed using the bias-corrected formula.
215     * @param data input array (must have at least one column and two rows)
216     * @return covariance matrix
217     * @throws MathIllegalArgumentException if the data array does not contain sufficient data
218     * @throws NotStrictlyPositiveException if the input data array is not
219     * rectangular with at least one row and one column.
220     * @see #Covariance
221     */
222    protected RealMatrix computeCovarianceMatrix(double[][] data)
223    throws MathIllegalArgumentException, NotStrictlyPositiveException {
224        return computeCovarianceMatrix(data, true);
225    }
226
227    /**
228     * Computes the covariance between the two arrays.
229     *
230     * <p>Array lengths must match and the common length must be at least 2.</p>
231     *
232     * @param xArray first data array
233     * @param yArray second data array
234     * @param biasCorrected if true, returned value will be bias-corrected
235     * @return returns the covariance for the two arrays
236     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
237     * there is insufficient data
238     */
239    public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
240        throws MathIllegalArgumentException {
241        Mean mean = new Mean();
242        double result = 0d;
243        int length = xArray.length;
244        if (length != yArray.length) {
245            throw new MathIllegalArgumentException(
246                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
247        } else if (length < 2) {
248            throw new MathIllegalArgumentException(
249                  LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
250        } else {
251            double xMean = mean.evaluate(xArray);
252            double yMean = mean.evaluate(yArray);
253            for (int i = 0; i < length; i++) {
254                double xDev = xArray[i] - xMean;
255                double yDev = yArray[i] - yMean;
256                result += (xDev * yDev - result) / (i + 1);
257            }
258        }
259        return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
260    }
261
262    /**
263     * Computes the covariance between the two arrays, using the bias-corrected
264     * formula.
265     *
266     * <p>Array lengths must match and the common length must be at least 2.</p>
267     *
268     * @param xArray first data array
269     * @param yArray second data array
270     * @return returns the covariance for the two arrays
271     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
272     * there is insufficient data
273     */
274    public double covariance(final double[] xArray, final double[] yArray)
275        throws MathIllegalArgumentException {
276        return covariance(xArray, yArray, true);
277    }
278
279    /**
280     * Throws MathIllegalArgumentException if the matrix does not have at least
281     * one column and two rows.
282     * @param matrix matrix to check
283     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
284     * to compute covariance
285     */
286    private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
287        int nRows = matrix.getRowDimension();
288        int nCols = matrix.getColumnDimension();
289        if (nRows < 2 || nCols < 1) {
290            throw new MathIllegalArgumentException(
291                    LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
292                    nRows, nCols);
293        }
294    }
295}