001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.stat.correlation;
018
019import org.apache.commons.math4.exception.MathIllegalArgumentException;
020import org.apache.commons.math4.exception.NotStrictlyPositiveException;
021import org.apache.commons.math4.exception.util.LocalizedFormats;
022import org.apache.commons.math4.linear.BlockRealMatrix;
023import org.apache.commons.math4.linear.RealMatrix;
024import org.apache.commons.math4.stat.descriptive.moment.Mean;
025import org.apache.commons.math4.stat.descriptive.moment.Variance;
026
027/**
028 * Computes covariances for pairs of arrays or columns of a matrix.
029 *
030 * <p>The constructors that take <code>RealMatrix</code> or
031 * <code>double[][]</code> arguments generate covariance matrices.  The
032 * columns of the input matrices are assumed to represent variable values.</p>
033 *
034 * <p>The constructor argument <code>biasCorrected</code> determines whether or
035 * not computed covariances are bias-corrected.</p>
036 *
037 * <p>Unbiased covariances are given by the formula</p>
038 * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
039 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
040 * is the mean of the <code>Y</code> values.
041 *
042 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
043 *
044 * @since 2.0
045 */
046public class Covariance {
047    /** covariance matrix */
048    private final RealMatrix covarianceMatrix;
049    /** Number of observations (length of covariate vectors) */
050    private final int n;
051
052    /**
053     * Create a Covariance with no data
054     */
055    public Covariance() {
056        super();
057        covarianceMatrix = null;
058        n = 0;
059    }
060
061    /**
062     * Create a Covariance matrix from a rectangular array
063     * whose columns represent covariates.
064     *
065     * <p>The <code>biasCorrected</code> parameter determines whether or not
066     * covariance estimates are bias-corrected.</p>
067     *
068     * <p>The input array must be rectangular with at least one column
069     * and two rows.</p>
070     *
071     * @param data rectangular array with columns representing covariates
072     * @param biasCorrected true means covariances are bias-corrected
073     * @throws MathIllegalArgumentException if the input data array is not
074     * rectangular with at least two rows and one column.
075     * @throws NotStrictlyPositiveException if the input data array is not
076     * rectangular with at least one row and one column.
077     */
078    public Covariance(double[][] data, boolean biasCorrected)
079    throws MathIllegalArgumentException, NotStrictlyPositiveException {
080        this(new BlockRealMatrix(data), biasCorrected);
081    }
082
083    /**
084     * Create a Covariance matrix from a rectangular array
085     * whose columns represent covariates.
086     *
087     * <p>The input array must be rectangular with at least one column
088     * and two rows</p>
089     *
090     * @param data rectangular array with columns representing covariates
091     * @throws MathIllegalArgumentException if the input data array is not
092     * rectangular with at least two rows and one column.
093     * @throws NotStrictlyPositiveException if the input data array is not
094     * rectangular with at least one row and one column.
095     */
096    public Covariance(double[][] data)
097    throws MathIllegalArgumentException, NotStrictlyPositiveException {
098        this(data, true);
099    }
100
101    /**
102     * Create a covariance matrix from a matrix whose columns
103     * represent covariates.
104     *
105     * <p>The <code>biasCorrected</code> parameter determines whether or not
106     * covariance estimates are bias-corrected.</p>
107     *
108     * <p>The matrix must have at least one column and two rows</p>
109     *
110     * @param matrix matrix with columns representing covariates
111     * @param biasCorrected true means covariances are bias-corrected
112     * @throws MathIllegalArgumentException if the input matrix does not have
113     * at least two rows and one column
114     */
115    public Covariance(RealMatrix matrix, boolean biasCorrected)
116    throws MathIllegalArgumentException {
117       checkSufficientData(matrix);
118       n = matrix.getRowDimension();
119       covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
120    }
121
122    /**
123     * Create a covariance matrix from a matrix whose columns
124     * represent covariates.
125     *
126     * <p>The matrix must have at least one column and two rows</p>
127     *
128     * @param matrix matrix with columns representing covariates
129     * @throws MathIllegalArgumentException if the input matrix does not have
130     * at least two rows and one column
131     */
132    public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
133        this(matrix, true);
134    }
135
136    /**
137     * Returns the covariance matrix
138     *
139     * @return covariance matrix
140     */
141    public RealMatrix getCovarianceMatrix() {
142        return covarianceMatrix;
143    }
144
145    /**
146     * Returns the number of observations (length of covariate vectors)
147     *
148     * @return number of observations
149     */
150    public int getN() {
151        return n;
152    }
153
154    /**
155     * Compute a covariance matrix from a matrix whose columns represent
156     * covariates.
157     * @param matrix input matrix (must have at least one column and two rows)
158     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
159     * @return covariance matrix
160     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
161     */
162    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
163    throws MathIllegalArgumentException {
164        int dimension = matrix.getColumnDimension();
165        Variance variance = new Variance(biasCorrected);
166        RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
167        for (int i = 0; i < dimension; i++) {
168            for (int j = 0; j < i; j++) {
169              double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
170              outMatrix.setEntry(i, j, cov);
171              outMatrix.setEntry(j, i, cov);
172            }
173            outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
174        }
175        return outMatrix;
176    }
177
178    /**
179     * Create a covariance matrix from a matrix whose columns represent
180     * covariates. Covariances are computed using the bias-corrected formula.
181     * @param matrix input matrix (must have at least one column and two rows)
182     * @return covariance matrix
183     * @throws MathIllegalArgumentException if matrix does not contain sufficient data
184     * @see #Covariance
185     */
186    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
187    throws MathIllegalArgumentException {
188        return computeCovarianceMatrix(matrix, true);
189    }
190
191    /**
192     * Compute a covariance matrix from a rectangular array whose columns represent
193     * covariates.
194     * @param data input array (must have at least one column and two rows)
195     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
196     * @return covariance matrix
197     * @throws MathIllegalArgumentException if the data array does not contain sufficient
198     * data
199     * @throws NotStrictlyPositiveException if the input data array is not
200     * rectangular with at least one row and one column.
201     */
202    protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
203    throws MathIllegalArgumentException, NotStrictlyPositiveException {
204        return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
205    }
206
207    /**
208     * Create a covariance matrix from a rectangular array whose columns represent
209     * covariates. Covariances are computed using the bias-corrected formula.
210     * @param data input array (must have at least one column and two rows)
211     * @return covariance matrix
212     * @throws MathIllegalArgumentException if the data array does not contain sufficient data
213     * @throws NotStrictlyPositiveException if the input data array is not
214     * rectangular with at least one row and one column.
215     * @see #Covariance
216     */
217    protected RealMatrix computeCovarianceMatrix(double[][] data)
218    throws MathIllegalArgumentException, NotStrictlyPositiveException {
219        return computeCovarianceMatrix(data, true);
220    }
221
222    /**
223     * Computes the covariance between the two arrays.
224     *
225     * <p>Array lengths must match and the common length must be at least 2.</p>
226     *
227     * @param xArray first data array
228     * @param yArray second data array
229     * @param biasCorrected if true, returned value will be bias-corrected
230     * @return returns the covariance for the two arrays
231     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
232     * there is insufficient data
233     */
234    public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
235        throws MathIllegalArgumentException {
236        Mean mean = new Mean();
237        double result = 0d;
238        int length = xArray.length;
239        if (length != yArray.length) {
240            throw new MathIllegalArgumentException(
241                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
242        } else if (length < 2) {
243            throw new MathIllegalArgumentException(
244                  LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
245        } else {
246            double xMean = mean.evaluate(xArray);
247            double yMean = mean.evaluate(yArray);
248            for (int i = 0; i < length; i++) {
249                double xDev = xArray[i] - xMean;
250                double yDev = yArray[i] - yMean;
251                result += (xDev * yDev - result) / (i + 1);
252            }
253        }
254        return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
255    }
256
257    /**
258     * Computes the covariance between the two arrays, using the bias-corrected
259     * formula.
260     *
261     * <p>Array lengths must match and the common length must be at least 2.</p>
262     *
263     * @param xArray first data array
264     * @param yArray second data array
265     * @return returns the covariance for the two arrays
266     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
267     * there is insufficient data
268     */
269    public double covariance(final double[] xArray, final double[] yArray)
270        throws MathIllegalArgumentException {
271        return covariance(xArray, yArray, true);
272    }
273
274    /**
275     * Throws MathIllegalArgumentException if the matrix does not have at least
276     * one column and two rows.
277     * @param matrix matrix to check
278     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
279     * to compute covariance
280     */
281    private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
282        int nRows = matrix.getRowDimension();
283        int nCols = matrix.getColumnDimension();
284        if (nRows < 2 || nCols < 1) {
285            throw new MathIllegalArgumentException(
286                    LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
287                    nRows, nCols);
288        }
289    }
290}