001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.correlation;
018
019import org.apache.commons.math3.exception.MathIllegalArgumentException;
020import org.apache.commons.math3.exception.NotStrictlyPositiveException;
021import org.apache.commons.math3.exception.util.LocalizedFormats;
022import org.apache.commons.math3.linear.RealMatrix;
023import org.apache.commons.math3.linear.BlockRealMatrix;
024import org.apache.commons.math3.stat.descriptive.moment.Mean;
025import org.apache.commons.math3.stat.descriptive.moment.Variance;
026
027/**
028 * Computes covariances for pairs of arrays or columns of a matrix.
029 *
030 * <p>The constructors that take <code>RealMatrix</code> or
031 * <code>double[][]</code> arguments generate covariance matrices.  The
032 * columns of the input matrices are assumed to represent variable values.</p>
033 *
034 * <p>The constructor argument <code>biasCorrected</code> determines whether or
035 * not computed covariances are bias-corrected.</p>
036 *
037 * <p>Unbiased covariances are given by the formula</p>
038 * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
039 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
040 * is the mean of the <code>Y</code> values.
041 *
042 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
043 *
044 * @version $Id: Covariance.java 1453271 2013-03-06 10:29:51Z luc $
045 * @since 2.0
046 */
047public class Covariance {
048
049    /** covariance matrix */
050    private final RealMatrix covarianceMatrix;
051
052    /**
053     * Create an empty covariance matrix.
054     */
055    /** Number of observations (length of covariate vectors) */
056    private final int n;
057
058    /**
059     * Create a Covariance with no data
060     */
061    public Covariance() {
062        super();
063        covarianceMatrix = null;
064        n = 0;
065    }
066
067    /**
068     * Create a Covariance matrix from a rectangular array
069     * whose columns represent covariates.
070     *
071     * <p>The <code>biasCorrected</code> parameter determines whether or not
072     * covariance estimates are bias-corrected.</p>
073     *
074     * <p>The input array must be rectangular with at least one column
075     * and two rows.</p>
076     *
077     * @param data rectangular array with columns representing covariates
078     * @param biasCorrected true means covariances are bias-corrected
079     * @throws MathIllegalArgumentException if the input data array is not
080     * rectangular with at least two rows and one column.
081     * @throws NotStrictlyPositiveException if the input data array is not
082     * rectangular with at least one row and one column.
083     */
084    public Covariance(double[][] data, boolean biasCorrected)
085    throws MathIllegalArgumentException, NotStrictlyPositiveException {
086        this(new BlockRealMatrix(data), biasCorrected);
087    }
088
089    /**
090     * Create a Covariance matrix from a rectangular array
091     * whose columns represent covariates.
092     *
093     * <p>The input array must be rectangular with at least one column
094     * and two rows</p>
095     *
096     * @param data rectangular array with columns representing covariates
097     * @throws MathIllegalArgumentException if the input data array is not
098     * rectangular with at least two rows and one column.
099     * @throws NotStrictlyPositiveException if the input data array is not
100     * rectangular with at least one row and one column.
101     */
102    public Covariance(double[][] data)
103    throws MathIllegalArgumentException, NotStrictlyPositiveException {
104        this(data, true);
105    }
106
107    /**
108     * Create a covariance matrix from a matrix whose columns
109     * represent covariates.
110     *
111     * <p>The <code>biasCorrected</code> parameter determines whether or not
112     * covariance estimates are bias-corrected.</p>
113     *
114     * <p>The matrix must have at least one column and two rows</p>
115     *
116     * @param matrix matrix with columns representing covariates
117     * @param biasCorrected true means covariances are bias-corrected
118     * @throws MathIllegalArgumentException if the input matrix does not have
119     * at least two rows and one column
120     */
121    public Covariance(RealMatrix matrix, boolean biasCorrected)
122    throws MathIllegalArgumentException {
123       checkSufficientData(matrix);
124       n = matrix.getRowDimension();
125       covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
126    }
127
128    /**
129     * Create a covariance matrix from a matrix whose columns
130     * represent covariates.
131     *
132     * <p>The matrix must have at least one column and two rows</p>
133     *
134     * @param matrix matrix with columns representing covariates
135     * @throws MathIllegalArgumentException if the input matrix does not have
136     * at least two rows and one column
137     */
138    public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
139        this(matrix, true);
140    }
141
142    /**
143     * Returns the covariance matrix
144     *
145     * @return covariance matrix
146     */
147    public RealMatrix getCovarianceMatrix() {
148        return covarianceMatrix;
149    }
150
151    /**
152     * Returns the number of observations (length of covariate vectors)
153     *
154     * @return number of observations
155     */
156    public int getN() {
157        return n;
158    }
159
160    /**
161     * Compute a covariance matrix from a matrix whose columns represent
162     * covariates.
163     * @param matrix input matrix (must have at least one column and two rows)
164     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
165     * @return covariance matrix
166     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
167     */
168    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
169    throws MathIllegalArgumentException {
170        int dimension = matrix.getColumnDimension();
171        Variance variance = new Variance(biasCorrected);
172        RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
173        for (int i = 0; i < dimension; i++) {
174            for (int j = 0; j < i; j++) {
175              double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
176              outMatrix.setEntry(i, j, cov);
177              outMatrix.setEntry(j, i, cov);
178            }
179            outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
180        }
181        return outMatrix;
182    }
183
184    /**
185     * Create a covariance matrix from a matrix whose columns represent
186     * covariates. Covariances are computed using the bias-corrected formula.
187     * @param matrix input matrix (must have at least one column and two rows)
188     * @return covariance matrix
189     * @throws MathIllegalArgumentException if matrix does not contain sufficient data
190     * @see #Covariance
191     */
192    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
193    throws MathIllegalArgumentException {
194        return computeCovarianceMatrix(matrix, true);
195    }
196
197    /**
198     * Compute a covariance matrix from a rectangular array whose columns represent
199     * covariates.
200     * @param data input array (must have at least one column and two rows)
201     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
202     * @return covariance matrix
203     * @throws MathIllegalArgumentException if the data array does not contain sufficient
204     * data
205     * @throws NotStrictlyPositiveException if the input data array is not
206     * rectangular with at least one row and one column.
207     */
208    protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
209    throws MathIllegalArgumentException, NotStrictlyPositiveException {
210        return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
211    }
212
213    /**
214     * Create a covariance matrix from a rectangular array whose columns represent
215     * covariates. Covariances are computed using the bias-corrected formula.
216     * @param data input array (must have at least one column and two rows)
217     * @return covariance matrix
218     * @throws MathIllegalArgumentException if the data array does not contain sufficient data
219     * @throws NotStrictlyPositiveException if the input data array is not
220     * rectangular with at least one row and one column.
221     * @see #Covariance
222     */
223    protected RealMatrix computeCovarianceMatrix(double[][] data)
224    throws MathIllegalArgumentException, NotStrictlyPositiveException {
225        return computeCovarianceMatrix(data, true);
226    }
227
228    /**
229     * Computes the covariance between the two arrays.
230     *
231     * <p>Array lengths must match and the common length must be at least 2.</p>
232     *
233     * @param xArray first data array
234     * @param yArray second data array
235     * @param biasCorrected if true, returned value will be bias-corrected
236     * @return returns the covariance for the two arrays
237     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
238     * there is insufficient data
239     */
240    public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
241        throws MathIllegalArgumentException {
242        Mean mean = new Mean();
243        double result = 0d;
244        int length = xArray.length;
245        if (length != yArray.length) {
246            throw new MathIllegalArgumentException(
247                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
248        } else if (length < 2) {
249            throw new MathIllegalArgumentException(
250                  LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
251        } else {
252            double xMean = mean.evaluate(xArray);
253            double yMean = mean.evaluate(yArray);
254            for (int i = 0; i < length; i++) {
255                double xDev = xArray[i] - xMean;
256                double yDev = yArray[i] - yMean;
257                result += (xDev * yDev - result) / (i + 1);
258            }
259        }
260        return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
261    }
262
263    /**
264     * Computes the covariance between the two arrays, using the bias-corrected
265     * formula.
266     *
267     * <p>Array lengths must match and the common length must be at least 2.</p>
268     *
269     * @param xArray first data array
270     * @param yArray second data array
271     * @return returns the covariance for the two arrays
272     * @throws  MathIllegalArgumentException if the arrays lengths do not match or
273     * there is insufficient data
274     */
275    public double covariance(final double[] xArray, final double[] yArray)
276        throws MathIllegalArgumentException {
277        return covariance(xArray, yArray, true);
278    }
279
280    /**
281     * Throws MathIllegalArgumentException if the matrix does not have at least
282     * one column and two rows.
283     * @param matrix matrix to check
284     * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
285     * to compute covariance
286     */
287    private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
288        int nRows = matrix.getRowDimension();
289        int nCols = matrix.getColumnDimension();
290        if (nRows < 2 || nCols < 1) {
291            throw new MathIllegalArgumentException(
292                    LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
293                    nRows, nCols);
294        }
295    }
296}