001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.correlation;
018    
019    import org.apache.commons.math3.exception.MathIllegalArgumentException;
020    import org.apache.commons.math3.exception.NotStrictlyPositiveException;
021    import org.apache.commons.math3.exception.util.LocalizedFormats;
022    import org.apache.commons.math3.linear.RealMatrix;
023    import org.apache.commons.math3.linear.BlockRealMatrix;
024    import org.apache.commons.math3.stat.descriptive.moment.Mean;
025    import org.apache.commons.math3.stat.descriptive.moment.Variance;
026    
027    /**
028     * Computes covariances for pairs of arrays or columns of a matrix.
029     *
030     * <p>The constructors that take <code>RealMatrix</code> or
031     * <code>double[][]</code> arguments generate covariance matrices.  The
032     * columns of the input matrices are assumed to represent variable values.</p>
033     *
034     * <p>The constructor argument <code>biasCorrected</code> determines whether or
035     * not computed covariances are bias-corrected.</p>
036     *
037     * <p>Unbiased covariances are given by the formula</p>
038     * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
039     * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
040     * is the mean of the <code>Y</code> values.
041     *
042     * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
043     *
044     * @version $Id: Covariance.java 1453271 2013-03-06 10:29:51Z luc $
045     * @since 2.0
046     */
047    public class Covariance {
048    
049        /** covariance matrix */
050        private final RealMatrix covarianceMatrix;
051    
052        /**
053         * Create an empty covariance matrix.
054         */
055        /** Number of observations (length of covariate vectors) */
056        private final int n;
057    
058        /**
059         * Create a Covariance with no data
060         */
061        public Covariance() {
062            super();
063            covarianceMatrix = null;
064            n = 0;
065        }
066    
067        /**
068         * Create a Covariance matrix from a rectangular array
069         * whose columns represent covariates.
070         *
071         * <p>The <code>biasCorrected</code> parameter determines whether or not
072         * covariance estimates are bias-corrected.</p>
073         *
074         * <p>The input array must be rectangular with at least one column
075         * and two rows.</p>
076         *
077         * @param data rectangular array with columns representing covariates
078         * @param biasCorrected true means covariances are bias-corrected
079         * @throws MathIllegalArgumentException if the input data array is not
080         * rectangular with at least two rows and one column.
081         * @throws NotStrictlyPositiveException if the input data array is not
082         * rectangular with at least one row and one column.
083         */
084        public Covariance(double[][] data, boolean biasCorrected)
085        throws MathIllegalArgumentException, NotStrictlyPositiveException {
086            this(new BlockRealMatrix(data), biasCorrected);
087        }
088    
089        /**
090         * Create a Covariance matrix from a rectangular array
091         * whose columns represent covariates.
092         *
093         * <p>The input array must be rectangular with at least one column
094         * and two rows</p>
095         *
096         * @param data rectangular array with columns representing covariates
097         * @throws MathIllegalArgumentException if the input data array is not
098         * rectangular with at least two rows and one column.
099         * @throws NotStrictlyPositiveException if the input data array is not
100         * rectangular with at least one row and one column.
101         */
102        public Covariance(double[][] data)
103        throws MathIllegalArgumentException, NotStrictlyPositiveException {
104            this(data, true);
105        }
106    
107        /**
108         * Create a covariance matrix from a matrix whose columns
109         * represent covariates.
110         *
111         * <p>The <code>biasCorrected</code> parameter determines whether or not
112         * covariance estimates are bias-corrected.</p>
113         *
114         * <p>The matrix must have at least one column and two rows</p>
115         *
116         * @param matrix matrix with columns representing covariates
117         * @param biasCorrected true means covariances are bias-corrected
118         * @throws MathIllegalArgumentException if the input matrix does not have
119         * at least two rows and one column
120         */
121        public Covariance(RealMatrix matrix, boolean biasCorrected)
122        throws MathIllegalArgumentException {
123           checkSufficientData(matrix);
124           n = matrix.getRowDimension();
125           covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
126        }
127    
128        /**
129         * Create a covariance matrix from a matrix whose columns
130         * represent covariates.
131         *
132         * <p>The matrix must have at least one column and two rows</p>
133         *
134         * @param matrix matrix with columns representing covariates
135         * @throws MathIllegalArgumentException if the input matrix does not have
136         * at least two rows and one column
137         */
138        public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
139            this(matrix, true);
140        }
141    
142        /**
143         * Returns the covariance matrix
144         *
145         * @return covariance matrix
146         */
147        public RealMatrix getCovarianceMatrix() {
148            return covarianceMatrix;
149        }
150    
151        /**
152         * Returns the number of observations (length of covariate vectors)
153         *
154         * @return number of observations
155         */
156        public int getN() {
157            return n;
158        }
159    
160        /**
161         * Compute a covariance matrix from a matrix whose columns represent
162         * covariates.
163         * @param matrix input matrix (must have at least one column and two rows)
164         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
165         * @return covariance matrix
166         * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
167         */
168        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
169        throws MathIllegalArgumentException {
170            int dimension = matrix.getColumnDimension();
171            Variance variance = new Variance(biasCorrected);
172            RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
173            for (int i = 0; i < dimension; i++) {
174                for (int j = 0; j < i; j++) {
175                  double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
176                  outMatrix.setEntry(i, j, cov);
177                  outMatrix.setEntry(j, i, cov);
178                }
179                outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
180            }
181            return outMatrix;
182        }
183    
184        /**
185         * Create a covariance matrix from a matrix whose columns represent
186         * covariates. Covariances are computed using the bias-corrected formula.
187         * @param matrix input matrix (must have at least one column and two rows)
188         * @return covariance matrix
189         * @throws MathIllegalArgumentException if matrix does not contain sufficient data
190         * @see #Covariance
191         */
192        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
193        throws MathIllegalArgumentException {
194            return computeCovarianceMatrix(matrix, true);
195        }
196    
197        /**
198         * Compute a covariance matrix from a rectangular array whose columns represent
199         * covariates.
200         * @param data input array (must have at least one column and two rows)
201         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
202         * @return covariance matrix
203         * @throws MathIllegalArgumentException if the data array does not contain sufficient
204         * data
205         * @throws NotStrictlyPositiveException if the input data array is not
206         * rectangular with at least one row and one column.
207         */
208        protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
209        throws MathIllegalArgumentException, NotStrictlyPositiveException {
210            return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
211        }
212    
213        /**
214         * Create a covariance matrix from a rectangular array whose columns represent
215         * covariates. Covariances are computed using the bias-corrected formula.
216         * @param data input array (must have at least one column and two rows)
217         * @return covariance matrix
218         * @throws MathIllegalArgumentException if the data array does not contain sufficient data
219         * @throws NotStrictlyPositiveException if the input data array is not
220         * rectangular with at least one row and one column.
221         * @see #Covariance
222         */
223        protected RealMatrix computeCovarianceMatrix(double[][] data)
224        throws MathIllegalArgumentException, NotStrictlyPositiveException {
225            return computeCovarianceMatrix(data, true);
226        }
227    
228        /**
229         * Computes the covariance between the two arrays.
230         *
231         * <p>Array lengths must match and the common length must be at least 2.</p>
232         *
233         * @param xArray first data array
234         * @param yArray second data array
235         * @param biasCorrected if true, returned value will be bias-corrected
236         * @return returns the covariance for the two arrays
237         * @throws  MathIllegalArgumentException if the arrays lengths do not match or
238         * there is insufficient data
239         */
240        public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
241            throws MathIllegalArgumentException {
242            Mean mean = new Mean();
243            double result = 0d;
244            int length = xArray.length;
245            if (length != yArray.length) {
246                throw new MathIllegalArgumentException(
247                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
248            } else if (length < 2) {
249                throw new MathIllegalArgumentException(
250                      LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
251            } else {
252                double xMean = mean.evaluate(xArray);
253                double yMean = mean.evaluate(yArray);
254                for (int i = 0; i < length; i++) {
255                    double xDev = xArray[i] - xMean;
256                    double yDev = yArray[i] - yMean;
257                    result += (xDev * yDev - result) / (i + 1);
258                }
259            }
260            return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
261        }
262    
263        /**
264         * Computes the covariance between the two arrays, using the bias-corrected
265         * formula.
266         *
267         * <p>Array lengths must match and the common length must be at least 2.</p>
268         *
269         * @param xArray first data array
270         * @param yArray second data array
271         * @return returns the covariance for the two arrays
272         * @throws  MathIllegalArgumentException if the arrays lengths do not match or
273         * there is insufficient data
274         */
275        public double covariance(final double[] xArray, final double[] yArray)
276            throws MathIllegalArgumentException {
277            return covariance(xArray, yArray, true);
278        }
279    
280        /**
281         * Throws MathIllegalArgumentException if the matrix does not have at least
282         * one column and two rows.
283         * @param matrix matrix to check
284         * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
285         * to compute covariance
286         */
287        private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
288            int nRows = matrix.getRowDimension();
289            int nCols = matrix.getColumnDimension();
290            if (nRows < 2 || nCols < 1) {
291                throw new MathIllegalArgumentException(
292                        LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
293                        nRows, nCols);
294            }
295        }
296    }