001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.correlation;
018    
019    import org.apache.commons.math3.exception.MathIllegalArgumentException;
020    import org.apache.commons.math3.exception.util.LocalizedFormats;
021    import org.apache.commons.math3.linear.RealMatrix;
022    import org.apache.commons.math3.linear.BlockRealMatrix;
023    import org.apache.commons.math3.stat.descriptive.moment.Mean;
024    import org.apache.commons.math3.stat.descriptive.moment.Variance;
025    
026    /**
027     * Computes covariances for pairs of arrays or columns of a matrix.
028     *
029     * <p>The constructors that take <code>RealMatrix</code> or
030     * <code>double[][]</code> arguments generate covariance matrices.  The
031     * columns of the input matrices are assumed to represent variable values.</p>
032     *
033     * <p>The constructor argument <code>biasCorrected</code> determines whether or
034     * not computed covariances are bias-corrected.</p>
035     *
036     * <p>Unbiased covariances are given by the formula</p>
037     * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
038     * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
039     * is the mean of the <code>Y</code> values.
040     *
041     * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
042     *
043     * @version $Id: Covariance.java 1416643 2012-12-03 19:37:14Z tn $
044     * @since 2.0
045     */
046    public class Covariance {
047    
048        /** covariance matrix */
049        private final RealMatrix covarianceMatrix;
050    
051        /**
052         * Create an empty covariance matrix.
053         */
054        /** Number of observations (length of covariate vectors) */
055        private final int n;
056    
057        /**
058         * Create a Covariance with no data
059         */
060        public Covariance() {
061            super();
062            covarianceMatrix = null;
063            n = 0;
064        }
065    
066        /**
067         * Create a Covariance matrix from a rectangular array
068         * whose columns represent covariates.
069         *
070         * <p>The <code>biasCorrected</code> parameter determines whether or not
071         * covariance estimates are bias-corrected.</p>
072         *
073         * <p>The input array must be rectangular with at least two columns
074         * and two rows.</p>
075         *
076         * @param data rectangular array with columns representing covariates
077         * @param biasCorrected true means covariances are bias-corrected
078         * @throws MathIllegalArgumentException if the input data array is not
079         * rectangular with at least two rows and two columns.
080         */
081        public Covariance(double[][] data, boolean biasCorrected)
082        throws MathIllegalArgumentException {
083            this(new BlockRealMatrix(data), biasCorrected);
084        }
085    
086        /**
087         * Create a Covariance matrix from a rectangular array
088         * whose columns represent covariates.
089         *
090         * <p>The input array must be rectangular with at least two columns
091         * and two rows</p>
092         *
093         * @param data rectangular array with columns representing covariates
094         * @throws MathIllegalArgumentException if the input data array is not
095         * rectangular with at least two rows and two columns.
096         */
097        public Covariance(double[][] data) throws MathIllegalArgumentException {
098            this(data, true);
099        }
100    
101        /**
102         * Create a covariance matrix from a matrix whose columns
103         * represent covariates.
104         *
105         * <p>The <code>biasCorrected</code> parameter determines whether or not
106         * covariance estimates are bias-corrected.</p>
107         *
108         * <p>The matrix must have at least two columns and two rows</p>
109         *
110         * @param matrix matrix with columns representing covariates
111         * @param biasCorrected true means covariances are bias-corrected
112         * @throws MathIllegalArgumentException if the input matrix does not have
113         * at least two rows and two columns
114         */
115        public Covariance(RealMatrix matrix, boolean biasCorrected)
116        throws MathIllegalArgumentException {
117           checkSufficientData(matrix);
118           n = matrix.getRowDimension();
119           covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
120        }
121    
122        /**
123         * Create a covariance matrix from a matrix whose columns
124         * represent covariates.
125         *
126         * <p>The matrix must have at least two columns and two rows</p>
127         *
128         * @param matrix matrix with columns representing covariates
129         * @throws MathIllegalArgumentException if the input matrix does not have
130         * at least two rows and two columns
131         */
132        public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
133            this(matrix, true);
134        }
135    
136        /**
137         * Returns the covariance matrix
138         *
139         * @return covariance matrix
140         */
141        public RealMatrix getCovarianceMatrix() {
142            return covarianceMatrix;
143        }
144    
145        /**
146         * Returns the number of observations (length of covariate vectors)
147         *
148         * @return number of observations
149         */
150        public int getN() {
151            return n;
152        }
153    
154        /**
155         * Compute a covariance matrix from a matrix whose columns represent
156         * covariates.
157         * @param matrix input matrix (must have at least two columns and two rows)
158         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
159         * @return covariance matrix
160         * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
161         */
162        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
163        throws MathIllegalArgumentException {
164            int dimension = matrix.getColumnDimension();
165            Variance variance = new Variance(biasCorrected);
166            RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
167            for (int i = 0; i < dimension; i++) {
168                for (int j = 0; j < i; j++) {
169                  double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
170                  outMatrix.setEntry(i, j, cov);
171                  outMatrix.setEntry(j, i, cov);
172                }
173                outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
174            }
175            return outMatrix;
176        }
177    
178        /**
179         * Create a covariance matrix from a matrix whose columns represent
180         * covariates. Covariances are computed using the bias-corrected formula.
181         * @param matrix input matrix (must have at least two columns and two rows)
182         * @return covariance matrix
183         * @throws MathIllegalArgumentException if matrix does not contain sufficient data
184         * @see #Covariance
185         */
186        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
187        throws MathIllegalArgumentException {
188            return computeCovarianceMatrix(matrix, true);
189        }
190    
191        /**
192         * Compute a covariance matrix from a rectangular array whose columns represent
193         * covariates.
194         * @param data input array (must have at least two columns and two rows)
195         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
196         * @return covariance matrix
197         * @throws MathIllegalArgumentException if the data array does not contain sufficient
198         * data
199         */
200        protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
201        throws MathIllegalArgumentException {
202            return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
203        }
204    
205        /**
206         * Create a covariance matrix from a rectangular array whose columns represent
207         * covariates. Covariances are computed using the bias-corrected formula.
208         * @param data input array (must have at least two columns and two rows)
209         * @return covariance matrix
210         * @throws MathIllegalArgumentException if the data array does not contain sufficient data
211         * @see #Covariance
212         */
213        protected RealMatrix computeCovarianceMatrix(double[][] data) throws MathIllegalArgumentException {
214            return computeCovarianceMatrix(data, true);
215        }
216    
217        /**
218         * Computes the covariance between the two arrays.
219         *
220         * <p>Array lengths must match and the common length must be at least 2.</p>
221         *
222         * @param xArray first data array
223         * @param yArray second data array
224         * @param biasCorrected if true, returned value will be bias-corrected
225         * @return returns the covariance for the two arrays
226         * @throws  MathIllegalArgumentException if the arrays lengths do not match or
227         * there is insufficient data
228         */
229        public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
230            throws MathIllegalArgumentException {
231            Mean mean = new Mean();
232            double result = 0d;
233            int length = xArray.length;
234            if (length != yArray.length) {
235                throw new MathIllegalArgumentException(
236                      LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
237            } else if (length < 2) {
238                throw new MathIllegalArgumentException(
239                      LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
240            } else {
241                double xMean = mean.evaluate(xArray);
242                double yMean = mean.evaluate(yArray);
243                for (int i = 0; i < length; i++) {
244                    double xDev = xArray[i] - xMean;
245                    double yDev = yArray[i] - yMean;
246                    result += (xDev * yDev - result) / (i + 1);
247                }
248            }
249            return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
250        }
251    
252        /**
253         * Computes the covariance between the two arrays, using the bias-corrected
254         * formula.
255         *
256         * <p>Array lengths must match and the common length must be at least 2.</p>
257         *
258         * @param xArray first data array
259         * @param yArray second data array
260         * @return returns the covariance for the two arrays
261         * @throws  MathIllegalArgumentException if the arrays lengths do not match or
262         * there is insufficient data
263         */
264        public double covariance(final double[] xArray, final double[] yArray)
265            throws MathIllegalArgumentException {
266            return covariance(xArray, yArray, true);
267        }
268    
269        /**
270         * Throws MathIllegalArgumentException if the matrix does not have at least
271         * two columns and two rows.
272         * @param matrix matrix to check
273         * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
274         * to compute covariance
275         */
276        private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
277            int nRows = matrix.getRowDimension();
278            int nCols = matrix.getColumnDimension();
279            if (nRows < 2 || nCols < 2) {
280                throw new MathIllegalArgumentException(
281                        LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
282                        nRows, nCols);
283            }
284        }
285    }