Covariance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.correlation;

  18. import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
  19. import org.apache.commons.math4.legacy.exception.NotStrictlyPositiveException;
  20. import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
  21. import org.apache.commons.math4.legacy.linear.BlockRealMatrix;
  22. import org.apache.commons.math4.legacy.linear.RealMatrix;
  23. import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
  24. import org.apache.commons.math4.legacy.stat.descriptive.moment.Variance;

  25. /**
  26.  * Computes covariances for pairs of arrays or columns of a matrix.
  27.  *
  28.  * <p>The constructors that take <code>RealMatrix</code> or
  29.  * <code>double[][]</code> arguments generate covariance matrices.  The
  30.  * columns of the input matrices are assumed to represent variable values.</p>
  31.  *
  32.  * <p>The constructor argument <code>biasCorrected</code> determines whether or
  33.  * not computed covariances are bias-corrected.</p>
  34.  *
  35.  * <p>Unbiased covariances are given by the formula</p>
  36.  * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
  37.  * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
  38.  * is the mean of the <code>Y</code> values.
  39.  *
  40.  * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
  41.  *
  42.  * @since 2.0
  43.  */
  44. public class Covariance {
  45.     /** covariance matrix. */
  46.     private final RealMatrix covarianceMatrix;
  47.     /** Number of observations (length of covariate vectors). */
  48.     private final int n;

  49.     /**
  50.      * Create a Covariance with no data.
  51.      */
  52.     public Covariance() {
  53.         super();
  54.         covarianceMatrix = null;
  55.         n = 0;
  56.     }

  57.     /**
  58.      * Create a Covariance matrix from a rectangular array
  59.      * whose columns represent covariates.
  60.      *
  61.      * <p>The <code>biasCorrected</code> parameter determines whether or not
  62.      * covariance estimates are bias-corrected.</p>
  63.      *
  64.      * <p>The input array must be rectangular with at least one column
  65.      * and two rows.</p>
  66.      *
  67.      * @param data rectangular array with columns representing covariates
  68.      * @param biasCorrected true means covariances are bias-corrected
  69.      * @throws MathIllegalArgumentException if the input data array is not
  70.      * rectangular with at least two rows and one column.
  71.      * @throws NotStrictlyPositiveException if the input data array is not
  72.      * rectangular with at least one row and one column.
  73.      */
  74.     public Covariance(double[][] data, boolean biasCorrected)
  75.     throws MathIllegalArgumentException, NotStrictlyPositiveException {
  76.         this(new BlockRealMatrix(data), biasCorrected);
  77.     }

  78.     /**
  79.      * Create a Covariance matrix from a rectangular array
  80.      * whose columns represent covariates.
  81.      *
  82.      * <p>The input array must be rectangular with at least one column
  83.      * and two rows</p>
  84.      *
  85.      * @param data rectangular array with columns representing covariates
  86.      * @throws MathIllegalArgumentException if the input data array is not
  87.      * rectangular with at least two rows and one column.
  88.      * @throws NotStrictlyPositiveException if the input data array is not
  89.      * rectangular with at least one row and one column.
  90.      */
  91.     public Covariance(double[][] data)
  92.     throws MathIllegalArgumentException, NotStrictlyPositiveException {
  93.         this(data, true);
  94.     }

  95.     /**
  96.      * Create a covariance matrix from a matrix whose columns
  97.      * represent covariates.
  98.      *
  99.      * <p>The <code>biasCorrected</code> parameter determines whether or not
  100.      * covariance estimates are bias-corrected.</p>
  101.      *
  102.      * <p>The matrix must have at least one column and two rows</p>
  103.      *
  104.      * @param matrix matrix with columns representing covariates
  105.      * @param biasCorrected true means covariances are bias-corrected
  106.      * @throws MathIllegalArgumentException if the input matrix does not have
  107.      * at least two rows and one column
  108.      */
  109.     public Covariance(RealMatrix matrix, boolean biasCorrected)
  110.     throws MathIllegalArgumentException {
  111.        checkSufficientData(matrix);
  112.        n = matrix.getRowDimension();
  113.        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
  114.     }

  115.     /**
  116.      * Create a covariance matrix from a matrix whose columns
  117.      * represent covariates.
  118.      *
  119.      * <p>The matrix must have at least one column and two rows</p>
  120.      *
  121.      * @param matrix matrix with columns representing covariates
  122.      * @throws MathIllegalArgumentException if the input matrix does not have
  123.      * at least two rows and one column
  124.      */
  125.     public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
  126.         this(matrix, true);
  127.     }

  128.     /**
  129.      * Returns the covariance matrix.
  130.      *
  131.      * @return covariance matrix
  132.      */
  133.     public RealMatrix getCovarianceMatrix() {
  134.         return covarianceMatrix;
  135.     }

  136.     /**
  137.      * Returns the number of observations (length of covariate vectors).
  138.      *
  139.      * @return number of observations
  140.      */
  141.     public int getN() {
  142.         return n;
  143.     }

  144.     /**
  145.      * Compute a covariance matrix from a matrix whose columns represent
  146.      * covariates.
  147.      * @param matrix input matrix (must have at least one column and two rows)
  148.      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
  149.      * @return covariance matrix
  150.      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  151.      */
  152.     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
  153.     throws MathIllegalArgumentException {
  154.         int dimension = matrix.getColumnDimension();
  155.         Variance variance = new Variance(biasCorrected);
  156.         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
  157.         for (int i = 0; i < dimension; i++) {
  158.             for (int j = 0; j < i; j++) {
  159.               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
  160.               outMatrix.setEntry(i, j, cov);
  161.               outMatrix.setEntry(j, i, cov);
  162.             }
  163.             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
  164.         }
  165.         return outMatrix;
  166.     }

  167.     /**
  168.      * Create a covariance matrix from a matrix whose columns represent
  169.      * covariates. Covariances are computed using the bias-corrected formula.
  170.      * @param matrix input matrix (must have at least one column and two rows)
  171.      * @return covariance matrix
  172.      * @throws MathIllegalArgumentException if matrix does not contain sufficient data
  173.      * @see #Covariance
  174.      */
  175.     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
  176.     throws MathIllegalArgumentException {
  177.         return computeCovarianceMatrix(matrix, true);
  178.     }

  179.     /**
  180.      * Compute a covariance matrix from a rectangular array whose columns represent
  181.      * covariates.
  182.      * @param data input array (must have at least one column and two rows)
  183.      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
  184.      * @return covariance matrix
  185.      * @throws MathIllegalArgumentException if the data array does not contain sufficient
  186.      * data
  187.      * @throws NotStrictlyPositiveException if the input data array is not
  188.      * rectangular with at least one row and one column.
  189.      */
  190.     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
  191.     throws MathIllegalArgumentException, NotStrictlyPositiveException {
  192.         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
  193.     }

  194.     /**
  195.      * Create a covariance matrix from a rectangular array whose columns represent
  196.      * covariates. Covariances are computed using the bias-corrected formula.
  197.      * @param data input array (must have at least one column and two rows)
  198.      * @return covariance matrix
  199.      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
  200.      * @throws NotStrictlyPositiveException if the input data array is not
  201.      * rectangular with at least one row and one column.
  202.      * @see #Covariance
  203.      */
  204.     protected RealMatrix computeCovarianceMatrix(double[][] data)
  205.     throws MathIllegalArgumentException, NotStrictlyPositiveException {
  206.         return computeCovarianceMatrix(data, true);
  207.     }

  208.     /**
  209.      * Computes the covariance between the two arrays.
  210.      *
  211.      * <p>Array lengths must match and the common length must be at least 2.</p>
  212.      *
  213.      * @param xArray first data array
  214.      * @param yArray second data array
  215.      * @param biasCorrected if true, returned value will be bias-corrected
  216.      * @return returns the covariance for the two arrays
  217.      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
  218.      * there is insufficient data
  219.      */
  220.     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
  221.         throws MathIllegalArgumentException {
  222.         Mean mean = new Mean();
  223.         double result = 0d;
  224.         int length = xArray.length;
  225.         if (length != yArray.length) {
  226.             throw new MathIllegalArgumentException(
  227.                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
  228.         } else if (length < 2) {
  229.             throw new MathIllegalArgumentException(
  230.                   LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
  231.         } else {
  232.             double xMean = mean.evaluate(xArray);
  233.             double yMean = mean.evaluate(yArray);
  234.             for (int i = 0; i < length; i++) {
  235.                 double xDev = xArray[i] - xMean;
  236.                 double yDev = yArray[i] - yMean;
  237.                 result += (xDev * yDev - result) / (i + 1);
  238.             }
  239.         }
  240.         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
  241.     }

  242.     /**
  243.      * Computes the covariance between the two arrays, using the bias-corrected
  244.      * formula.
  245.      *
  246.      * <p>Array lengths must match and the common length must be at least 2.</p>
  247.      *
  248.      * @param xArray first data array
  249.      * @param yArray second data array
  250.      * @return returns the covariance for the two arrays
  251.      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
  252.      * there is insufficient data
  253.      */
  254.     public double covariance(final double[] xArray, final double[] yArray)
  255.         throws MathIllegalArgumentException {
  256.         return covariance(xArray, yArray, true);
  257.     }

  258.     /**
  259.      * Throws MathIllegalArgumentException if the matrix does not have at least
  260.      * one column and two rows.
  261.      * @param matrix matrix to check
  262.      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  263.      * to compute covariance
  264.      */
  265.     private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
  266.         int nRows = matrix.getRowDimension();
  267.         int nCols = matrix.getColumnDimension();
  268.         if (nRows < 2 || nCols < 1) {
  269.             throw new MathIllegalArgumentException(
  270.                     LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
  271.                     nRows, nCols);
  272.         }
  273.     }
  274. }