StorelessCovariance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.correlation;

  18. import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
  19. import org.apache.commons.math4.legacy.exception.MathUnsupportedOperationException;
  20. import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
  21. import org.apache.commons.math4.legacy.linear.MatrixUtils;
  22. import org.apache.commons.math4.legacy.linear.RealMatrix;

  23. /**
  24.  * Covariance implementation that does not require input data to be
  25.  * stored in memory. The size of the covariance matrix is specified in the
  26.  * constructor. Specific elements of the matrix are incrementally updated with
  27.  * calls to incrementRow() or increment Covariance().
  28.  *
  29.  * <p>This class is based on a paper written by Philippe P&eacute;bay:
  30.  * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
  31.  * Formulas for Robust, One-Pass Parallel Computation of Covariances and
  32.  * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
  33.  * Sandia National Laboratories.</p>
  34.  *
  35.  * <p>Note: the underlying covariance matrix is symmetric, thus only the
  36.  * upper triangular part of the matrix is stored and updated each increment.</p>
  37.  *
  38.  * @since 3.0
  39.  */
  40. public class StorelessCovariance extends Covariance {

  41.     /** the square covariance matrix (upper triangular part). */
  42.     private StorelessBivariateCovariance[] covMatrix;

  43.     /** dimension of the square covariance matrix. */
  44.     private int dimension;

  45.     /**
  46.      * Create a bias corrected covariance matrix with a given dimension.
  47.      *
  48.      * @param dim the dimension of the square covariance matrix
  49.      */
  50.     public StorelessCovariance(final int dim) {
  51.         this(dim, true);
  52.     }

  53.     /**
  54.      * Create a covariance matrix with a given number of rows and columns and the
  55.      * indicated bias correction.
  56.      *
  57.      * @param dim the dimension of the covariance matrix
  58.      * @param biasCorrected if <code>true</code> the covariance estimate is corrected
  59.      * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
  60.      * i.e. n in the denominator.
  61.      */
  62.     public StorelessCovariance(final int dim, final boolean biasCorrected) {
  63.         dimension = dim;
  64.         covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
  65.         initializeMatrix(biasCorrected);
  66.     }

  67.     /**
  68.      * Initialize the internal two-dimensional array of
  69.      * {@link StorelessBivariateCovariance} instances.
  70.      *
  71.      * @param biasCorrected if the covariance estimate shall be corrected for bias
  72.      */
  73.     private void initializeMatrix(final boolean biasCorrected) {
  74.         for(int i = 0; i < dimension; i++){
  75.             for(int j = 0; j < dimension; j++){
  76.                 setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
  77.             }
  78.         }
  79.     }

  80.     /**
  81.      * Returns the index (i, j) translated into the one-dimensional
  82.      * array used to store the upper triangular part of the symmetric
  83.      * covariance matrix.
  84.      *
  85.      * @param i the row index
  86.      * @param j the column index
  87.      * @return the corresponding index in the matrix array
  88.      */
  89.     private int indexOf(final int i, final int j) {
  90.         return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
  91.     }

  92.     /**
  93.      * Gets the element at index (i, j) from the covariance matrix.
  94.      * @param i the row index
  95.      * @param j the column index
  96.      * @return the {@link StorelessBivariateCovariance} element at the given index
  97.      */
  98.     private StorelessBivariateCovariance getElement(final int i, final int j) {
  99.         return covMatrix[indexOf(i, j)];
  100.     }

  101.     /**
  102.      * Sets the covariance element at index (i, j) in the covariance matrix.
  103.      * @param i the row index
  104.      * @param j the column index
  105.      * @param cov the {@link StorelessBivariateCovariance} element to be set
  106.      */
  107.     private void setElement(final int i, final int j,
  108.                             final StorelessBivariateCovariance cov) {
  109.         covMatrix[indexOf(i, j)] = cov;
  110.     }

  111.     /**
  112.      * Get the covariance for an individual element of the covariance matrix.
  113.      *
  114.      * @param xIndex row index in the covariance matrix
  115.      * @param yIndex column index in the covariance matrix
  116.      * @return the covariance of the given element
  117.      * @throws NumberIsTooSmallException if the number of observations
  118.      * in the cell is &lt; 2
  119.      */
  120.     public double getCovariance(final int xIndex,
  121.                                 final int yIndex)
  122.         throws NumberIsTooSmallException {

  123.         return getElement(xIndex, yIndex).getResult();
  124.     }

  125.     /**
  126.      * Increment the covariance matrix with one row of data.
  127.      *
  128.      * @param data array representing one row of data.
  129.      * @throws DimensionMismatchException if the length of <code>rowData</code>
  130.      * does not match with the covariance matrix
  131.      */
  132.     public void increment(final double[] data)
  133.         throws DimensionMismatchException {

  134.         int length = data.length;
  135.         if (length != dimension) {
  136.             throw new DimensionMismatchException(length, dimension);
  137.         }

  138.         // only update the upper triangular part of the covariance matrix
  139.         // as only these parts are actually stored
  140.         for (int i = 0; i < length; i++){
  141.             for (int j = i; j < length; j++){
  142.                 getElement(i, j).increment(data[i], data[j]);
  143.             }
  144.         }
  145.     }

  146.     /**
  147.      * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
  148.      * with this.  After invoking this method, covariances returned should be close
  149.      * to what would have been obtained by performing all of the {@link #increment(double[])}
  150.      * operations in {@code sc} directly on this.
  151.      *
  152.      * @param sc externally computed StorelessCovariance to add to this
  153.      * @throws DimensionMismatchException if the dimension of sc does not match this
  154.      * @since 3.3
  155.      */
  156.     public void append(StorelessCovariance sc) throws DimensionMismatchException {
  157.         if (sc.dimension != dimension) {
  158.             throw new DimensionMismatchException(sc.dimension, dimension);
  159.         }

  160.         // only update the upper triangular part of the covariance matrix
  161.         // as only these parts are actually stored
  162.         for (int i = 0; i < dimension; i++) {
  163.             for (int j = i; j < dimension; j++) {
  164.                 getElement(i, j).append(sc.getElement(i, j));
  165.             }
  166.         }
  167.     }

  168.     /**
  169.      * {@inheritDoc}
  170.      * @throws NumberIsTooSmallException if the number of observations
  171.      * in a cell is &lt; 2
  172.      */
  173.     @Override
  174.     public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
  175.         return MatrixUtils.createRealMatrix(getData());
  176.     }

  177.     /**
  178.      * Return the covariance matrix as two-dimensional array.
  179.      *
  180.      * @return a two-dimensional double array of covariance values
  181.      * @throws NumberIsTooSmallException if the number of observations
  182.      * for a cell is &lt; 2
  183.      */
  184.     public double[][] getData() throws NumberIsTooSmallException {
  185.         final double[][] data = new double[dimension][dimension];
  186.         for (int i = 0; i < dimension; i++) {
  187.             for (int j = 0; j < dimension; j++) {
  188.                 data[i][j] = getElement(i, j).getResult();
  189.             }
  190.         }
  191.         return data;
  192.     }

  193.     /**
  194.      * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
  195.      * since the number of bivariate observations does not have to be the same for different
  196.      * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
  197.      *
  198.      * @return nothing as this implementation always throws a
  199.      * {@link MathUnsupportedOperationException}
  200.      * @throws MathUnsupportedOperationException in all cases
  201.      */
  202.     @Override
  203.     public int getN()
  204.         throws MathUnsupportedOperationException {
  205.         throw new MathUnsupportedOperationException();
  206.     }
  207. }