001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.correlation;
018
019import org.apache.commons.math3.exception.DimensionMismatchException;
020import org.apache.commons.math3.exception.MathUnsupportedOperationException;
021import org.apache.commons.math3.exception.NumberIsTooSmallException;
022import org.apache.commons.math3.linear.MatrixUtils;
023import org.apache.commons.math3.linear.RealMatrix;
024
025/**
026 * Covariance implementation that does not require input data to be
027 * stored in memory. The size of the covariance matrix is specified in the
028 * constructor. Specific elements of the matrix are incrementally updated with
029 * calls to incrementRow() or increment Covariance().
030 *
031 * <p>This class is based on a paper written by Philippe P&eacute;bay:
032 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
033 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
034 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
035 * Sandia National Laboratories.</p>
036 *
037 * <p>Note: the underlying covariance matrix is symmetric, thus only the
038 * upper triangular part of the matrix is stored and updated each increment.</p>
039 *
040 * @version $Id: StorelessCovariance.java 1519851 2013-09-03 21:16:35Z tn $
041 * @since 3.0
042 */
043public class StorelessCovariance extends Covariance {
044
045    /** the square covariance matrix (upper triangular part) */
046    private StorelessBivariateCovariance[] covMatrix;
047
048    /** dimension of the square covariance matrix */
049    private int dimension;
050
051    /**
052     * Create a bias corrected covariance matrix with a given dimension.
053     *
054     * @param dim the dimension of the square covariance matrix
055     */
056    public StorelessCovariance(final int dim) {
057        this(dim, true);
058    }
059
060    /**
061     * Create a covariance matrix with a given number of rows and columns and the
062     * indicated bias correction.
063     *
064     * @param dim the dimension of the covariance matrix
065     * @param biasCorrected if <code>true</code> the covariance estimate is corrected
066     * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
067     * i.e. n in the denominator.
068     */
069    public StorelessCovariance(final int dim, final boolean biasCorrected) {
070        dimension = dim;
071        covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
072        initializeMatrix(biasCorrected);
073    }
074
075    /**
076     * Initialize the internal two-dimensional array of
077     * {@link StorelessBivariateCovariance} instances.
078     *
079     * @param biasCorrected if the covariance estimate shall be corrected for bias
080     */
081    private void initializeMatrix(final boolean biasCorrected) {
082        for(int i = 0; i < dimension; i++){
083            for(int j = 0; j < dimension; j++){
084                setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
085            }
086        }
087    }
088
089    /**
090     * Returns the index (i, j) translated into the one-dimensional
091     * array used to store the upper triangular part of the symmetric
092     * covariance matrix.
093     *
094     * @param i the row index
095     * @param j the column index
096     * @return the corresponding index in the matrix array
097     */
098    private int indexOf(final int i, final int j) {
099        return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
100    }
101
102    /**
103     * Gets the element at index (i, j) from the covariance matrix
104     * @param i the row index
105     * @param j the column index
106     * @return the {@link StorelessBivariateCovariance} element at the given index
107     */
108    private StorelessBivariateCovariance getElement(final int i, final int j) {
109        return covMatrix[indexOf(i, j)];
110    }
111
112    /**
113     * Sets the covariance element at index (i, j) in the covariance matrix
114     * @param i the row index
115     * @param j the column index
116     * @param cov the {@link StorelessBivariateCovariance} element to be set
117     */
118    private void setElement(final int i, final int j,
119                            final StorelessBivariateCovariance cov) {
120        covMatrix[indexOf(i, j)] = cov;
121    }
122
123    /**
124     * Get the covariance for an individual element of the covariance matrix.
125     *
126     * @param xIndex row index in the covariance matrix
127     * @param yIndex column index in the covariance matrix
128     * @return the covariance of the given element
129     * @throws NumberIsTooSmallException if the number of observations
130     * in the cell is &lt; 2
131     */
132    public double getCovariance(final int xIndex,
133                                final int yIndex)
134        throws NumberIsTooSmallException {
135
136        return getElement(xIndex, yIndex).getResult();
137
138    }
139
140    /**
141     * Increment the covariance matrix with one row of data.
142     *
143     * @param data array representing one row of data.
144     * @throws DimensionMismatchException if the length of <code>rowData</code>
145     * does not match with the covariance matrix
146     */
147    public void increment(final double[] data)
148        throws DimensionMismatchException {
149
150        int length = data.length;
151        if (length != dimension) {
152            throw new DimensionMismatchException(length, dimension);
153        }
154
155        // only update the upper triangular part of the covariance matrix
156        // as only these parts are actually stored
157        for (int i = 0; i < length; i++){
158            for (int j = i; j < length; j++){
159                getElement(i, j).increment(data[i], data[j]);
160            }
161        }
162
163    }
164
165    /**
166     * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
167     * with this.  After invoking this method, covariances returned should be close
168     * to what would have been obtained by performing all of the {@link #increment(double[])}
169     * operations in {@code sc} directly on this.
170     *
171     * @param sc externally computed StorelessCovariance to add to this
172     * @throws DimensionMismatchException if the dimension of sc does not match this
173     * @since 3.3
174     */
175    public void append(StorelessCovariance sc) throws DimensionMismatchException {
176        if (sc.dimension != dimension) {
177            throw new DimensionMismatchException(sc.dimension, dimension);
178        }
179
180        // only update the upper triangular part of the covariance matrix
181        // as only these parts are actually stored
182        for (int i = 0; i < dimension; i++) {
183            for (int j = i; j < dimension; j++) {
184                getElement(i, j).append(sc.getElement(i, j));
185            }
186        }
187    }
188
189    /**
190     * {@inheritDoc}
191     * @throws NumberIsTooSmallException if the number of observations
192     * in a cell is &lt; 2
193     */
194    @Override
195    public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
196        return MatrixUtils.createRealMatrix(getData());
197    }
198
199    /**
200     * Return the covariance matrix as two-dimensional array.
201     *
202     * @return a two-dimensional double array of covariance values
203     * @throws NumberIsTooSmallException if the number of observations
204     * for a cell is &lt; 2
205     */
206    public double[][] getData() throws NumberIsTooSmallException {
207        final double[][] data = new double[dimension][dimension];
208        for (int i = 0; i < dimension; i++) {
209            for (int j = 0; j < dimension; j++) {
210                data[i][j] = getElement(i, j).getResult();
211            }
212        }
213        return data;
214    }
215
216    /**
217     * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
218     * since the number of bivariate observations does not have to be the same for different
219     * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
220     *
221     * @return nothing as this implementation always throws a
222     * {@link MathUnsupportedOperationException}
223     * @throws MathUnsupportedOperationException in all cases
224     */
225    @Override
226    public int getN()
227        throws MathUnsupportedOperationException {
228        throw new MathUnsupportedOperationException();
229    }
230}