001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.stat.correlation;
018    
019    import org.apache.commons.math3.exception.DimensionMismatchException;
020    import org.apache.commons.math3.exception.MathUnsupportedOperationException;
021    import org.apache.commons.math3.exception.NumberIsTooSmallException;
022    import org.apache.commons.math3.linear.MatrixUtils;
023    import org.apache.commons.math3.linear.RealMatrix;
024    
025    /**
026     * Covariance implementation that does not require input data to be
027     * stored in memory. The size of the covariance matrix is specified in the
028     * constructor. Specific elements of the matrix are incrementally updated with
029     * calls to incrementRow() or increment Covariance().
030     *
031     * <p>This class is based on a paper written by Philippe P&eacute;bay:
032     * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
033     * Formulas for Robust, One-Pass Parallel Computation of Covariances and
034     * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
035     * Sandia National Laboratories.</p>
036     *
037     * <p>Note: the underlying covariance matrix is symmetric, thus only the
038     * upper triangular part of the matrix is stored and updated each increment.</p>
039     *
040     * @version $Id: StorelessCovariance.java 1410238 2012-11-16 07:58:49Z luc $
041     * @since 3.0
042     */
043    public class StorelessCovariance extends Covariance {
044    
045        /** the square covariance matrix (upper triangular part) */
046        private StorelessBivariateCovariance[] covMatrix;
047    
048        /** dimension of the square covariance matrix */
049        private int dimension;
050    
051        /**
052         * Create a bias corrected covariance matrix with a given dimension.
053         *
054         * @param dim the dimension of the square covariance matrix
055         */
056        public StorelessCovariance(final int dim) {
057            this(dim, true);
058        }
059    
060        /**
061         * Create a covariance matrix with a given number of rows and columns and the
062         * indicated bias correction.
063         *
064         * @param dim the dimension of the covariance matrix
065         * @param biasCorrected if <code>true</code> the covariance estimate is corrected
066         * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
067         * i.e. n in the denominator.
068         */
069        public StorelessCovariance(final int dim, final boolean biasCorrected) {
070            dimension = dim;
071            covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
072            initializeMatrix(biasCorrected);
073        }
074    
075        /**
076         * Initialize the internal two-dimensional array of
077         * {@link StorelessBivariateCovariance} instances.
078         *
079         * @param biasCorrected if the covariance estimate shall be corrected for bias
080         */
081        private void initializeMatrix(final boolean biasCorrected) {
082            for(int i = 0; i < dimension; i++){
083                for(int j = 0; j < dimension; j++){
084                    setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
085                }
086            }
087        }
088    
089        /**
090         * Returns the index (i, j) translated into the one-dimensional
091         * array used to store the upper triangular part of the symmetric
092         * covariance matrix.
093         *
094         * @param i the row index
095         * @param j the column index
096         * @return the corresponding index in the matrix array
097         */
098        private int indexOf(final int i, final int j) {
099            return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
100        }
101    
102        /**
103         * Gets the element at index (i, j) from the covariance matrix
104         * @param i the row index
105         * @param j the column index
106         * @return the {@link StorelessBivariateCovariance} element at the given index
107         */
108        private StorelessBivariateCovariance getElement(final int i, final int j) {
109            return covMatrix[indexOf(i, j)];
110        }
111    
112        /**
113         * Sets the covariance element at index (i, j) in the covariance matrix
114         * @param i the row index
115         * @param j the column index
116         * @param cov the {@link StorelessBivariateCovariance} element to be set
117         */
118        private void setElement(final int i, final int j,
119                                final StorelessBivariateCovariance cov) {
120            covMatrix[indexOf(i, j)] = cov;
121        }
122    
123        /**
124         * Get the covariance for an individual element of the covariance matrix.
125         *
126         * @param xIndex row index in the covariance matrix
127         * @param yIndex column index in the covariance matrix
128         * @return the covariance of the given element
129         * @throws NumberIsTooSmallException if the number of observations
130         * in the cell is &lt; 2
131         */
132        public double getCovariance(final int xIndex,
133                                    final int yIndex)
134            throws NumberIsTooSmallException {
135    
136            return getElement(xIndex, yIndex).getResult();
137    
138        }
139    
140        /**
141         * Increment the covariance matrix with one row of data.
142         *
143         * @param data array representing one row of data.
144         * @throws DimensionMismatchException if the length of <code>rowData</code>
145         * does not match with the covariance matrix
146         */
147        public void increment(final double[] data)
148            throws DimensionMismatchException {
149    
150            int length = data.length;
151            if (length != dimension) {
152                throw new DimensionMismatchException(length, dimension);
153            }
154    
155            // only update the upper triangular part of the covariance matrix
156            // as only these parts are actually stored
157            for (int i = 0; i < length; i++){
158                for (int j = i; j < length; j++){
159                    getElement(i, j).increment(data[i], data[j]);
160                }
161            }
162    
163        }
164    
165        /**
166         * {@inheritDoc}
167         * @throws NumberIsTooSmallException if the number of observations
168         * in a cell is &lt; 2
169         */
170        @Override
171        public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
172            return MatrixUtils.createRealMatrix(getData());
173        }
174    
175        /**
176         * Return the covariance matrix as two-dimensional array.
177         *
178         * @return a two-dimensional double array of covariance values
179         * @throws NumberIsTooSmallException if the number of observations
180         * for a cell is &lt; 2
181         */
182        public double[][] getData() throws NumberIsTooSmallException {
183            final double[][] data = new double[dimension][dimension];
184            for (int i = 0; i < dimension; i++) {
185                for (int j = 0; j < dimension; j++) {
186                    data[i][j] = getElement(i, j).getResult();
187                }
188            }
189            return data;
190        }
191    
192        /**
193         * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
194         * since the number of bivariate observations does not have to be the same for different
195         * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
196         *
197         * @return nothing as this implementation always throws a
198         * {@link MathUnsupportedOperationException}
199         * @throws MathUnsupportedOperationException in all cases
200         */
201        @Override
202        public int getN()
203            throws MathUnsupportedOperationException {
204            throw new MathUnsupportedOperationException();
205        }
206    }