View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.correlation;
18  
19  import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
20  import org.apache.commons.math4.legacy.exception.MathUnsupportedOperationException;
21  import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
22  import org.apache.commons.math4.legacy.linear.MatrixUtils;
23  import org.apache.commons.math4.legacy.linear.RealMatrix;
24  
25  /**
26   * Covariance implementation that does not require input data to be
27   * stored in memory. The size of the covariance matrix is specified in the
28   * constructor. Specific elements of the matrix are incrementally updated with
29   * calls to incrementRow() or increment Covariance().
30   *
31   * <p>This class is based on a paper written by Philippe P&eacute;bay:
32   * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
33   * Formulas for Robust, One-Pass Parallel Computation of Covariances and
34   * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
35   * Sandia National Laboratories.</p>
36   *
37   * <p>Note: the underlying covariance matrix is symmetric, thus only the
38   * upper triangular part of the matrix is stored and updated each increment.</p>
39   *
40   * @since 3.0
41   */
42  public class StorelessCovariance extends Covariance {
43  
44      /** the square covariance matrix (upper triangular part). */
45      private StorelessBivariateCovariance[] covMatrix;
46  
47      /** dimension of the square covariance matrix. */
48      private int dimension;
49  
50      /**
51       * Create a bias corrected covariance matrix with a given dimension.
52       *
53       * @param dim the dimension of the square covariance matrix
54       */
55      public StorelessCovariance(final int dim) {
56          this(dim, true);
57      }
58  
59      /**
60       * Create a covariance matrix with a given number of rows and columns and the
61       * indicated bias correction.
62       *
63       * @param dim the dimension of the covariance matrix
64       * @param biasCorrected if <code>true</code> the covariance estimate is corrected
65       * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
66       * i.e. n in the denominator.
67       */
68      public StorelessCovariance(final int dim, final boolean biasCorrected) {
69          dimension = dim;
70          covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
71          initializeMatrix(biasCorrected);
72      }
73  
74      /**
75       * Initialize the internal two-dimensional array of
76       * {@link StorelessBivariateCovariance} instances.
77       *
78       * @param biasCorrected if the covariance estimate shall be corrected for bias
79       */
80      private void initializeMatrix(final boolean biasCorrected) {
81          for(int i = 0; i < dimension; i++){
82              for(int j = 0; j < dimension; j++){
83                  setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
84              }
85          }
86      }
87  
88      /**
89       * Returns the index (i, j) translated into the one-dimensional
90       * array used to store the upper triangular part of the symmetric
91       * covariance matrix.
92       *
93       * @param i the row index
94       * @param j the column index
95       * @return the corresponding index in the matrix array
96       */
97      private int indexOf(final int i, final int j) {
98          return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
99      }
100 
101     /**
102      * Gets the element at index (i, j) from the covariance matrix.
103      * @param i the row index
104      * @param j the column index
105      * @return the {@link StorelessBivariateCovariance} element at the given index
106      */
107     private StorelessBivariateCovariance getElement(final int i, final int j) {
108         return covMatrix[indexOf(i, j)];
109     }
110 
111     /**
112      * Sets the covariance element at index (i, j) in the covariance matrix.
113      * @param i the row index
114      * @param j the column index
115      * @param cov the {@link StorelessBivariateCovariance} element to be set
116      */
117     private void setElement(final int i, final int j,
118                             final StorelessBivariateCovariance cov) {
119         covMatrix[indexOf(i, j)] = cov;
120     }
121 
122     /**
123      * Get the covariance for an individual element of the covariance matrix.
124      *
125      * @param xIndex row index in the covariance matrix
126      * @param yIndex column index in the covariance matrix
127      * @return the covariance of the given element
128      * @throws NumberIsTooSmallException if the number of observations
129      * in the cell is &lt; 2
130      */
131     public double getCovariance(final int xIndex,
132                                 final int yIndex)
133         throws NumberIsTooSmallException {
134 
135         return getElement(xIndex, yIndex).getResult();
136     }
137 
138     /**
139      * Increment the covariance matrix with one row of data.
140      *
141      * @param data array representing one row of data.
142      * @throws DimensionMismatchException if the length of <code>rowData</code>
143      * does not match with the covariance matrix
144      */
145     public void increment(final double[] data)
146         throws DimensionMismatchException {
147 
148         int length = data.length;
149         if (length != dimension) {
150             throw new DimensionMismatchException(length, dimension);
151         }
152 
153         // only update the upper triangular part of the covariance matrix
154         // as only these parts are actually stored
155         for (int i = 0; i < length; i++){
156             for (int j = i; j < length; j++){
157                 getElement(i, j).increment(data[i], data[j]);
158             }
159         }
160     }
161 
162     /**
163      * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
164      * with this.  After invoking this method, covariances returned should be close
165      * to what would have been obtained by performing all of the {@link #increment(double[])}
166      * operations in {@code sc} directly on this.
167      *
168      * @param sc externally computed StorelessCovariance to add to this
169      * @throws DimensionMismatchException if the dimension of sc does not match this
170      * @since 3.3
171      */
172     public void append(StorelessCovariance sc) throws DimensionMismatchException {
173         if (sc.dimension != dimension) {
174             throw new DimensionMismatchException(sc.dimension, dimension);
175         }
176 
177         // only update the upper triangular part of the covariance matrix
178         // as only these parts are actually stored
179         for (int i = 0; i < dimension; i++) {
180             for (int j = i; j < dimension; j++) {
181                 getElement(i, j).append(sc.getElement(i, j));
182             }
183         }
184     }
185 
186     /**
187      * {@inheritDoc}
188      * @throws NumberIsTooSmallException if the number of observations
189      * in a cell is &lt; 2
190      */
191     @Override
192     public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
193         return MatrixUtils.createRealMatrix(getData());
194     }
195 
196     /**
197      * Return the covariance matrix as two-dimensional array.
198      *
199      * @return a two-dimensional double array of covariance values
200      * @throws NumberIsTooSmallException if the number of observations
201      * for a cell is &lt; 2
202      */
203     public double[][] getData() throws NumberIsTooSmallException {
204         final double[][] data = new double[dimension][dimension];
205         for (int i = 0; i < dimension; i++) {
206             for (int j = 0; j < dimension; j++) {
207                 data[i][j] = getElement(i, j).getResult();
208             }
209         }
210         return data;
211     }
212 
213     /**
214      * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
215      * since the number of bivariate observations does not have to be the same for different
216      * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
217      *
218      * @return nothing as this implementation always throws a
219      * {@link MathUnsupportedOperationException}
220      * @throws MathUnsupportedOperationException in all cases
221      */
222     @Override
223     public int getN()
224         throws MathUnsupportedOperationException {
225         throw new MathUnsupportedOperationException();
226     }
227 }