001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math4.legacy.stat.correlation; 018 019import org.apache.commons.math4.legacy.exception.DimensionMismatchException; 020import org.apache.commons.math4.legacy.exception.MathUnsupportedOperationException; 021import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException; 022import org.apache.commons.math4.legacy.linear.MatrixUtils; 023import org.apache.commons.math4.legacy.linear.RealMatrix; 024 025/** 026 * Covariance implementation that does not require input data to be 027 * stored in memory. The size of the covariance matrix is specified in the 028 * constructor. Specific elements of the matrix are incrementally updated with 029 * calls to incrementRow() or increment Covariance(). 030 * 031 * <p>This class is based on a paper written by Philippe Pébay: 032 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf"> 033 * Formulas for Robust, One-Pass Parallel Computation of Covariances and 034 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, 035 * Sandia National Laboratories.</p> 036 * 037 * <p>Note: the underlying covariance matrix is symmetric, thus only the 038 * upper triangular part of the matrix is stored and updated each increment.</p> 039 * 040 * @since 3.0 041 */ 042public class StorelessCovariance extends Covariance { 043 044 /** the square covariance matrix (upper triangular part). */ 045 private StorelessBivariateCovariance[] covMatrix; 046 047 /** dimension of the square covariance matrix. */ 048 private int dimension; 049 050 /** 051 * Create a bias corrected covariance matrix with a given dimension. 052 * 053 * @param dim the dimension of the square covariance matrix 054 */ 055 public StorelessCovariance(final int dim) { 056 this(dim, true); 057 } 058 059 /** 060 * Create a covariance matrix with a given number of rows and columns and the 061 * indicated bias correction. 062 * 063 * @param dim the dimension of the covariance matrix 064 * @param biasCorrected if <code>true</code> the covariance estimate is corrected 065 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, 066 * i.e. n in the denominator. 067 */ 068 public StorelessCovariance(final int dim, final boolean biasCorrected) { 069 dimension = dim; 070 covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2]; 071 initializeMatrix(biasCorrected); 072 } 073 074 /** 075 * Initialize the internal two-dimensional array of 076 * {@link StorelessBivariateCovariance} instances. 077 * 078 * @param biasCorrected if the covariance estimate shall be corrected for bias 079 */ 080 private void initializeMatrix(final boolean biasCorrected) { 081 for(int i = 0; i < dimension; i++){ 082 for(int j = 0; j < dimension; j++){ 083 setElement(i, j, new StorelessBivariateCovariance(biasCorrected)); 084 } 085 } 086 } 087 088 /** 089 * Returns the index (i, j) translated into the one-dimensional 090 * array used to store the upper triangular part of the symmetric 091 * covariance matrix. 092 * 093 * @param i the row index 094 * @param j the column index 095 * @return the corresponding index in the matrix array 096 */ 097 private int indexOf(final int i, final int j) { 098 return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i; 099 } 100 101 /** 102 * Gets the element at index (i, j) from the covariance matrix. 103 * @param i the row index 104 * @param j the column index 105 * @return the {@link StorelessBivariateCovariance} element at the given index 106 */ 107 private StorelessBivariateCovariance getElement(final int i, final int j) { 108 return covMatrix[indexOf(i, j)]; 109 } 110 111 /** 112 * Sets the covariance element at index (i, j) in the covariance matrix. 113 * @param i the row index 114 * @param j the column index 115 * @param cov the {@link StorelessBivariateCovariance} element to be set 116 */ 117 private void setElement(final int i, final int j, 118 final StorelessBivariateCovariance cov) { 119 covMatrix[indexOf(i, j)] = cov; 120 } 121 122 /** 123 * Get the covariance for an individual element of the covariance matrix. 124 * 125 * @param xIndex row index in the covariance matrix 126 * @param yIndex column index in the covariance matrix 127 * @return the covariance of the given element 128 * @throws NumberIsTooSmallException if the number of observations 129 * in the cell is < 2 130 */ 131 public double getCovariance(final int xIndex, 132 final int yIndex) 133 throws NumberIsTooSmallException { 134 135 return getElement(xIndex, yIndex).getResult(); 136 } 137 138 /** 139 * Increment the covariance matrix with one row of data. 140 * 141 * @param data array representing one row of data. 142 * @throws DimensionMismatchException if the length of <code>rowData</code> 143 * does not match with the covariance matrix 144 */ 145 public void increment(final double[] data) 146 throws DimensionMismatchException { 147 148 int length = data.length; 149 if (length != dimension) { 150 throw new DimensionMismatchException(length, dimension); 151 } 152 153 // only update the upper triangular part of the covariance matrix 154 // as only these parts are actually stored 155 for (int i = 0; i < length; i++){ 156 for (int j = i; j < length; j++){ 157 getElement(i, j).increment(data[i], data[j]); 158 } 159 } 160 } 161 162 /** 163 * Appends {@code sc} to this, effectively aggregating the computations in {@code sc} 164 * with this. After invoking this method, covariances returned should be close 165 * to what would have been obtained by performing all of the {@link #increment(double[])} 166 * operations in {@code sc} directly on this. 167 * 168 * @param sc externally computed StorelessCovariance to add to this 169 * @throws DimensionMismatchException if the dimension of sc does not match this 170 * @since 3.3 171 */ 172 public void append(StorelessCovariance sc) throws DimensionMismatchException { 173 if (sc.dimension != dimension) { 174 throw new DimensionMismatchException(sc.dimension, dimension); 175 } 176 177 // only update the upper triangular part of the covariance matrix 178 // as only these parts are actually stored 179 for (int i = 0; i < dimension; i++) { 180 for (int j = i; j < dimension; j++) { 181 getElement(i, j).append(sc.getElement(i, j)); 182 } 183 } 184 } 185 186 /** 187 * {@inheritDoc} 188 * @throws NumberIsTooSmallException if the number of observations 189 * in a cell is < 2 190 */ 191 @Override 192 public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException { 193 return MatrixUtils.createRealMatrix(getData()); 194 } 195 196 /** 197 * Return the covariance matrix as two-dimensional array. 198 * 199 * @return a two-dimensional double array of covariance values 200 * @throws NumberIsTooSmallException if the number of observations 201 * for a cell is < 2 202 */ 203 public double[][] getData() throws NumberIsTooSmallException { 204 final double[][] data = new double[dimension][dimension]; 205 for (int i = 0; i < dimension; i++) { 206 for (int j = 0; j < dimension; j++) { 207 data[i][j] = getElement(i, j).getResult(); 208 } 209 } 210 return data; 211 } 212 213 /** 214 * This {@link Covariance} method is not supported by a {@link StorelessCovariance}, 215 * since the number of bivariate observations does not have to be the same for different 216 * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined. 217 * 218 * @return nothing as this implementation always throws a 219 * {@link MathUnsupportedOperationException} 220 * @throws MathUnsupportedOperationException in all cases 221 */ 222 @Override 223 public int getN() 224 throws MathUnsupportedOperationException { 225 throw new MathUnsupportedOperationException(); 226 } 227}