001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.stat.regression; 018 019import java.io.Serializable; 020import java.util.Arrays; 021import org.apache.commons.math3.util.FastMath; 022import org.apache.commons.math3.util.MathArrays; 023import org.apache.commons.math3.exception.OutOfRangeException; 024 025/** 026 * Results of a Multiple Linear Regression model fit. 027 * 028 * @since 3.0 029 */ 030public class RegressionResults implements Serializable { 031 032 /** INDEX of Sum of Squared Errors */ 033 private static final int SSE_IDX = 0; 034 /** INDEX of Sum of Squares of Model */ 035 private static final int SST_IDX = 1; 036 /** INDEX of R-Squared of regression */ 037 private static final int RSQ_IDX = 2; 038 /** INDEX of Mean Squared Error */ 039 private static final int MSE_IDX = 3; 040 /** INDEX of Adjusted R Squared */ 041 private static final int ADJRSQ_IDX = 4; 042 /** UID */ 043 private static final long serialVersionUID = 1l; 044 /** regression slope parameters */ 045 private final double[] parameters; 046 /** variance covariance matrix of parameters */ 047 private final double[][] varCovData; 048 /** boolean flag for variance covariance matrix in symm compressed storage */ 049 private final boolean isSymmetricVCD; 050 /** rank of the solution */ 051 @SuppressWarnings("unused") 052 private final int rank; 053 /** number of observations on which results are based */ 054 private final long nobs; 055 /** boolean flag indicator of whether a constant was included*/ 056 private final boolean containsConstant; 057 /** array storing global results, SSE, MSE, RSQ, adjRSQ */ 058 private final double[] globalFitInfo; 059 060 /** 061 * Set the default constructor to private access 062 * to prevent inadvertent instantiation 063 */ 064 @SuppressWarnings("unused") 065 private RegressionResults() { 066 this.parameters = null; 067 this.varCovData = null; 068 this.rank = -1; 069 this.nobs = -1; 070 this.containsConstant = false; 071 this.isSymmetricVCD = false; 072 this.globalFitInfo = null; 073 } 074 075 /** 076 * Constructor for Regression Results. 077 * 078 * @param parameters a double array with the regression slope estimates 079 * @param varcov the variance covariance matrix, stored either in a square matrix 080 * or as a compressed 081 * @param isSymmetricCompressed a flag which denotes that the variance covariance 082 * matrix is in symmetric compressed format 083 * @param nobs the number of observations of the regression estimation 084 * @param rank the number of independent variables in the regression 085 * @param sumy the sum of the independent variable 086 * @param sumysq the sum of the squared independent variable 087 * @param sse sum of squared errors 088 * @param containsConstant true model has constant, false model does not have constant 089 * @param copyData if true a deep copy of all input data is made, if false only references 090 * are copied and the RegressionResults become mutable 091 */ 092 public RegressionResults( 093 final double[] parameters, final double[][] varcov, 094 final boolean isSymmetricCompressed, 095 final long nobs, final int rank, 096 final double sumy, final double sumysq, final double sse, 097 final boolean containsConstant, 098 final boolean copyData) { 099 if (copyData) { 100 this.parameters = MathArrays.copyOf(parameters); 101 this.varCovData = new double[varcov.length][]; 102 for (int i = 0; i < varcov.length; i++) { 103 this.varCovData[i] = MathArrays.copyOf(varcov[i]); 104 } 105 } else { 106 this.parameters = parameters; 107 this.varCovData = varcov; 108 } 109 this.isSymmetricVCD = isSymmetricCompressed; 110 this.nobs = nobs; 111 this.rank = rank; 112 this.containsConstant = containsConstant; 113 this.globalFitInfo = new double[5]; 114 Arrays.fill(this.globalFitInfo, Double.NaN); 115 116 if (rank > 0) { 117 this.globalFitInfo[SST_IDX] = containsConstant ? 118 (sumysq - sumy * sumy / nobs) : sumysq; 119 } 120 121 this.globalFitInfo[SSE_IDX] = sse; 122 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / 123 (nobs - rank); 124 this.globalFitInfo[RSQ_IDX] = 1.0 - 125 this.globalFitInfo[SSE_IDX] / 126 this.globalFitInfo[SST_IDX]; 127 128 if (!containsConstant) { 129 this.globalFitInfo[ADJRSQ_IDX] = 1.0- 130 (1.0 - this.globalFitInfo[RSQ_IDX]) * 131 ( (double) nobs / ( (double) (nobs - rank))); 132 } else { 133 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / 134 (globalFitInfo[SST_IDX] * (nobs - rank)); 135 } 136 } 137 138 /** 139 * <p>Returns the parameter estimate for the regressor at the given index.</p> 140 * 141 * <p>A redundant regressor will have its redundancy flag set, as well as 142 * a parameters estimated equal to {@code Double.NaN}</p> 143 * 144 * @param index Index. 145 * @return the parameters estimated for regressor at index. 146 * @throws OutOfRangeException if {@code index} is not in the interval 147 * {@code [0, number of parameters)}. 148 */ 149 public double getParameterEstimate(int index) throws OutOfRangeException { 150 if (parameters == null) { 151 return Double.NaN; 152 } 153 if (index < 0 || index >= this.parameters.length) { 154 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 155 } 156 return this.parameters[index]; 157 } 158 159 /** 160 * <p>Returns a copy of the regression parameters estimates.</p> 161 * 162 * <p>The parameter estimates are returned in the natural order of the data.</p> 163 * 164 * <p>A redundant regressor will have its redundancy flag set, as will 165 * a parameter estimate equal to {@code Double.NaN}.</p> 166 * 167 * @return array of parameter estimates, null if no estimation occurred 168 */ 169 public double[] getParameterEstimates() { 170 if (this.parameters == null) { 171 return null; 172 } 173 return MathArrays.copyOf(parameters); 174 } 175 176 /** 177 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 178 * error of the parameter estimate at index</a>, 179 * usually denoted s(b<sub>index</sub>). 180 * 181 * @param index Index. 182 * @return the standard errors associated with parameters estimated at index. 183 * @throws OutOfRangeException if {@code index} is not in the interval 184 * {@code [0, number of parameters)}. 185 */ 186 public double getStdErrorOfEstimate(int index) throws OutOfRangeException { 187 if (parameters == null) { 188 return Double.NaN; 189 } 190 if (index < 0 || index >= this.parameters.length) { 191 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 192 } 193 double var = this.getVcvElement(index, index); 194 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 195 return FastMath.sqrt(var); 196 } 197 return Double.NaN; 198 } 199 200 /** 201 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 202 * error of the parameter estimates</a>, 203 * usually denoted s(b<sub>i</sub>).</p> 204 * 205 * <p>If there are problems with an ill conditioned design matrix then the regressor 206 * which is redundant will be assigned <code>Double.NaN</code>. </p> 207 * 208 * @return an array standard errors associated with parameters estimates, 209 * null if no estimation occurred 210 */ 211 public double[] getStdErrorOfEstimates() { 212 if (parameters == null) { 213 return null; 214 } 215 double[] se = new double[this.parameters.length]; 216 for (int i = 0; i < this.parameters.length; i++) { 217 double var = this.getVcvElement(i, i); 218 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 219 se[i] = FastMath.sqrt(var); 220 continue; 221 } 222 se[i] = Double.NaN; 223 } 224 return se; 225 } 226 227 /** 228 * <p>Returns the covariance between regression parameters i and j.</p> 229 * 230 * <p>If there are problems with an ill conditioned design matrix then the covariance 231 * which involves redundant columns will be assigned {@code Double.NaN}. </p> 232 * 233 * @param i {@code i}th regression parameter. 234 * @param j {@code j}th regression parameter. 235 * @return the covariance of the parameter estimates. 236 * @throws OutOfRangeException if {@code i} or {@code j} is not in the 237 * interval {@code [0, number of parameters)}. 238 */ 239 public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException { 240 if (parameters == null) { 241 return Double.NaN; 242 } 243 if (i < 0 || i >= this.parameters.length) { 244 throw new OutOfRangeException(i, 0, this.parameters.length - 1); 245 } 246 if (j < 0 || j >= this.parameters.length) { 247 throw new OutOfRangeException(j, 0, this.parameters.length - 1); 248 } 249 return this.getVcvElement(i, j); 250 } 251 252 /** 253 * <p>Returns the number of parameters estimated in the model.</p> 254 * 255 * <p>This is the maximum number of regressors, some techniques may drop 256 * redundant parameters</p> 257 * 258 * @return number of regressors, -1 if not estimated 259 */ 260 public int getNumberOfParameters() { 261 if (this.parameters == null) { 262 return -1; 263 } 264 return this.parameters.length; 265 } 266 267 /** 268 * Returns the number of observations added to the regression model. 269 * 270 * @return Number of observations, -1 if an error condition prevents estimation 271 */ 272 public long getN() { 273 return this.nobs; 274 } 275 276 /** 277 * <p>Returns the sum of squared deviations of the y values about their mean.</p> 278 * 279 * <p>This is defined as SSTO 280 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p> 281 * 282 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p> 283 * 284 * @return sum of squared deviations of y values 285 */ 286 public double getTotalSumSquares() { 287 return this.globalFitInfo[SST_IDX]; 288 } 289 290 /** 291 * <p>Returns the sum of squared deviations of the predicted y values about 292 * their mean (which equals the mean of y).</p> 293 * 294 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM 295 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p> 296 * 297 * <p><strong>Preconditions</strong>: <ul> 298 * <li>At least two observations (with at least two different x values) 299 * must have been added before invoking this method. If this method is 300 * invoked before a model can be estimated, <code>Double.NaN</code> is 301 * returned. 302 * </li></ul></p> 303 * 304 * @return sum of squared deviations of predicted y values 305 */ 306 public double getRegressionSumSquares() { 307 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX]; 308 } 309 310 /** 311 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> 312 * sum of squared errors</a> (SSE) associated with the regression 313 * model.</p> 314 * 315 * <p>The return value is constrained to be non-negative - i.e., if due to 316 * rounding errors the computational formula returns a negative result, 317 * 0 is returned.</p> 318 * 319 * <p><strong>Preconditions</strong>: <ul> 320 * <li>numberOfParameters data pairs 321 * must have been added before invoking this method. If this method is 322 * invoked before a model can be estimated, <code>Double,NaN</code> is 323 * returned. 324 * </li></ul></p> 325 * 326 * @return sum of squared errors associated with the regression model 327 */ 328 public double getErrorSumSquares() { 329 return this.globalFitInfo[ SSE_IDX]; 330 } 331 332 /** 333 * <p>Returns the sum of squared errors divided by the degrees of freedom, 334 * usually abbreviated MSE.</p> 335 * 336 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model, 337 * or if there is no variation in <code>x</code>, this returns 338 * <code>Double.NaN</code>.</p> 339 * 340 * @return sum of squared deviations of y values 341 */ 342 public double getMeanSquareError() { 343 return this.globalFitInfo[ MSE_IDX]; 344 } 345 346 /** 347 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm"> 348 * coefficient of multiple determination</a>, 349 * usually denoted r-square.</p> 350 * 351 * <p><strong>Preconditions</strong>: <ul> 352 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values) 353 * must have been added before invoking this method. If this method is 354 * invoked before a model can be estimated, {@code Double,NaN} is 355 * returned. 356 * </li></ul></p> 357 * 358 * @return r-square, a double in the interval [0, 1] 359 */ 360 public double getRSquared() { 361 return this.globalFitInfo[ RSQ_IDX]; 362 } 363 364 /** 365 * <p>Returns the adjusted R-squared statistic, defined by the formula <pre> 366 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)] 367 * </pre> 368 * where SSR is the sum of squared residuals}, 369 * SSTO is the total sum of squares}, n is the number 370 * of observations and p is the number of parameters estimated (including the intercept).</p> 371 * 372 * <p>If the regression is estimated without an intercept term, what is returned is <pre> 373 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code> 374 * </pre></p> 375 * 376 * @return adjusted R-Squared statistic 377 */ 378 public double getAdjustedRSquared() { 379 return this.globalFitInfo[ ADJRSQ_IDX]; 380 } 381 382 /** 383 * Returns true if the regression model has been computed including an intercept. 384 * In this case, the coefficient of the intercept is the first element of the 385 * {@link #getParameterEstimates() parameter estimates}. 386 * @return true if the model has an intercept term 387 */ 388 public boolean hasIntercept() { 389 return this.containsConstant; 390 } 391 392 /** 393 * Gets the i-jth element of the variance-covariance matrix. 394 * 395 * @param i first variable index 396 * @param j second variable index 397 * @return the requested variance-covariance matrix entry 398 */ 399 private double getVcvElement(int i, int j) { 400 if (this.isSymmetricVCD) { 401 if (this.varCovData.length > 1) { 402 //could be stored in upper or lower triangular 403 if (i == j) { 404 return varCovData[i][i]; 405 } else if (i >= varCovData[j].length) { 406 return varCovData[i][j]; 407 } else { 408 return varCovData[j][i]; 409 } 410 } else {//could be in single array 411 if (i > j) { 412 return varCovData[0][(i + 1) * i / 2 + j]; 413 } else { 414 return varCovData[0][(j + 1) * j / 2 + i]; 415 } 416 } 417 } else { 418 return this.varCovData[i][j]; 419 } 420 } 421}