001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math4.legacy.stat.regression; 018 019import java.util.Arrays; 020 021import org.apache.commons.math4.legacy.exception.OutOfRangeException; 022import org.apache.commons.math4.core.jdkmath.JdkMath; 023 024/** 025 * Results of a Multiple Linear Regression model fit. 026 * 027 * @since 3.0 028 */ 029public class RegressionResults { 030 031 /** INDEX of Sum of Squared Errors. */ 032 private static final int SSE_IDX = 0; 033 /** INDEX of Sum of Squares of Model. */ 034 private static final int SST_IDX = 1; 035 /** INDEX of R-Squared of regression. */ 036 private static final int RSQ_IDX = 2; 037 /** INDEX of Mean Squared Error. */ 038 private static final int MSE_IDX = 3; 039 /** INDEX of Adjusted R Squared. */ 040 private static final int ADJRSQ_IDX = 4; 041 /** UID. */ 042 private static final long serialVersionUID = 1L; 043 /** regression slope parameters. */ 044 private final double[] parameters; 045 /** variance covariance matrix of parameters. */ 046 private final double[][] varCovData; 047 /** boolean flag for variance covariance matrix in symm compressed storage. */ 048 private final boolean isSymmetricVCD; 049 /** rank of the solution. */ 050 @SuppressWarnings("unused") 051 private final int rank; 052 /** number of observations on which results are based. */ 053 private final long nobs; 054 /** boolean flag indicator of whether a constant was included. */ 055 private final boolean containsConstant; 056 /** array storing global results, SSE, MSE, RSQ, adjRSQ. */ 057 private final double[] globalFitInfo; 058 059 /** 060 * Set the default constructor to private access. 061 * to prevent inadvertent instantiation 062 */ 063 @SuppressWarnings("unused") 064 private RegressionResults() { 065 this.parameters = null; 066 this.varCovData = null; 067 this.rank = -1; 068 this.nobs = -1; 069 this.containsConstant = false; 070 this.isSymmetricVCD = false; 071 this.globalFitInfo = null; 072 } 073 074 /** 075 * Constructor for Regression Results. 076 * 077 * @param parameters a double array with the regression slope estimates 078 * @param varcov the variance covariance matrix, stored either in a square matrix 079 * or as a compressed 080 * @param isSymmetricCompressed a flag which denotes that the variance covariance 081 * matrix is in symmetric compressed format 082 * @param nobs the number of observations of the regression estimation 083 * @param rank the number of independent variables in the regression 084 * @param sumy the sum of the independent variable 085 * @param sumysq the sum of the squared independent variable 086 * @param sse sum of squared errors 087 * @param containsConstant true model has constant, false model does not have constant 088 * @param copyData if true a deep copy of all input data is made, if false only references 089 * are copied and the RegressionResults become mutable 090 */ 091 public RegressionResults( 092 final double[] parameters, final double[][] varcov, 093 final boolean isSymmetricCompressed, 094 final long nobs, final int rank, 095 final double sumy, final double sumysq, final double sse, 096 final boolean containsConstant, 097 final boolean copyData) { 098 if (copyData) { 099 this.parameters = Arrays.copyOf(parameters, parameters.length); 100 this.varCovData = new double[varcov.length][]; 101 for (int i = 0; i < varcov.length; i++) { 102 this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length); 103 } 104 } else { 105 this.parameters = parameters; 106 this.varCovData = varcov; 107 } 108 this.isSymmetricVCD = isSymmetricCompressed; 109 this.nobs = nobs; 110 this.rank = rank; 111 this.containsConstant = containsConstant; 112 this.globalFitInfo = new double[5]; 113 Arrays.fill(this.globalFitInfo, Double.NaN); 114 115 if (rank > 0) { 116 this.globalFitInfo[SST_IDX] = containsConstant ? 117 (sumysq - sumy * sumy / nobs) : sumysq; 118 } 119 120 this.globalFitInfo[SSE_IDX] = sse; 121 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / 122 (nobs - rank); 123 this.globalFitInfo[RSQ_IDX] = 1.0 - 124 this.globalFitInfo[SSE_IDX] / 125 this.globalFitInfo[SST_IDX]; 126 127 if (!containsConstant) { 128 this.globalFitInfo[ADJRSQ_IDX] = 1.0- 129 (1.0 - this.globalFitInfo[RSQ_IDX]) * 130 ( (double) nobs / ( (double) (nobs - rank))); 131 } else { 132 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / 133 (globalFitInfo[SST_IDX] * (nobs - rank)); 134 } 135 } 136 137 /** 138 * <p>Returns the parameter estimate for the regressor at the given index.</p> 139 * 140 * <p>A redundant regressor will have its redundancy flag set, as well as 141 * a parameters estimated equal to {@code Double.NaN}</p> 142 * 143 * @param index Index. 144 * @return the parameters estimated for regressor at index. 145 * @throws OutOfRangeException if {@code index} is not in the interval 146 * {@code [0, number of parameters)}. 147 */ 148 public double getParameterEstimate(int index) throws OutOfRangeException { 149 if (parameters == null) { 150 return Double.NaN; 151 } 152 if (index < 0 || index >= this.parameters.length) { 153 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 154 } 155 return this.parameters[index]; 156 } 157 158 /** 159 * <p>Returns a copy of the regression parameters estimates.</p> 160 * 161 * <p>The parameter estimates are returned in the natural order of the data.</p> 162 * 163 * <p>A redundant regressor will have its redundancy flag set, as will 164 * a parameter estimate equal to {@code Double.NaN}.</p> 165 * 166 * @return array of parameter estimates, null if no estimation occurred 167 */ 168 public double[] getParameterEstimates() { 169 if (this.parameters == null) { 170 return null; 171 } 172 return Arrays.copyOf(parameters, parameters.length); 173 } 174 175 /** 176 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 177 * error of the parameter estimate at index</a>, 178 * usually denoted s(b<sub>index</sub>). 179 * 180 * @param index Index. 181 * @return the standard errors associated with parameters estimated at index. 182 * @throws OutOfRangeException if {@code index} is not in the interval 183 * {@code [0, number of parameters)}. 184 */ 185 public double getStdErrorOfEstimate(int index) throws OutOfRangeException { 186 if (parameters == null) { 187 return Double.NaN; 188 } 189 if (index < 0 || index >= this.parameters.length) { 190 throw new OutOfRangeException(index, 0, this.parameters.length - 1); 191 } 192 double var = this.getVcvElement(index, index); 193 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 194 return JdkMath.sqrt(var); 195 } 196 return Double.NaN; 197 } 198 199 /** 200 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 201 * error of the parameter estimates</a>, 202 * usually denoted s(b<sub>i</sub>).</p> 203 * 204 * <p>If there are problems with an ill conditioned design matrix then the regressor 205 * which is redundant will be assigned <code>Double.NaN</code>. </p> 206 * 207 * @return an array standard errors associated with parameters estimates, 208 * null if no estimation occurred 209 */ 210 public double[] getStdErrorOfEstimates() { 211 if (parameters == null) { 212 return null; 213 } 214 double[] se = new double[this.parameters.length]; 215 for (int i = 0; i < this.parameters.length; i++) { 216 double var = this.getVcvElement(i, i); 217 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 218 se[i] = JdkMath.sqrt(var); 219 continue; 220 } 221 se[i] = Double.NaN; 222 } 223 return se; 224 } 225 226 /** 227 * <p>Returns the covariance between regression parameters i and j.</p> 228 * 229 * <p>If there are problems with an ill conditioned design matrix then the covariance 230 * which involves redundant columns will be assigned {@code Double.NaN}. </p> 231 * 232 * @param i {@code i}th regression parameter. 233 * @param j {@code j}th regression parameter. 234 * @return the covariance of the parameter estimates. 235 * @throws OutOfRangeException if {@code i} or {@code j} is not in the 236 * interval {@code [0, number of parameters)}. 237 */ 238 public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException { 239 if (parameters == null) { 240 return Double.NaN; 241 } 242 if (i < 0 || i >= this.parameters.length) { 243 throw new OutOfRangeException(i, 0, this.parameters.length - 1); 244 } 245 if (j < 0 || j >= this.parameters.length) { 246 throw new OutOfRangeException(j, 0, this.parameters.length - 1); 247 } 248 return this.getVcvElement(i, j); 249 } 250 251 /** 252 * <p>Returns the number of parameters estimated in the model.</p> 253 * 254 * <p>This is the maximum number of regressors, some techniques may drop 255 * redundant parameters</p> 256 * 257 * @return number of regressors, -1 if not estimated 258 */ 259 public int getNumberOfParameters() { 260 if (this.parameters == null) { 261 return -1; 262 } 263 return this.parameters.length; 264 } 265 266 /** 267 * Returns the number of observations added to the regression model. 268 * 269 * @return Number of observations, -1 if an error condition prevents estimation 270 */ 271 public long getN() { 272 return this.nobs; 273 } 274 275 /** 276 * <p>Returns the sum of squared deviations of the y values about their mean.</p> 277 * 278 * <p>This is defined as SSTO 279 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p> 280 * 281 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p> 282 * 283 * @return sum of squared deviations of y values 284 */ 285 public double getTotalSumSquares() { 286 return this.globalFitInfo[SST_IDX]; 287 } 288 289 /** 290 * <p>Returns the sum of squared deviations of the predicted y values about 291 * their mean (which equals the mean of y).</p> 292 * 293 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM 294 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p> 295 * 296 * <p><strong>Preconditions</strong>: <ul> 297 * <li>At least two observations (with at least two different x values) 298 * must have been added before invoking this method. If this method is 299 * invoked before a model can be estimated, <code>Double.NaN</code> is 300 * returned. 301 * </li></ul> 302 * 303 * @return sum of squared deviations of predicted y values 304 */ 305 public double getRegressionSumSquares() { 306 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX]; 307 } 308 309 /** 310 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> 311 * sum of squared errors</a> (SSE) associated with the regression 312 * model.</p> 313 * 314 * <p>The return value is constrained to be non-negative - i.e., if due to 315 * rounding errors the computational formula returns a negative result, 316 * 0 is returned.</p> 317 * 318 * <p><strong>Preconditions</strong>: <ul> 319 * <li>numberOfParameters data pairs 320 * must have been added before invoking this method. If this method is 321 * invoked before a model can be estimated, <code>Double,NaN</code> is 322 * returned. 323 * </li></ul> 324 * 325 * @return sum of squared errors associated with the regression model 326 */ 327 public double getErrorSumSquares() { 328 return this.globalFitInfo[ SSE_IDX]; 329 } 330 331 /** 332 * <p>Returns the sum of squared errors divided by the degrees of freedom, 333 * usually abbreviated MSE.</p> 334 * 335 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model, 336 * or if there is no variation in <code>x</code>, this returns 337 * <code>Double.NaN</code>.</p> 338 * 339 * @return sum of squared deviations of y values 340 */ 341 public double getMeanSquareError() { 342 return this.globalFitInfo[ MSE_IDX]; 343 } 344 345 /** 346 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm"> 347 * coefficient of multiple determination</a>, 348 * usually denoted r-square.</p> 349 * 350 * <p><strong>Preconditions</strong>: <ul> 351 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values) 352 * must have been added before invoking this method. If this method is 353 * invoked before a model can be estimated, {@code Double,NaN} is 354 * returned. 355 * </li></ul> 356 * 357 * @return r-square, a double in the interval [0, 1] 358 */ 359 public double getRSquared() { 360 return this.globalFitInfo[ RSQ_IDX]; 361 } 362 363 /** 364 * <p>Returns the adjusted R-squared statistic, defined by the formula <div style="white-space: pre"><code> 365 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)] 366 * </code></div> 367 * where SSR is the sum of squared residuals}, 368 * SSTO is the total sum of squares}, n is the number 369 * of observations and p is the number of parameters estimated (including the intercept). 370 * 371 * <p>If the regression is estimated without an intercept term, what is returned is <pre> 372 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code> 373 * </pre> 374 * 375 * @return adjusted R-Squared statistic 376 */ 377 public double getAdjustedRSquared() { 378 return this.globalFitInfo[ ADJRSQ_IDX]; 379 } 380 381 /** 382 * Returns true if the regression model has been computed including an intercept. 383 * In this case, the coefficient of the intercept is the first element of the 384 * {@link #getParameterEstimates() parameter estimates}. 385 * @return true if the model has an intercept term 386 */ 387 public boolean hasIntercept() { 388 return this.containsConstant; 389 } 390 391 /** 392 * Gets the i-jth element of the variance-covariance matrix. 393 * 394 * @param i first variable index 395 * @param j second variable index 396 * @return the requested variance-covariance matrix entry 397 */ 398 private double getVcvElement(int i, int j) { 399 if (this.isSymmetricVCD) { 400 if (this.varCovData.length > 1) { 401 //could be stored in upper or lower triangular 402 if (i == j) { 403 return varCovData[i][i]; 404 } else if (i >= varCovData[j].length) { 405 return varCovData[i][j]; 406 } else { 407 return varCovData[j][i]; 408 } 409 } else {//could be in single array 410 if (i > j) { 411 return varCovData[0][(i + 1) * i / 2 + j]; 412 } else { 413 return varCovData[0][(j + 1) * j / 2 + i]; 414 } 415 } 416 } else { 417 return this.varCovData[i][j]; 418 } 419 } 420}