1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.regression;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
22 import org.apache.commons.math4.core.jdkmath.JdkMath;
23
24 /**
25 * Results of a Multiple Linear Regression model fit.
26 *
27 * @since 3.0
28 */
29 public class RegressionResults {
30
31 /** INDEX of Sum of Squared Errors. */
32 private static final int SSE_IDX = 0;
33 /** INDEX of Sum of Squares of Model. */
34 private static final int SST_IDX = 1;
35 /** INDEX of R-Squared of regression. */
36 private static final int RSQ_IDX = 2;
37 /** INDEX of Mean Squared Error. */
38 private static final int MSE_IDX = 3;
39 /** INDEX of Adjusted R Squared. */
40 private static final int ADJRSQ_IDX = 4;
41 /** UID. */
42 private static final long serialVersionUID = 1L;
43 /** regression slope parameters. */
44 private final double[] parameters;
45 /** variance covariance matrix of parameters. */
46 private final double[][] varCovData;
47 /** boolean flag for variance covariance matrix in symm compressed storage. */
48 private final boolean isSymmetricVCD;
49 /** rank of the solution. */
50 @SuppressWarnings("unused")
51 private final int rank;
52 /** number of observations on which results are based. */
53 private final long nobs;
54 /** boolean flag indicator of whether a constant was included. */
55 private final boolean containsConstant;
56 /** array storing global results, SSE, MSE, RSQ, adjRSQ. */
57 private final double[] globalFitInfo;
58
59 /**
60 * Set the default constructor to private access.
61 * to prevent inadvertent instantiation
62 */
63 @SuppressWarnings("unused")
64 private RegressionResults() {
65 this.parameters = null;
66 this.varCovData = null;
67 this.rank = -1;
68 this.nobs = -1;
69 this.containsConstant = false;
70 this.isSymmetricVCD = false;
71 this.globalFitInfo = null;
72 }
73
74 /**
75 * Constructor for Regression Results.
76 *
77 * @param parameters a double array with the regression slope estimates
78 * @param varcov the variance covariance matrix, stored either in a square matrix
79 * or as a compressed
80 * @param isSymmetricCompressed a flag which denotes that the variance covariance
81 * matrix is in symmetric compressed format
82 * @param nobs the number of observations of the regression estimation
83 * @param rank the number of independent variables in the regression
84 * @param sumy the sum of the independent variable
85 * @param sumysq the sum of the squared independent variable
86 * @param sse sum of squared errors
87 * @param containsConstant true model has constant, false model does not have constant
88 * @param copyData if true a deep copy of all input data is made, if false only references
89 * are copied and the RegressionResults become mutable
90 */
91 public RegressionResults(
92 final double[] parameters, final double[][] varcov,
93 final boolean isSymmetricCompressed,
94 final long nobs, final int rank,
95 final double sumy, final double sumysq, final double sse,
96 final boolean containsConstant,
97 final boolean copyData) {
98 if (copyData) {
99 this.parameters = Arrays.copyOf(parameters, parameters.length);
100 this.varCovData = new double[varcov.length][];
101 for (int i = 0; i < varcov.length; i++) {
102 this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length);
103 }
104 } else {
105 this.parameters = parameters;
106 this.varCovData = varcov;
107 }
108 this.isSymmetricVCD = isSymmetricCompressed;
109 this.nobs = nobs;
110 this.rank = rank;
111 this.containsConstant = containsConstant;
112 this.globalFitInfo = new double[5];
113 Arrays.fill(this.globalFitInfo, Double.NaN);
114
115 if (rank > 0) {
116 this.globalFitInfo[SST_IDX] = containsConstant ?
117 (sumysq - sumy * sumy / nobs) : sumysq;
118 }
119
120 this.globalFitInfo[SSE_IDX] = sse;
121 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
122 (nobs - rank);
123 this.globalFitInfo[RSQ_IDX] = 1.0 -
124 this.globalFitInfo[SSE_IDX] /
125 this.globalFitInfo[SST_IDX];
126
127 if (!containsConstant) {
128 this.globalFitInfo[ADJRSQ_IDX] = 1.0-
129 (1.0 - this.globalFitInfo[RSQ_IDX]) *
130 ( (double) nobs / ( (double) (nobs - rank)));
131 } else {
132 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
133 (globalFitInfo[SST_IDX] * (nobs - rank));
134 }
135 }
136
137 /**
138 * <p>Returns the parameter estimate for the regressor at the given index.</p>
139 *
140 * <p>A redundant regressor will have its redundancy flag set, as well as
141 * a parameters estimated equal to {@code Double.NaN}</p>
142 *
143 * @param index Index.
144 * @return the parameters estimated for regressor at index.
145 * @throws OutOfRangeException if {@code index} is not in the interval
146 * {@code [0, number of parameters)}.
147 */
148 public double getParameterEstimate(int index) throws OutOfRangeException {
149 if (parameters == null) {
150 return Double.NaN;
151 }
152 if (index < 0 || index >= this.parameters.length) {
153 throw new OutOfRangeException(index, 0, this.parameters.length - 1);
154 }
155 return this.parameters[index];
156 }
157
158 /**
159 * <p>Returns a copy of the regression parameters estimates.</p>
160 *
161 * <p>The parameter estimates are returned in the natural order of the data.</p>
162 *
163 * <p>A redundant regressor will have its redundancy flag set, as will
164 * a parameter estimate equal to {@code Double.NaN}.</p>
165 *
166 * @return array of parameter estimates, null if no estimation occurred
167 */
168 public double[] getParameterEstimates() {
169 if (this.parameters == null) {
170 return null;
171 }
172 return Arrays.copyOf(parameters, parameters.length);
173 }
174
175 /**
176 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
177 * error of the parameter estimate at index</a>,
178 * usually denoted s(b<sub>index</sub>).
179 *
180 * @param index Index.
181 * @return the standard errors associated with parameters estimated at index.
182 * @throws OutOfRangeException if {@code index} is not in the interval
183 * {@code [0, number of parameters)}.
184 */
185 public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
186 if (parameters == null) {
187 return Double.NaN;
188 }
189 if (index < 0 || index >= this.parameters.length) {
190 throw new OutOfRangeException(index, 0, this.parameters.length - 1);
191 }
192 double var = this.getVcvElement(index, index);
193 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
194 return JdkMath.sqrt(var);
195 }
196 return Double.NaN;
197 }
198
199 /**
200 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
201 * error of the parameter estimates</a>,
202 * usually denoted s(b<sub>i</sub>).</p>
203 *
204 * <p>If there are problems with an ill conditioned design matrix then the regressor
205 * which is redundant will be assigned <code>Double.NaN</code>. </p>
206 *
207 * @return an array standard errors associated with parameters estimates,
208 * null if no estimation occurred
209 */
210 public double[] getStdErrorOfEstimates() {
211 if (parameters == null) {
212 return null;
213 }
214 double[] se = new double[this.parameters.length];
215 for (int i = 0; i < this.parameters.length; i++) {
216 double var = this.getVcvElement(i, i);
217 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
218 se[i] = JdkMath.sqrt(var);
219 continue;
220 }
221 se[i] = Double.NaN;
222 }
223 return se;
224 }
225
226 /**
227 * <p>Returns the covariance between regression parameters i and j.</p>
228 *
229 * <p>If there are problems with an ill conditioned design matrix then the covariance
230 * which involves redundant columns will be assigned {@code Double.NaN}. </p>
231 *
232 * @param i {@code i}th regression parameter.
233 * @param j {@code j}th regression parameter.
234 * @return the covariance of the parameter estimates.
235 * @throws OutOfRangeException if {@code i} or {@code j} is not in the
236 * interval {@code [0, number of parameters)}.
237 */
238 public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
239 if (parameters == null) {
240 return Double.NaN;
241 }
242 if (i < 0 || i >= this.parameters.length) {
243 throw new OutOfRangeException(i, 0, this.parameters.length - 1);
244 }
245 if (j < 0 || j >= this.parameters.length) {
246 throw new OutOfRangeException(j, 0, this.parameters.length - 1);
247 }
248 return this.getVcvElement(i, j);
249 }
250
251 /**
252 * <p>Returns the number of parameters estimated in the model.</p>
253 *
254 * <p>This is the maximum number of regressors, some techniques may drop
255 * redundant parameters</p>
256 *
257 * @return number of regressors, -1 if not estimated
258 */
259 public int getNumberOfParameters() {
260 if (this.parameters == null) {
261 return -1;
262 }
263 return this.parameters.length;
264 }
265
266 /**
267 * Returns the number of observations added to the regression model.
268 *
269 * @return Number of observations, -1 if an error condition prevents estimation
270 */
271 public long getN() {
272 return this.nobs;
273 }
274
275 /**
276 * <p>Returns the sum of squared deviations of the y values about their mean.</p>
277 *
278 * <p>This is defined as SSTO
279 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
280 *
281 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
282 *
283 * @return sum of squared deviations of y values
284 */
285 public double getTotalSumSquares() {
286 return this.globalFitInfo[SST_IDX];
287 }
288
289 /**
290 * <p>Returns the sum of squared deviations of the predicted y values about
291 * their mean (which equals the mean of y).</p>
292 *
293 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM
294 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
295 *
296 * <p><strong>Preconditions</strong>: <ul>
297 * <li>At least two observations (with at least two different x values)
298 * must have been added before invoking this method. If this method is
299 * invoked before a model can be estimated, <code>Double.NaN</code> is
300 * returned.
301 * </li></ul>
302 *
303 * @return sum of squared deviations of predicted y values
304 */
305 public double getRegressionSumSquares() {
306 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
307 }
308
309 /**
310 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
311 * sum of squared errors</a> (SSE) associated with the regression
312 * model.</p>
313 *
314 * <p>The return value is constrained to be non-negative - i.e., if due to
315 * rounding errors the computational formula returns a negative result,
316 * 0 is returned.</p>
317 *
318 * <p><strong>Preconditions</strong>: <ul>
319 * <li>numberOfParameters data pairs
320 * must have been added before invoking this method. If this method is
321 * invoked before a model can be estimated, <code>Double,NaN</code> is
322 * returned.
323 * </li></ul>
324 *
325 * @return sum of squared errors associated with the regression model
326 */
327 public double getErrorSumSquares() {
328 return this.globalFitInfo[ SSE_IDX];
329 }
330
331 /**
332 * <p>Returns the sum of squared errors divided by the degrees of freedom,
333 * usually abbreviated MSE.</p>
334 *
335 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
336 * or if there is no variation in <code>x</code>, this returns
337 * <code>Double.NaN</code>.</p>
338 *
339 * @return sum of squared deviations of y values
340 */
341 public double getMeanSquareError() {
342 return this.globalFitInfo[ MSE_IDX];
343 }
344
345 /**
346 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
347 * coefficient of multiple determination</a>,
348 * usually denoted r-square.</p>
349 *
350 * <p><strong>Preconditions</strong>: <ul>
351 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
352 * must have been added before invoking this method. If this method is
353 * invoked before a model can be estimated, {@code Double,NaN} is
354 * returned.
355 * </li></ul>
356 *
357 * @return r-square, a double in the interval [0, 1]
358 */
359 public double getRSquared() {
360 return this.globalFitInfo[ RSQ_IDX];
361 }
362
363 /**
364 * <p>Returns the adjusted R-squared statistic, defined by the formula <div style="white-space: pre"><code>
365 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
366 * </code></div>
367 * where SSR is the sum of squared residuals},
368 * SSTO is the total sum of squares}, n is the number
369 * of observations and p is the number of parameters estimated (including the intercept).
370 *
371 * <p>If the regression is estimated without an intercept term, what is returned is <pre>
372 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
373 * </pre>
374 *
375 * @return adjusted R-Squared statistic
376 */
377 public double getAdjustedRSquared() {
378 return this.globalFitInfo[ ADJRSQ_IDX];
379 }
380
381 /**
382 * Returns true if the regression model has been computed including an intercept.
383 * In this case, the coefficient of the intercept is the first element of the
384 * {@link #getParameterEstimates() parameter estimates}.
385 * @return true if the model has an intercept term
386 */
387 public boolean hasIntercept() {
388 return this.containsConstant;
389 }
390
391 /**
392 * Gets the i-jth element of the variance-covariance matrix.
393 *
394 * @param i first variable index
395 * @param j second variable index
396 * @return the requested variance-covariance matrix entry
397 */
398 private double getVcvElement(int i, int j) {
399 if (this.isSymmetricVCD) {
400 if (this.varCovData.length > 1) {
401 //could be stored in upper or lower triangular
402 if (i == j) {
403 return varCovData[i][i];
404 } else if (i >= varCovData[j].length) {
405 return varCovData[i][j];
406 } else {
407 return varCovData[j][i];
408 }
409 } else {//could be in single array
410 if (i > j) {
411 return varCovData[0][(i + 1) * i / 2 + j];
412 } else {
413 return varCovData[0][(j + 1) * j / 2 + i];
414 }
415 }
416 } else {
417 return this.varCovData[i][j];
418 }
419 }
420 }