View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math4.legacy.stat.regression;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.math4.legacy.exception.OutOfRangeException;
22  import org.apache.commons.math4.core.jdkmath.JdkMath;
23  
24  /**
25   * Results of a Multiple Linear Regression model fit.
26   *
27   * @since 3.0
28   */
29  public class RegressionResults {
30  
31      /** INDEX of Sum of Squared Errors. */
32      private static final int SSE_IDX = 0;
33      /** INDEX of Sum of Squares of Model. */
34      private static final int SST_IDX = 1;
35      /** INDEX of R-Squared of regression. */
36      private static final int RSQ_IDX = 2;
37      /** INDEX of Mean Squared Error. */
38      private static final int MSE_IDX = 3;
39      /** INDEX of Adjusted R Squared. */
40      private static final int ADJRSQ_IDX = 4;
41      /** UID. */
42      private static final long serialVersionUID = 1L;
43      /** regression slope parameters. */
44      private final double[] parameters;
45      /** variance covariance matrix of parameters. */
46      private final double[][] varCovData;
47      /** boolean flag for variance covariance matrix in symm compressed storage. */
48      private final boolean isSymmetricVCD;
49      /** rank of the solution. */
50      @SuppressWarnings("unused")
51      private final int rank;
52      /** number of observations on which results are based. */
53      private final long nobs;
54      /** boolean flag indicator of whether a constant was included. */
55      private final boolean containsConstant;
56      /** array storing global results, SSE, MSE, RSQ, adjRSQ. */
57      private final double[] globalFitInfo;
58  
59      /**
60       *  Set the default constructor to private access.
61       *  to prevent inadvertent instantiation
62       */
63      @SuppressWarnings("unused")
64      private RegressionResults() {
65          this.parameters = null;
66          this.varCovData = null;
67          this.rank = -1;
68          this.nobs = -1;
69          this.containsConstant = false;
70          this.isSymmetricVCD = false;
71          this.globalFitInfo = null;
72      }
73  
74      /**
75       * Constructor for Regression Results.
76       *
77       * @param parameters a double array with the regression slope estimates
78       * @param varcov the variance covariance matrix, stored either in a square matrix
79       * or as a compressed
80       * @param isSymmetricCompressed a flag which denotes that the variance covariance
81       * matrix is in symmetric compressed format
82       * @param nobs the number of observations of the regression estimation
83       * @param rank the number of independent variables in the regression
84       * @param sumy the sum of the independent variable
85       * @param sumysq the sum of the squared independent variable
86       * @param sse sum of squared errors
87       * @param containsConstant true model has constant,  false model does not have constant
88       * @param copyData if true a deep copy of all input data is made, if false only references
89       * are copied and the RegressionResults become mutable
90       */
91      public RegressionResults(
92              final double[] parameters, final double[][] varcov,
93              final boolean isSymmetricCompressed,
94              final long nobs, final int rank,
95              final double sumy, final double sumysq, final double sse,
96              final boolean containsConstant,
97              final boolean copyData) {
98          if (copyData) {
99              this.parameters = Arrays.copyOf(parameters, parameters.length);
100             this.varCovData = new double[varcov.length][];
101             for (int i = 0; i < varcov.length; i++) {
102                 this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length);
103             }
104         } else {
105             this.parameters = parameters;
106             this.varCovData = varcov;
107         }
108         this.isSymmetricVCD = isSymmetricCompressed;
109         this.nobs = nobs;
110         this.rank = rank;
111         this.containsConstant = containsConstant;
112         this.globalFitInfo = new double[5];
113         Arrays.fill(this.globalFitInfo, Double.NaN);
114 
115         if (rank > 0) {
116             this.globalFitInfo[SST_IDX] = containsConstant ?
117                     (sumysq - sumy * sumy / nobs) : sumysq;
118         }
119 
120         this.globalFitInfo[SSE_IDX] = sse;
121         this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
122                 (nobs - rank);
123         this.globalFitInfo[RSQ_IDX] = 1.0 -
124                 this.globalFitInfo[SSE_IDX] /
125                 this.globalFitInfo[SST_IDX];
126 
127         if (!containsConstant) {
128             this.globalFitInfo[ADJRSQ_IDX] = 1.0-
129                     (1.0 - this.globalFitInfo[RSQ_IDX]) *
130                     ( (double) nobs / ( (double) (nobs - rank)));
131         } else {
132             this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
133                     (globalFitInfo[SST_IDX] * (nobs - rank));
134         }
135     }
136 
137     /**
138      * <p>Returns the parameter estimate for the regressor at the given index.</p>
139      *
140      * <p>A redundant regressor will have its redundancy flag set, as well as
141      *  a parameters estimated equal to {@code Double.NaN}</p>
142      *
143      * @param index Index.
144      * @return the parameters estimated for regressor at index.
145      * @throws OutOfRangeException if {@code index} is not in the interval
146      * {@code [0, number of parameters)}.
147      */
148     public double getParameterEstimate(int index) throws OutOfRangeException {
149         if (parameters == null) {
150             return Double.NaN;
151         }
152         if (index < 0 || index >= this.parameters.length) {
153             throw new OutOfRangeException(index, 0, this.parameters.length - 1);
154         }
155         return this.parameters[index];
156     }
157 
158     /**
159      * <p>Returns a copy of the regression parameters estimates.</p>
160      *
161      * <p>The parameter estimates are returned in the natural order of the data.</p>
162      *
163      * <p>A redundant regressor will have its redundancy flag set, as will
164      *  a parameter estimate equal to {@code Double.NaN}.</p>
165      *
166      * @return array of parameter estimates, null if no estimation occurred
167      */
168     public double[] getParameterEstimates() {
169         if (this.parameters == null) {
170             return null;
171         }
172         return Arrays.copyOf(parameters, parameters.length);
173     }
174 
175     /**
176      * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
177      * error of the parameter estimate at index</a>,
178      * usually denoted s(b<sub>index</sub>).
179      *
180      * @param index Index.
181      * @return the standard errors associated with parameters estimated at index.
182      * @throws OutOfRangeException if {@code index} is not in the interval
183      * {@code [0, number of parameters)}.
184      */
185     public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
186         if (parameters == null) {
187             return Double.NaN;
188         }
189         if (index < 0 || index >= this.parameters.length) {
190             throw new OutOfRangeException(index, 0, this.parameters.length - 1);
191         }
192         double var = this.getVcvElement(index, index);
193         if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
194             return JdkMath.sqrt(var);
195         }
196         return Double.NaN;
197     }
198 
199     /**
200      * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
201      * error of the parameter estimates</a>,
202      * usually denoted s(b<sub>i</sub>).</p>
203      *
204      * <p>If there are problems with an ill conditioned design matrix then the regressor
205      * which is redundant will be assigned <code>Double.NaN</code>. </p>
206      *
207      * @return an array standard errors associated with parameters estimates,
208      *  null if no estimation occurred
209      */
210     public double[] getStdErrorOfEstimates() {
211         if (parameters == null) {
212             return null;
213         }
214         double[] se = new double[this.parameters.length];
215         for (int i = 0; i < this.parameters.length; i++) {
216             double var = this.getVcvElement(i, i);
217             if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
218                 se[i] = JdkMath.sqrt(var);
219                 continue;
220             }
221             se[i] = Double.NaN;
222         }
223         return se;
224     }
225 
226     /**
227      * <p>Returns the covariance between regression parameters i and j.</p>
228      *
229      * <p>If there are problems with an ill conditioned design matrix then the covariance
230      * which involves redundant columns will be assigned {@code Double.NaN}. </p>
231      *
232      * @param i {@code i}th regression parameter.
233      * @param j {@code j}th regression parameter.
234      * @return the covariance of the parameter estimates.
235      * @throws OutOfRangeException if {@code i} or {@code j} is not in the
236      * interval {@code [0, number of parameters)}.
237      */
238     public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
239         if (parameters == null) {
240             return Double.NaN;
241         }
242         if (i < 0 || i >= this.parameters.length) {
243             throw new OutOfRangeException(i, 0, this.parameters.length - 1);
244         }
245         if (j < 0 || j >= this.parameters.length) {
246             throw new OutOfRangeException(j, 0, this.parameters.length - 1);
247         }
248         return this.getVcvElement(i, j);
249     }
250 
251     /**
252      * <p>Returns the number of parameters estimated in the model.</p>
253      *
254      * <p>This is the maximum number of regressors, some techniques may drop
255      * redundant parameters</p>
256      *
257      * @return number of regressors, -1 if not estimated
258      */
259     public int getNumberOfParameters() {
260         if (this.parameters == null) {
261             return -1;
262         }
263         return this.parameters.length;
264     }
265 
266     /**
267      * Returns the number of observations added to the regression model.
268      *
269      * @return Number of observations, -1 if an error condition prevents estimation
270      */
271     public long getN() {
272         return this.nobs;
273     }
274 
275     /**
276      * <p>Returns the sum of squared deviations of the y values about their mean.</p>
277      *
278      * <p>This is defined as SSTO
279      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
280      *
281      * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
282      *
283      * @return sum of squared deviations of y values
284      */
285     public double getTotalSumSquares() {
286         return this.globalFitInfo[SST_IDX];
287     }
288 
289     /**
290      * <p>Returns the sum of squared deviations of the predicted y values about
291      * their mean (which equals the mean of y).</p>
292      *
293      * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
294      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
295      *
296      * <p><strong>Preconditions</strong>: <ul>
297      * <li>At least two observations (with at least two different x values)
298      * must have been added before invoking this method. If this method is
299      * invoked before a model can be estimated, <code>Double.NaN</code> is
300      * returned.
301      * </li></ul>
302      *
303      * @return sum of squared deviations of predicted y values
304      */
305     public double getRegressionSumSquares() {
306         return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
307     }
308 
309     /**
310      * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
311      * sum of squared errors</a> (SSE) associated with the regression
312      * model.</p>
313      *
314      * <p>The return value is constrained to be non-negative - i.e., if due to
315      * rounding errors the computational formula returns a negative result,
316      * 0 is returned.</p>
317      *
318      * <p><strong>Preconditions</strong>: <ul>
319      * <li>numberOfParameters data pairs
320      * must have been added before invoking this method. If this method is
321      * invoked before a model can be estimated, <code>Double,NaN</code> is
322      * returned.
323      * </li></ul>
324      *
325      * @return sum of squared errors associated with the regression model
326      */
327     public double getErrorSumSquares() {
328         return this.globalFitInfo[ SSE_IDX];
329     }
330 
331     /**
332      * <p>Returns the sum of squared errors divided by the degrees of freedom,
333      * usually abbreviated MSE.</p>
334      *
335      * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
336      * or if there is no variation in <code>x</code>, this returns
337      * <code>Double.NaN</code>.</p>
338      *
339      * @return sum of squared deviations of y values
340      */
341     public double getMeanSquareError() {
342         return this.globalFitInfo[ MSE_IDX];
343     }
344 
345     /**
346      * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
347      * coefficient of multiple determination</a>,
348      * usually denoted r-square.</p>
349      *
350      * <p><strong>Preconditions</strong>: <ul>
351      * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
352      * must have been added before invoking this method. If this method is
353      * invoked before a model can be estimated, {@code Double,NaN} is
354      * returned.
355      * </li></ul>
356      *
357      * @return r-square, a double in the interval [0, 1]
358      */
359     public double getRSquared() {
360         return this.globalFitInfo[ RSQ_IDX];
361     }
362 
363     /**
364      * <p>Returns the adjusted R-squared statistic, defined by the formula <div style="white-space: pre"><code>
365      * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
366      * </code></div>
367      * where SSR is the sum of squared residuals},
368      * SSTO is the total sum of squares}, n is the number
369      * of observations and p is the number of parameters estimated (including the intercept).
370      *
371      * <p>If the regression is estimated without an intercept term, what is returned is <pre>
372      * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
373      * </pre>
374      *
375      * @return adjusted R-Squared statistic
376      */
377     public double getAdjustedRSquared() {
378         return this.globalFitInfo[ ADJRSQ_IDX];
379     }
380 
381     /**
382      * Returns true if the regression model has been computed including an intercept.
383      * In this case, the coefficient of the intercept is the first element of the
384      * {@link #getParameterEstimates() parameter estimates}.
385      * @return true if the model has an intercept term
386      */
387     public boolean hasIntercept() {
388         return this.containsConstant;
389     }
390 
391     /**
392      * Gets the i-jth element of the variance-covariance matrix.
393      *
394      * @param i first variable index
395      * @param j second variable index
396      * @return the requested variance-covariance matrix entry
397      */
398     private double getVcvElement(int i, int j) {
399         if (this.isSymmetricVCD) {
400             if (this.varCovData.length > 1) {
401                 //could be stored in upper or lower triangular
402                 if (i == j) {
403                     return varCovData[i][i];
404                 } else if (i >= varCovData[j].length) {
405                     return varCovData[i][j];
406                 } else {
407                     return varCovData[j][i];
408                 }
409             } else {//could be in single array
410                 if (i > j) {
411                     return varCovData[0][(i + 1) * i / 2 + j];
412                 } else {
413                     return varCovData[0][(j + 1) * j / 2 + i];
414                 }
415             }
416         } else {
417             return this.varCovData[i][j];
418         }
419     }
420 }