001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.regression;
018
019import java.io.Serializable;
020import java.util.Arrays;
021import org.apache.commons.math3.util.FastMath;
022import org.apache.commons.math3.util.MathArrays;
023import org.apache.commons.math3.exception.OutOfRangeException;
024
025/**
026 * Results of a Multiple Linear Regression model fit.
027 *
028 * @since 3.0
029 */
030public class RegressionResults implements Serializable {
031
032    /** INDEX of Sum of Squared Errors */
033    private static final int SSE_IDX = 0;
034    /** INDEX of Sum of Squares of Model */
035    private static final int SST_IDX = 1;
036    /** INDEX of R-Squared of regression */
037    private static final int RSQ_IDX = 2;
038    /** INDEX of Mean Squared Error */
039    private static final int MSE_IDX = 3;
040    /** INDEX of Adjusted R Squared */
041    private static final int ADJRSQ_IDX = 4;
042    /** UID */
043    private static final long serialVersionUID = 1l;
044    /** regression slope parameters */
045    private final double[] parameters;
046    /** variance covariance matrix of parameters */
047    private final double[][] varCovData;
048    /** boolean flag for variance covariance matrix in symm compressed storage */
049    private final boolean isSymmetricVCD;
050    /** rank of the solution */
051    @SuppressWarnings("unused")
052    private final int rank;
053    /** number of observations on which results are based */
054    private final long nobs;
055    /** boolean flag indicator of whether a constant was included*/
056    private final boolean containsConstant;
057    /** array storing global results, SSE, MSE, RSQ, adjRSQ */
058    private final double[] globalFitInfo;
059
060    /**
061     *  Set the default constructor to private access
062     *  to prevent inadvertent instantiation
063     */
064    @SuppressWarnings("unused")
065    private RegressionResults() {
066        this.parameters = null;
067        this.varCovData = null;
068        this.rank = -1;
069        this.nobs = -1;
070        this.containsConstant = false;
071        this.isSymmetricVCD = false;
072        this.globalFitInfo = null;
073    }
074
075    /**
076     * Constructor for Regression Results.
077     *
078     * @param parameters a double array with the regression slope estimates
079     * @param varcov the variance covariance matrix, stored either in a square matrix
080     * or as a compressed
081     * @param isSymmetricCompressed a flag which denotes that the variance covariance
082     * matrix is in symmetric compressed format
083     * @param nobs the number of observations of the regression estimation
084     * @param rank the number of independent variables in the regression
085     * @param sumy the sum of the independent variable
086     * @param sumysq the sum of the squared independent variable
087     * @param sse sum of squared errors
088     * @param containsConstant true model has constant,  false model does not have constant
089     * @param copyData if true a deep copy of all input data is made, if false only references
090     * are copied and the RegressionResults become mutable
091     */
092    public RegressionResults(
093            final double[] parameters, final double[][] varcov,
094            final boolean isSymmetricCompressed,
095            final long nobs, final int rank,
096            final double sumy, final double sumysq, final double sse,
097            final boolean containsConstant,
098            final boolean copyData) {
099        if (copyData) {
100            this.parameters = MathArrays.copyOf(parameters);
101            this.varCovData = new double[varcov.length][];
102            for (int i = 0; i < varcov.length; i++) {
103                this.varCovData[i] = MathArrays.copyOf(varcov[i]);
104            }
105        } else {
106            this.parameters = parameters;
107            this.varCovData = varcov;
108        }
109        this.isSymmetricVCD = isSymmetricCompressed;
110        this.nobs = nobs;
111        this.rank = rank;
112        this.containsConstant = containsConstant;
113        this.globalFitInfo = new double[5];
114        Arrays.fill(this.globalFitInfo, Double.NaN);
115
116        if (rank > 0) {
117            this.globalFitInfo[SST_IDX] = containsConstant ?
118                    (sumysq - sumy * sumy / nobs) : sumysq;
119        }
120
121        this.globalFitInfo[SSE_IDX] = sse;
122        this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
123                (nobs - rank);
124        this.globalFitInfo[RSQ_IDX] = 1.0 -
125                this.globalFitInfo[SSE_IDX] /
126                this.globalFitInfo[SST_IDX];
127
128        if (!containsConstant) {
129            this.globalFitInfo[ADJRSQ_IDX] = 1.0-
130                    (1.0 - this.globalFitInfo[RSQ_IDX]) *
131                    ( (double) nobs / ( (double) (nobs - rank)));
132        } else {
133            this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
134                    (globalFitInfo[SST_IDX] * (nobs - rank));
135        }
136    }
137
138    /**
139     * <p>Returns the parameter estimate for the regressor at the given index.</p>
140     *
141     * <p>A redundant regressor will have its redundancy flag set, as well as
142     *  a parameters estimated equal to {@code Double.NaN}</p>
143     *
144     * @param index Index.
145     * @return the parameters estimated for regressor at index.
146     * @throws OutOfRangeException if {@code index} is not in the interval
147     * {@code [0, number of parameters)}.
148     */
149    public double getParameterEstimate(int index) throws OutOfRangeException {
150        if (parameters == null) {
151            return Double.NaN;
152        }
153        if (index < 0 || index >= this.parameters.length) {
154            throw new OutOfRangeException(index, 0, this.parameters.length - 1);
155        }
156        return this.parameters[index];
157    }
158
159    /**
160     * <p>Returns a copy of the regression parameters estimates.</p>
161     *
162     * <p>The parameter estimates are returned in the natural order of the data.</p>
163     *
164     * <p>A redundant regressor will have its redundancy flag set, as will
165     *  a parameter estimate equal to {@code Double.NaN}.</p>
166     *
167     * @return array of parameter estimates, null if no estimation occurred
168     */
169    public double[] getParameterEstimates() {
170        if (this.parameters == null) {
171            return null;
172        }
173        return MathArrays.copyOf(parameters);
174    }
175
176    /**
177     * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
178     * error of the parameter estimate at index</a>,
179     * usually denoted s(b<sub>index</sub>).
180     *
181     * @param index Index.
182     * @return the standard errors associated with parameters estimated at index.
183     * @throws OutOfRangeException if {@code index} is not in the interval
184     * {@code [0, number of parameters)}.
185     */
186    public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
187        if (parameters == null) {
188            return Double.NaN;
189        }
190        if (index < 0 || index >= this.parameters.length) {
191            throw new OutOfRangeException(index, 0, this.parameters.length - 1);
192        }
193        double var = this.getVcvElement(index, index);
194        if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
195            return FastMath.sqrt(var);
196        }
197        return Double.NaN;
198    }
199
200    /**
201     * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
202     * error of the parameter estimates</a>,
203     * usually denoted s(b<sub>i</sub>).</p>
204     *
205     * <p>If there are problems with an ill conditioned design matrix then the regressor
206     * which is redundant will be assigned <code>Double.NaN</code>. </p>
207     *
208     * @return an array standard errors associated with parameters estimates,
209     *  null if no estimation occurred
210     */
211    public double[] getStdErrorOfEstimates() {
212        if (parameters == null) {
213            return null;
214        }
215        double[] se = new double[this.parameters.length];
216        for (int i = 0; i < this.parameters.length; i++) {
217            double var = this.getVcvElement(i, i);
218            if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
219                se[i] = FastMath.sqrt(var);
220                continue;
221            }
222            se[i] = Double.NaN;
223        }
224        return se;
225    }
226
227    /**
228     * <p>Returns the covariance between regression parameters i and j.</p>
229     *
230     * <p>If there are problems with an ill conditioned design matrix then the covariance
231     * which involves redundant columns will be assigned {@code Double.NaN}. </p>
232     *
233     * @param i {@code i}th regression parameter.
234     * @param j {@code j}th regression parameter.
235     * @return the covariance of the parameter estimates.
236     * @throws OutOfRangeException if {@code i} or {@code j} is not in the
237     * interval {@code [0, number of parameters)}.
238     */
239    public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
240        if (parameters == null) {
241            return Double.NaN;
242        }
243        if (i < 0 || i >= this.parameters.length) {
244            throw new OutOfRangeException(i, 0, this.parameters.length - 1);
245        }
246        if (j < 0 || j >= this.parameters.length) {
247            throw new OutOfRangeException(j, 0, this.parameters.length - 1);
248        }
249        return this.getVcvElement(i, j);
250    }
251
252    /**
253     * <p>Returns the number of parameters estimated in the model.</p>
254     *
255     * <p>This is the maximum number of regressors, some techniques may drop
256     * redundant parameters</p>
257     *
258     * @return number of regressors, -1 if not estimated
259     */
260    public int getNumberOfParameters() {
261        if (this.parameters == null) {
262            return -1;
263        }
264        return this.parameters.length;
265    }
266
267    /**
268     * Returns the number of observations added to the regression model.
269     *
270     * @return Number of observations, -1 if an error condition prevents estimation
271     */
272    public long getN() {
273        return this.nobs;
274    }
275
276    /**
277     * <p>Returns the sum of squared deviations of the y values about their mean.</p>
278     *
279     * <p>This is defined as SSTO
280     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
281     *
282     * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
283     *
284     * @return sum of squared deviations of y values
285     */
286    public double getTotalSumSquares() {
287        return this.globalFitInfo[SST_IDX];
288    }
289
290    /**
291     * <p>Returns the sum of squared deviations of the predicted y values about
292     * their mean (which equals the mean of y).</p>
293     *
294     * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
295     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
296     *
297     * <p><strong>Preconditions</strong>: <ul>
298     * <li>At least two observations (with at least two different x values)
299     * must have been added before invoking this method. If this method is
300     * invoked before a model can be estimated, <code>Double.NaN</code> is
301     * returned.
302     * </li></ul></p>
303     *
304     * @return sum of squared deviations of predicted y values
305     */
306    public double getRegressionSumSquares() {
307        return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
308    }
309
310    /**
311     * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
312     * sum of squared errors</a> (SSE) associated with the regression
313     * model.</p>
314     *
315     * <p>The return value is constrained to be non-negative - i.e., if due to
316     * rounding errors the computational formula returns a negative result,
317     * 0 is returned.</p>
318     *
319     * <p><strong>Preconditions</strong>: <ul>
320     * <li>numberOfParameters data pairs
321     * must have been added before invoking this method. If this method is
322     * invoked before a model can be estimated, <code>Double,NaN</code> is
323     * returned.
324     * </li></ul></p>
325     *
326     * @return sum of squared errors associated with the regression model
327     */
328    public double getErrorSumSquares() {
329        return this.globalFitInfo[ SSE_IDX];
330    }
331
332    /**
333     * <p>Returns the sum of squared errors divided by the degrees of freedom,
334     * usually abbreviated MSE.</p>
335     *
336     * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
337     * or if there is no variation in <code>x</code>, this returns
338     * <code>Double.NaN</code>.</p>
339     *
340     * @return sum of squared deviations of y values
341     */
342    public double getMeanSquareError() {
343        return this.globalFitInfo[ MSE_IDX];
344    }
345
346    /**
347     * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
348     * coefficient of multiple determination</a>,
349     * usually denoted r-square.</p>
350     *
351     * <p><strong>Preconditions</strong>: <ul>
352     * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
353     * must have been added before invoking this method. If this method is
354     * invoked before a model can be estimated, {@code Double,NaN} is
355     * returned.
356     * </li></ul></p>
357     *
358     * @return r-square, a double in the interval [0, 1]
359     */
360    public double getRSquared() {
361        return this.globalFitInfo[ RSQ_IDX];
362    }
363
364    /**
365     * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
366     * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
367     * </pre>
368     * where SSR is the sum of squared residuals},
369     * SSTO is the total sum of squares}, n is the number
370     * of observations and p is the number of parameters estimated (including the intercept).</p>
371     *
372     * <p>If the regression is estimated without an intercept term, what is returned is <pre>
373     * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
374     * </pre></p>
375     *
376     * @return adjusted R-Squared statistic
377     */
378    public double getAdjustedRSquared() {
379        return this.globalFitInfo[ ADJRSQ_IDX];
380    }
381
382    /**
383     * Returns true if the regression model has been computed including an intercept.
384     * In this case, the coefficient of the intercept is the first element of the
385     * {@link #getParameterEstimates() parameter estimates}.
386     * @return true if the model has an intercept term
387     */
388    public boolean hasIntercept() {
389        return this.containsConstant;
390    }
391
392    /**
393     * Gets the i-jth element of the variance-covariance matrix.
394     *
395     * @param i first variable index
396     * @param j second variable index
397     * @return the requested variance-covariance matrix entry
398     */
399    private double getVcvElement(int i, int j) {
400        if (this.isSymmetricVCD) {
401            if (this.varCovData.length > 1) {
402                //could be stored in upper or lower triangular
403                if (i == j) {
404                    return varCovData[i][i];
405                } else if (i >= varCovData[j].length) {
406                    return varCovData[i][j];
407                } else {
408                    return varCovData[j][i];
409                }
410            } else {//could be in single array
411                if (i > j) {
412                    return varCovData[0][(i + 1) * i / 2 + j];
413                } else {
414                    return varCovData[0][(j + 1) * j / 2 + i];
415                }
416            }
417        } else {
418            return this.varCovData[i][j];
419        }
420    }
421}