001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.legacy.stat.regression;
018
019import java.util.Arrays;
020
021import org.apache.commons.math4.legacy.exception.OutOfRangeException;
022import org.apache.commons.math4.core.jdkmath.JdkMath;
023
024/**
025 * Results of a Multiple Linear Regression model fit.
026 *
027 * @since 3.0
028 */
029public class RegressionResults {
030
031    /** INDEX of Sum of Squared Errors. */
032    private static final int SSE_IDX = 0;
033    /** INDEX of Sum of Squares of Model. */
034    private static final int SST_IDX = 1;
035    /** INDEX of R-Squared of regression. */
036    private static final int RSQ_IDX = 2;
037    /** INDEX of Mean Squared Error. */
038    private static final int MSE_IDX = 3;
039    /** INDEX of Adjusted R Squared. */
040    private static final int ADJRSQ_IDX = 4;
041    /** UID. */
042    private static final long serialVersionUID = 1L;
043    /** regression slope parameters. */
044    private final double[] parameters;
045    /** variance covariance matrix of parameters. */
046    private final double[][] varCovData;
047    /** boolean flag for variance covariance matrix in symm compressed storage. */
048    private final boolean isSymmetricVCD;
049    /** rank of the solution. */
050    @SuppressWarnings("unused")
051    private final int rank;
052    /** number of observations on which results are based. */
053    private final long nobs;
054    /** boolean flag indicator of whether a constant was included. */
055    private final boolean containsConstant;
056    /** array storing global results, SSE, MSE, RSQ, adjRSQ. */
057    private final double[] globalFitInfo;
058
059    /**
060     *  Set the default constructor to private access.
061     *  to prevent inadvertent instantiation
062     */
063    @SuppressWarnings("unused")
064    private RegressionResults() {
065        this.parameters = null;
066        this.varCovData = null;
067        this.rank = -1;
068        this.nobs = -1;
069        this.containsConstant = false;
070        this.isSymmetricVCD = false;
071        this.globalFitInfo = null;
072    }
073
074    /**
075     * Constructor for Regression Results.
076     *
077     * @param parameters a double array with the regression slope estimates
078     * @param varcov the variance covariance matrix, stored either in a square matrix
079     * or as a compressed
080     * @param isSymmetricCompressed a flag which denotes that the variance covariance
081     * matrix is in symmetric compressed format
082     * @param nobs the number of observations of the regression estimation
083     * @param rank the number of independent variables in the regression
084     * @param sumy the sum of the independent variable
085     * @param sumysq the sum of the squared independent variable
086     * @param sse sum of squared errors
087     * @param containsConstant true model has constant,  false model does not have constant
088     * @param copyData if true a deep copy of all input data is made, if false only references
089     * are copied and the RegressionResults become mutable
090     */
091    public RegressionResults(
092            final double[] parameters, final double[][] varcov,
093            final boolean isSymmetricCompressed,
094            final long nobs, final int rank,
095            final double sumy, final double sumysq, final double sse,
096            final boolean containsConstant,
097            final boolean copyData) {
098        if (copyData) {
099            this.parameters = Arrays.copyOf(parameters, parameters.length);
100            this.varCovData = new double[varcov.length][];
101            for (int i = 0; i < varcov.length; i++) {
102                this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length);
103            }
104        } else {
105            this.parameters = parameters;
106            this.varCovData = varcov;
107        }
108        this.isSymmetricVCD = isSymmetricCompressed;
109        this.nobs = nobs;
110        this.rank = rank;
111        this.containsConstant = containsConstant;
112        this.globalFitInfo = new double[5];
113        Arrays.fill(this.globalFitInfo, Double.NaN);
114
115        if (rank > 0) {
116            this.globalFitInfo[SST_IDX] = containsConstant ?
117                    (sumysq - sumy * sumy / nobs) : sumysq;
118        }
119
120        this.globalFitInfo[SSE_IDX] = sse;
121        this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
122                (nobs - rank);
123        this.globalFitInfo[RSQ_IDX] = 1.0 -
124                this.globalFitInfo[SSE_IDX] /
125                this.globalFitInfo[SST_IDX];
126
127        if (!containsConstant) {
128            this.globalFitInfo[ADJRSQ_IDX] = 1.0-
129                    (1.0 - this.globalFitInfo[RSQ_IDX]) *
130                    ( (double) nobs / ( (double) (nobs - rank)));
131        } else {
132            this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
133                    (globalFitInfo[SST_IDX] * (nobs - rank));
134        }
135    }
136
137    /**
138     * <p>Returns the parameter estimate for the regressor at the given index.</p>
139     *
140     * <p>A redundant regressor will have its redundancy flag set, as well as
141     *  a parameters estimated equal to {@code Double.NaN}</p>
142     *
143     * @param index Index.
144     * @return the parameters estimated for regressor at index.
145     * @throws OutOfRangeException if {@code index} is not in the interval
146     * {@code [0, number of parameters)}.
147     */
148    public double getParameterEstimate(int index) throws OutOfRangeException {
149        if (parameters == null) {
150            return Double.NaN;
151        }
152        if (index < 0 || index >= this.parameters.length) {
153            throw new OutOfRangeException(index, 0, this.parameters.length - 1);
154        }
155        return this.parameters[index];
156    }
157
158    /**
159     * <p>Returns a copy of the regression parameters estimates.</p>
160     *
161     * <p>The parameter estimates are returned in the natural order of the data.</p>
162     *
163     * <p>A redundant regressor will have its redundancy flag set, as will
164     *  a parameter estimate equal to {@code Double.NaN}.</p>
165     *
166     * @return array of parameter estimates, null if no estimation occurred
167     */
168    public double[] getParameterEstimates() {
169        if (this.parameters == null) {
170            return null;
171        }
172        return Arrays.copyOf(parameters, parameters.length);
173    }
174
175    /**
176     * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
177     * error of the parameter estimate at index</a>,
178     * usually denoted s(b<sub>index</sub>).
179     *
180     * @param index Index.
181     * @return the standard errors associated with parameters estimated at index.
182     * @throws OutOfRangeException if {@code index} is not in the interval
183     * {@code [0, number of parameters)}.
184     */
185    public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
186        if (parameters == null) {
187            return Double.NaN;
188        }
189        if (index < 0 || index >= this.parameters.length) {
190            throw new OutOfRangeException(index, 0, this.parameters.length - 1);
191        }
192        double var = this.getVcvElement(index, index);
193        if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
194            return JdkMath.sqrt(var);
195        }
196        return Double.NaN;
197    }
198
199    /**
200     * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
201     * error of the parameter estimates</a>,
202     * usually denoted s(b<sub>i</sub>).</p>
203     *
204     * <p>If there are problems with an ill conditioned design matrix then the regressor
205     * which is redundant will be assigned <code>Double.NaN</code>. </p>
206     *
207     * @return an array standard errors associated with parameters estimates,
208     *  null if no estimation occurred
209     */
210    public double[] getStdErrorOfEstimates() {
211        if (parameters == null) {
212            return null;
213        }
214        double[] se = new double[this.parameters.length];
215        for (int i = 0; i < this.parameters.length; i++) {
216            double var = this.getVcvElement(i, i);
217            if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
218                se[i] = JdkMath.sqrt(var);
219                continue;
220            }
221            se[i] = Double.NaN;
222        }
223        return se;
224    }
225
226    /**
227     * <p>Returns the covariance between regression parameters i and j.</p>
228     *
229     * <p>If there are problems with an ill conditioned design matrix then the covariance
230     * which involves redundant columns will be assigned {@code Double.NaN}. </p>
231     *
232     * @param i {@code i}th regression parameter.
233     * @param j {@code j}th regression parameter.
234     * @return the covariance of the parameter estimates.
235     * @throws OutOfRangeException if {@code i} or {@code j} is not in the
236     * interval {@code [0, number of parameters)}.
237     */
238    public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
239        if (parameters == null) {
240            return Double.NaN;
241        }
242        if (i < 0 || i >= this.parameters.length) {
243            throw new OutOfRangeException(i, 0, this.parameters.length - 1);
244        }
245        if (j < 0 || j >= this.parameters.length) {
246            throw new OutOfRangeException(j, 0, this.parameters.length - 1);
247        }
248        return this.getVcvElement(i, j);
249    }
250
251    /**
252     * <p>Returns the number of parameters estimated in the model.</p>
253     *
254     * <p>This is the maximum number of regressors, some techniques may drop
255     * redundant parameters</p>
256     *
257     * @return number of regressors, -1 if not estimated
258     */
259    public int getNumberOfParameters() {
260        if (this.parameters == null) {
261            return -1;
262        }
263        return this.parameters.length;
264    }
265
266    /**
267     * Returns the number of observations added to the regression model.
268     *
269     * @return Number of observations, -1 if an error condition prevents estimation
270     */
271    public long getN() {
272        return this.nobs;
273    }
274
275    /**
276     * <p>Returns the sum of squared deviations of the y values about their mean.</p>
277     *
278     * <p>This is defined as SSTO
279     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
280     *
281     * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
282     *
283     * @return sum of squared deviations of y values
284     */
285    public double getTotalSumSquares() {
286        return this.globalFitInfo[SST_IDX];
287    }
288
289    /**
290     * <p>Returns the sum of squared deviations of the predicted y values about
291     * their mean (which equals the mean of y).</p>
292     *
293     * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
294     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
295     *
296     * <p><strong>Preconditions</strong>: <ul>
297     * <li>At least two observations (with at least two different x values)
298     * must have been added before invoking this method. If this method is
299     * invoked before a model can be estimated, <code>Double.NaN</code> is
300     * returned.
301     * </li></ul>
302     *
303     * @return sum of squared deviations of predicted y values
304     */
305    public double getRegressionSumSquares() {
306        return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
307    }
308
309    /**
310     * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
311     * sum of squared errors</a> (SSE) associated with the regression
312     * model.</p>
313     *
314     * <p>The return value is constrained to be non-negative - i.e., if due to
315     * rounding errors the computational formula returns a negative result,
316     * 0 is returned.</p>
317     *
318     * <p><strong>Preconditions</strong>: <ul>
319     * <li>numberOfParameters data pairs
320     * must have been added before invoking this method. If this method is
321     * invoked before a model can be estimated, <code>Double,NaN</code> is
322     * returned.
323     * </li></ul>
324     *
325     * @return sum of squared errors associated with the regression model
326     */
327    public double getErrorSumSquares() {
328        return this.globalFitInfo[ SSE_IDX];
329    }
330
331    /**
332     * <p>Returns the sum of squared errors divided by the degrees of freedom,
333     * usually abbreviated MSE.</p>
334     *
335     * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
336     * or if there is no variation in <code>x</code>, this returns
337     * <code>Double.NaN</code>.</p>
338     *
339     * @return sum of squared deviations of y values
340     */
341    public double getMeanSquareError() {
342        return this.globalFitInfo[ MSE_IDX];
343    }
344
345    /**
346     * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
347     * coefficient of multiple determination</a>,
348     * usually denoted r-square.</p>
349     *
350     * <p><strong>Preconditions</strong>: <ul>
351     * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
352     * must have been added before invoking this method. If this method is
353     * invoked before a model can be estimated, {@code Double,NaN} is
354     * returned.
355     * </li></ul>
356     *
357     * @return r-square, a double in the interval [0, 1]
358     */
359    public double getRSquared() {
360        return this.globalFitInfo[ RSQ_IDX];
361    }
362
363    /**
364     * <p>Returns the adjusted R-squared statistic, defined by the formula <div style="white-space: pre"><code>
365     * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
366     * </code></div>
367     * where SSR is the sum of squared residuals},
368     * SSTO is the total sum of squares}, n is the number
369     * of observations and p is the number of parameters estimated (including the intercept).
370     *
371     * <p>If the regression is estimated without an intercept term, what is returned is <pre>
372     * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
373     * </pre>
374     *
375     * @return adjusted R-Squared statistic
376     */
377    public double getAdjustedRSquared() {
378        return this.globalFitInfo[ ADJRSQ_IDX];
379    }
380
381    /**
382     * Returns true if the regression model has been computed including an intercept.
383     * In this case, the coefficient of the intercept is the first element of the
384     * {@link #getParameterEstimates() parameter estimates}.
385     * @return true if the model has an intercept term
386     */
387    public boolean hasIntercept() {
388        return this.containsConstant;
389    }
390
391    /**
392     * Gets the i-jth element of the variance-covariance matrix.
393     *
394     * @param i first variable index
395     * @param j second variable index
396     * @return the requested variance-covariance matrix entry
397     */
398    private double getVcvElement(int i, int j) {
399        if (this.isSymmetricVCD) {
400            if (this.varCovData.length > 1) {
401                //could be stored in upper or lower triangular
402                if (i == j) {
403                    return varCovData[i][i];
404                } else if (i >= varCovData[j].length) {
405                    return varCovData[i][j];
406                } else {
407                    return varCovData[j][i];
408                }
409            } else {//could be in single array
410                if (i > j) {
411                    return varCovData[0][(i + 1) * i / 2 + j];
412                } else {
413                    return varCovData[0][(j + 1) * j / 2 + i];
414                }
415            }
416        } else {
417            return this.varCovData[i][j];
418        }
419    }
420}