RegressionResults.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.math4.legacy.stat.regression;

  18. import java.util.Arrays;

  19. import org.apache.commons.math4.legacy.exception.OutOfRangeException;
  20. import org.apache.commons.math4.core.jdkmath.JdkMath;

  21. /**
  22.  * Results of a Multiple Linear Regression model fit.
  23.  *
  24.  * @since 3.0
  25.  */
  26. public class RegressionResults {

  27.     /** INDEX of Sum of Squared Errors. */
  28.     private static final int SSE_IDX = 0;
  29.     /** INDEX of Sum of Squares of Model. */
  30.     private static final int SST_IDX = 1;
  31.     /** INDEX of R-Squared of regression. */
  32.     private static final int RSQ_IDX = 2;
  33.     /** INDEX of Mean Squared Error. */
  34.     private static final int MSE_IDX = 3;
  35.     /** INDEX of Adjusted R Squared. */
  36.     private static final int ADJRSQ_IDX = 4;
  37.     /** UID. */
  38.     private static final long serialVersionUID = 1L;
  39.     /** regression slope parameters. */
  40.     private final double[] parameters;
  41.     /** variance covariance matrix of parameters. */
  42.     private final double[][] varCovData;
  43.     /** boolean flag for variance covariance matrix in symm compressed storage. */
  44.     private final boolean isSymmetricVCD;
  45.     /** rank of the solution. */
  46.     @SuppressWarnings("unused")
  47.     private final int rank;
  48.     /** number of observations on which results are based. */
  49.     private final long nobs;
  50.     /** boolean flag indicator of whether a constant was included. */
  51.     private final boolean containsConstant;
  52.     /** array storing global results, SSE, MSE, RSQ, adjRSQ. */
  53.     private final double[] globalFitInfo;

  54.     /**
  55.      *  Set the default constructor to private access.
  56.      *  to prevent inadvertent instantiation
  57.      */
  58.     @SuppressWarnings("unused")
  59.     private RegressionResults() {
  60.         this.parameters = null;
  61.         this.varCovData = null;
  62.         this.rank = -1;
  63.         this.nobs = -1;
  64.         this.containsConstant = false;
  65.         this.isSymmetricVCD = false;
  66.         this.globalFitInfo = null;
  67.     }

  68.     /**
  69.      * Constructor for Regression Results.
  70.      *
  71.      * @param parameters a double array with the regression slope estimates
  72.      * @param varcov the variance covariance matrix, stored either in a square matrix
  73.      * or as a compressed
  74.      * @param isSymmetricCompressed a flag which denotes that the variance covariance
  75.      * matrix is in symmetric compressed format
  76.      * @param nobs the number of observations of the regression estimation
  77.      * @param rank the number of independent variables in the regression
  78.      * @param sumy the sum of the independent variable
  79.      * @param sumysq the sum of the squared independent variable
  80.      * @param sse sum of squared errors
  81.      * @param containsConstant true model has constant,  false model does not have constant
  82.      * @param copyData if true a deep copy of all input data is made, if false only references
  83.      * are copied and the RegressionResults become mutable
  84.      */
  85.     public RegressionResults(
  86.             final double[] parameters, final double[][] varcov,
  87.             final boolean isSymmetricCompressed,
  88.             final long nobs, final int rank,
  89.             final double sumy, final double sumysq, final double sse,
  90.             final boolean containsConstant,
  91.             final boolean copyData) {
  92.         if (copyData) {
  93.             this.parameters = Arrays.copyOf(parameters, parameters.length);
  94.             this.varCovData = new double[varcov.length][];
  95.             for (int i = 0; i < varcov.length; i++) {
  96.                 this.varCovData[i] = Arrays.copyOf(varcov[i], varcov[i].length);
  97.             }
  98.         } else {
  99.             this.parameters = parameters;
  100.             this.varCovData = varcov;
  101.         }
  102.         this.isSymmetricVCD = isSymmetricCompressed;
  103.         this.nobs = nobs;
  104.         this.rank = rank;
  105.         this.containsConstant = containsConstant;
  106.         this.globalFitInfo = new double[5];
  107.         Arrays.fill(this.globalFitInfo, Double.NaN);

  108.         if (rank > 0) {
  109.             this.globalFitInfo[SST_IDX] = containsConstant ?
  110.                     (sumysq - sumy * sumy / nobs) : sumysq;
  111.         }

  112.         this.globalFitInfo[SSE_IDX] = sse;
  113.         this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
  114.                 (nobs - rank);
  115.         this.globalFitInfo[RSQ_IDX] = 1.0 -
  116.                 this.globalFitInfo[SSE_IDX] /
  117.                 this.globalFitInfo[SST_IDX];

  118.         if (!containsConstant) {
  119.             this.globalFitInfo[ADJRSQ_IDX] = 1.0-
  120.                     (1.0 - this.globalFitInfo[RSQ_IDX]) *
  121.                     ( (double) nobs / ( (double) (nobs - rank)));
  122.         } else {
  123.             this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
  124.                     (globalFitInfo[SST_IDX] * (nobs - rank));
  125.         }
  126.     }

  127.     /**
  128.      * <p>Returns the parameter estimate for the regressor at the given index.</p>
  129.      *
  130.      * <p>A redundant regressor will have its redundancy flag set, as well as
  131.      *  a parameters estimated equal to {@code Double.NaN}</p>
  132.      *
  133.      * @param index Index.
  134.      * @return the parameters estimated for regressor at index.
  135.      * @throws OutOfRangeException if {@code index} is not in the interval
  136.      * {@code [0, number of parameters)}.
  137.      */
  138.     public double getParameterEstimate(int index) throws OutOfRangeException {
  139.         if (parameters == null) {
  140.             return Double.NaN;
  141.         }
  142.         if (index < 0 || index >= this.parameters.length) {
  143.             throw new OutOfRangeException(index, 0, this.parameters.length - 1);
  144.         }
  145.         return this.parameters[index];
  146.     }

  147.     /**
  148.      * <p>Returns a copy of the regression parameters estimates.</p>
  149.      *
  150.      * <p>The parameter estimates are returned in the natural order of the data.</p>
  151.      *
  152.      * <p>A redundant regressor will have its redundancy flag set, as will
  153.      *  a parameter estimate equal to {@code Double.NaN}.</p>
  154.      *
  155.      * @return array of parameter estimates, null if no estimation occurred
  156.      */
  157.     public double[] getParameterEstimates() {
  158.         if (this.parameters == null) {
  159.             return null;
  160.         }
  161.         return Arrays.copyOf(parameters, parameters.length);
  162.     }

  163.     /**
  164.      * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
  165.      * error of the parameter estimate at index</a>,
  166.      * usually denoted s(b<sub>index</sub>).
  167.      *
  168.      * @param index Index.
  169.      * @return the standard errors associated with parameters estimated at index.
  170.      * @throws OutOfRangeException if {@code index} is not in the interval
  171.      * {@code [0, number of parameters)}.
  172.      */
  173.     public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
  174.         if (parameters == null) {
  175.             return Double.NaN;
  176.         }
  177.         if (index < 0 || index >= this.parameters.length) {
  178.             throw new OutOfRangeException(index, 0, this.parameters.length - 1);
  179.         }
  180.         double var = this.getVcvElement(index, index);
  181.         if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
  182.             return JdkMath.sqrt(var);
  183.         }
  184.         return Double.NaN;
  185.     }

  186.     /**
  187.      * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
  188.      * error of the parameter estimates</a>,
  189.      * usually denoted s(b<sub>i</sub>).</p>
  190.      *
  191.      * <p>If there are problems with an ill conditioned design matrix then the regressor
  192.      * which is redundant will be assigned <code>Double.NaN</code>. </p>
  193.      *
  194.      * @return an array standard errors associated with parameters estimates,
  195.      *  null if no estimation occurred
  196.      */
  197.     public double[] getStdErrorOfEstimates() {
  198.         if (parameters == null) {
  199.             return null;
  200.         }
  201.         double[] se = new double[this.parameters.length];
  202.         for (int i = 0; i < this.parameters.length; i++) {
  203.             double var = this.getVcvElement(i, i);
  204.             if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
  205.                 se[i] = JdkMath.sqrt(var);
  206.                 continue;
  207.             }
  208.             se[i] = Double.NaN;
  209.         }
  210.         return se;
  211.     }

  212.     /**
  213.      * <p>Returns the covariance between regression parameters i and j.</p>
  214.      *
  215.      * <p>If there are problems with an ill conditioned design matrix then the covariance
  216.      * which involves redundant columns will be assigned {@code Double.NaN}. </p>
  217.      *
  218.      * @param i {@code i}th regression parameter.
  219.      * @param j {@code j}th regression parameter.
  220.      * @return the covariance of the parameter estimates.
  221.      * @throws OutOfRangeException if {@code i} or {@code j} is not in the
  222.      * interval {@code [0, number of parameters)}.
  223.      */
  224.     public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
  225.         if (parameters == null) {
  226.             return Double.NaN;
  227.         }
  228.         if (i < 0 || i >= this.parameters.length) {
  229.             throw new OutOfRangeException(i, 0, this.parameters.length - 1);
  230.         }
  231.         if (j < 0 || j >= this.parameters.length) {
  232.             throw new OutOfRangeException(j, 0, this.parameters.length - 1);
  233.         }
  234.         return this.getVcvElement(i, j);
  235.     }

  236.     /**
  237.      * <p>Returns the number of parameters estimated in the model.</p>
  238.      *
  239.      * <p>This is the maximum number of regressors, some techniques may drop
  240.      * redundant parameters</p>
  241.      *
  242.      * @return number of regressors, -1 if not estimated
  243.      */
  244.     public int getNumberOfParameters() {
  245.         if (this.parameters == null) {
  246.             return -1;
  247.         }
  248.         return this.parameters.length;
  249.     }

  250.     /**
  251.      * Returns the number of observations added to the regression model.
  252.      *
  253.      * @return Number of observations, -1 if an error condition prevents estimation
  254.      */
  255.     public long getN() {
  256.         return this.nobs;
  257.     }

  258.     /**
  259.      * <p>Returns the sum of squared deviations of the y values about their mean.</p>
  260.      *
  261.      * <p>This is defined as SSTO
  262.      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
  263.      *
  264.      * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
  265.      *
  266.      * @return sum of squared deviations of y values
  267.      */
  268.     public double getTotalSumSquares() {
  269.         return this.globalFitInfo[SST_IDX];
  270.     }

  271.     /**
  272.      * <p>Returns the sum of squared deviations of the predicted y values about
  273.      * their mean (which equals the mean of y).</p>
  274.      *
  275.      * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
  276.      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
  277.      *
  278.      * <p><strong>Preconditions</strong>: <ul>
  279.      * <li>At least two observations (with at least two different x values)
  280.      * must have been added before invoking this method. If this method is
  281.      * invoked before a model can be estimated, <code>Double.NaN</code> is
  282.      * returned.
  283.      * </li></ul>
  284.      *
  285.      * @return sum of squared deviations of predicted y values
  286.      */
  287.     public double getRegressionSumSquares() {
  288.         return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
  289.     }

  290.     /**
  291.      * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
  292.      * sum of squared errors</a> (SSE) associated with the regression
  293.      * model.</p>
  294.      *
  295.      * <p>The return value is constrained to be non-negative - i.e., if due to
  296.      * rounding errors the computational formula returns a negative result,
  297.      * 0 is returned.</p>
  298.      *
  299.      * <p><strong>Preconditions</strong>: <ul>
  300.      * <li>numberOfParameters data pairs
  301.      * must have been added before invoking this method. If this method is
  302.      * invoked before a model can be estimated, <code>Double,NaN</code> is
  303.      * returned.
  304.      * </li></ul>
  305.      *
  306.      * @return sum of squared errors associated with the regression model
  307.      */
  308.     public double getErrorSumSquares() {
  309.         return this.globalFitInfo[ SSE_IDX];
  310.     }

  311.     /**
  312.      * <p>Returns the sum of squared errors divided by the degrees of freedom,
  313.      * usually abbreviated MSE.</p>
  314.      *
  315.      * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
  316.      * or if there is no variation in <code>x</code>, this returns
  317.      * <code>Double.NaN</code>.</p>
  318.      *
  319.      * @return sum of squared deviations of y values
  320.      */
  321.     public double getMeanSquareError() {
  322.         return this.globalFitInfo[ MSE_IDX];
  323.     }

  324.     /**
  325.      * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
  326.      * coefficient of multiple determination</a>,
  327.      * usually denoted r-square.</p>
  328.      *
  329.      * <p><strong>Preconditions</strong>: <ul>
  330.      * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
  331.      * must have been added before invoking this method. If this method is
  332.      * invoked before a model can be estimated, {@code Double,NaN} is
  333.      * returned.
  334.      * </li></ul>
  335.      *
  336.      * @return r-square, a double in the interval [0, 1]
  337.      */
  338.     public double getRSquared() {
  339.         return this.globalFitInfo[ RSQ_IDX];
  340.     }

  341.     /**
  342.      * <p>Returns the adjusted R-squared statistic, defined by the formula <div style="white-space: pre"><code>
  343.      * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
  344.      * </code></div>
  345.      * where SSR is the sum of squared residuals},
  346.      * SSTO is the total sum of squares}, n is the number
  347.      * of observations and p is the number of parameters estimated (including the intercept).
  348.      *
  349.      * <p>If the regression is estimated without an intercept term, what is returned is <pre>
  350.      * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
  351.      * </pre>
  352.      *
  353.      * @return adjusted R-Squared statistic
  354.      */
  355.     public double getAdjustedRSquared() {
  356.         return this.globalFitInfo[ ADJRSQ_IDX];
  357.     }

  358.     /**
  359.      * Returns true if the regression model has been computed including an intercept.
  360.      * In this case, the coefficient of the intercept is the first element of the
  361.      * {@link #getParameterEstimates() parameter estimates}.
  362.      * @return true if the model has an intercept term
  363.      */
  364.     public boolean hasIntercept() {
  365.         return this.containsConstant;
  366.     }

  367.     /**
  368.      * Gets the i-jth element of the variance-covariance matrix.
  369.      *
  370.      * @param i first variable index
  371.      * @param j second variable index
  372.      * @return the requested variance-covariance matrix entry
  373.      */
  374.     private double getVcvElement(int i, int j) {
  375.         if (this.isSymmetricVCD) {
  376.             if (this.varCovData.length > 1) {
  377.                 //could be stored in upper or lower triangular
  378.                 if (i == j) {
  379.                     return varCovData[i][i];
  380.                 } else if (i >= varCovData[j].length) {
  381.                     return varCovData[i][j];
  382.                 } else {
  383.                     return varCovData[j][i];
  384.                 }
  385.             } else {//could be in single array
  386.                 if (i > j) {
  387.                     return varCovData[0][(i + 1) * i / 2 + j];
  388.                 } else {
  389.                     return varCovData[0][(j + 1) * j / 2 + i];
  390.                 }
  391.             }
  392.         } else {
  393.             return this.varCovData[i][j];
  394.         }
  395.     }
  396. }