MillerUpdatingRegression.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.legacy.stat.regression;

import java.util.Arrays;

import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
import org.apache.commons.math4.core.jdkmath.JdkMath;
import org.apache.commons.numbers.core.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
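 * <p>A minimal usage sketch (illustrative only; the data values here are made up):
 * <pre>{@code
 * MillerUpdatingRegression reg = new MillerUpdatingRegression(2, true);
 * reg.addObservation(new double[]{1.0, 2.0}, 3.0);
 * reg.addObservation(new double[]{2.0, 5.0}, 9.0);
 * reg.addObservation(new double[]{3.0, 7.0}, 12.0);
 * reg.addObservation(new double[]{4.0, 9.0}, 15.0);
 * RegressionResults results = reg.regress();
 * double[] beta = results.getParameterEstimates();
 * }</pre>
 *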
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression. */
    private final int nvars;
    /** diagonals of cross products matrix. */
    private final double[] d;
    /** the elements of R'y. */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix. */
    private final double[] r;
    /** the tolerance for each of the variables. */
    private final double[] tol;
    /** residual sum of squares for all nested regressions. */
    private final double[] rss;
    /** order of the regressors. */
    private final int[] vorder;
    /** scratch space for tolerance calc. */
    private final double[] workTolset;
    /** number of observations entered. */
    private long nobs;
    /** sum of squared errors of largest regression. */
    private double sserr;
    /** has ss() been called to compute the residual sums of squares? */
    private boolean rssSet;
    /** has the tolerance setting method been called. */
    private boolean tolSet;
    /** flags for variables with linear dependency problems. */
    private final boolean[] lindep;
    /** singular x values. */
    private final double[] xSing;
    /** workspace for singularity method. */
    private final double[] workSing;
    /** summation of Y variable. */
    private double sumy;
    /** summation of squared Y values. */
    private double sumsqy;
    /** boolean flag whether a regression constant is added. */
    private final boolean hasIntercept;
    /** zero tolerance. */
    private final double epsilon;
    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance  zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.xSing = new double[this.nvars];
        this.workSing = new double[this.nvars];
        this.workTolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
               (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(Arrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if (x == null || y == null || x.length != y.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                  (x == null) ? 0 : x.length,
                  (y == null) ? 0 : y.length);
        }
        if (x.length == 0) {  // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in the Miller paper, the original implementation overwrites the x
     * vector; in this implementation the include method is passed a copy of the
     * original data vector, so there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes dense
     * design matrices; the original Gentleman algorithm retains some advantage on
     * sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation (-1,1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double wPrev;
        this.rssSet = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
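        // A sketch of the update performed for each nonzero x[i] (Gentleman's
        // scaled Givens step, per AS 75.1 / AS 274), where d'_i = d_i + w * x_i^2:
        //   w    <- (d_i / d'_i) * w
        //   r_ik <- (d_i * r_ik + w_prev * x_i * x_k) / d'_i   for k > i
        //   x_k  <- x_k - x_i * r_ik(old)
        // The same recurrence updates rhs[i] and the working regressand y.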
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            wPrev = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (JdkMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers a and b such that the contamination due to
     * numerical smallness of one addend does not corrupt the sum.
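     *
     * <p>For illustration (assuming {@code Precision.EPSILON} of roughly 2.2e-16):
     * {@code smartAdd(1.0e15, 1.0)} evaluates {@code 1.0e15 + 1.0}, since the smaller
     * addend lies above the larger addend's precision threshold, while
     * {@code smartAdd(1.0e17, 1.0)} returns {@code 1.0e17} unchanged because the
     * smaller addend falls below it.</p>
     *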
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double aa = JdkMath.abs(a);
        final double ba = JdkMath.abs(b);
        if (aa > ba) {
            final double eps = aa * Precision.EPSILON;
            if (ba > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = ba * Precision.EPSILON;
            if (aa > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything into
     * the canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.workTolset, 0.0);
        Arrays.fill(this.workSing, 0.0);
        Arrays.fill(this.xSing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rssSet = false;
        this.tolSet = false;
    }

    /**
     * This sets up tolerances for singularity testing.
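     *
     * <p>Following the TOLSET routine of AS 274, the tolerance for column
     * {@code col} is {@code eps} times the sum of the absolute values of the
     * scaled elements of that column:
     * {@code tol[col] = eps * (sqrt(d[col]) + sum over row < col of
     * |r| * sqrt(d[row]))}.</p>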
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.workTolset[i] = JdkMath.sqrt(d[i]);
        }
        tol[0] = eps * this.workTolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = workTolset[col];
            for (int row = 0; row < col; row++) {
                total += JdkMath.abs(r[pos]) * workTolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tolSet = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tolSet) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
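        // Back-substitution through the unit upper-triangular factor R:
        //   beta_i = rhs_i - sum over j > i of r_ij * beta_j,
        // working from the last included regressor up to the first.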
        for (int i = nreq - 1; i > -1; i--) {
            if (JdkMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            workSing[i] = JdkMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (JdkMath.abs(r[pos]) * workSing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (workSing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(xSing, 0.0);
                    int pi = col * (nvars + nvars - col - 1) / 2;
                    for (int xi = col + 1; xi < nvars; xi++, pi++) {
                        xSing[xi] = r[pi];
                        r[pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(xSing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] = {
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
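        // Working backwards from the full model: dropping variable i from the
        // regression adds its contribution d[i] * rhs[i]^2 to the residual sum
        // of squares, so rss[i - 1] = sserr + sum over k >= i of d[k] * rhs[k]^2.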
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rssSet = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
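     *
     * <p>Equivalently, element {@code (i, j)} with {@code i >= j} is stored at
     * packed index {@code i * (i + 1) / 2 + j}.</p>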
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv  the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
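        // In outline: since R is unit upper triangular, its inverse follows by
        // back-substitution,
        //   inv(R)[row][col] = -r[row][col]
        //                      - sum over row < k < col of r[row][k] * inv(R)[k][col],
        // computed from the bottom row upwards, skipping linearly dependent rows.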
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        // Reject an out-of-range in before any allocation or array access;
        // note in < 0 rather than in < -1, since d[in] is dereferenced below.
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rmsOff = -in;
        final int wrkOff = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = basePos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx);
            } else {
                rms[col + rmsOff] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / JdkMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = basePos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrkOff] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double x;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        boolean bSkipTo40 = false;
        if (from == to) {
            return;
        }
        if (!this.rssSet) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

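            // A planar rotation swaps adjacent variables m and m + 1. With
            // x = r[m1], the updated row multipliers and rotation terms are
            //   d1new = d2 + d1 * x^2,  cbar = d2 / d1new,  sbar = x * d1 / d1new,
            //   d2new = d1 * cbar,
            // after which rows m and m + 1 of R, the right-hand sides, VORDER,
            // TOL and RSS are exchanged or updated accordingly.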
            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                x = r[m1];
                if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) {
                    x = 0.0;
                }
                if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        x = r[m1];
                        r[m1] = r[m2];
                        r[m2] = x;
                        ++m2;
                    }
                    x = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = x;
                    bSkipTo40 = true;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * x * x;
                    r[m1] = 1.0 / x;
                    for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) {
                        r[i] /= x;
                    }
                    rhs[m] /= x;
                    bSkipTo40 = true;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * x * x;
                    cbar = d2 / d1new;
                    sbar = x * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * y;
                        r[m2] = y - x * r[m2];
                        ++m2;
                    }
                    y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * y;
                    rhs[mp1] = y - x * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    x = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = x;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            x = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = x;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 on error, 0 if everything is ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
     * @param rowData row of data corresponding to one observation
     * @return the diagonal element of the hat matrix for this observation
     */
    public double getDiagonalOfHatMatrix(double[] rowData) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (rowData.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[rowData.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(rowData, 0, xrow, 1, rowData.length);
        } else {
            xrow = rowData;
        }
        double hii = 0.0;
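        // In outline: h_ii = x' (X'X)^{-1} x is accumulated by forward-solving
        // R' * wk = x through the packed triangular factor and then summing
        // wk[col]^2 / d[col].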
        for (int col = 0; col < xrow.length; col++) {
            if (JdkMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling regress with an array of regressor indices
     * will trigger a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return Arrays.copyOf(vorder, vorder.length);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables, or the number of regressors requested
     * is greater than the number of regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the
     * supplied array. Calling this method will change the internal order of the
     * regressors, and care is required in interpreting the hat matrix.
     *
     * @param  variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException thrown if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the number of regressors in the model, or an index in the
     * regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            // Reject regressor indices beyond the range of the model, as the
            // javadoc promises; the check is on the index value, not the loop counter.
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}