001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.stat.regression;
018
019import java.util.Arrays;
020import org.apache.commons.math3.exception.util.LocalizedFormats;
021import org.apache.commons.math3.util.FastMath;
022import org.apache.commons.math3.util.Precision;
023import org.apache.commons.math3.util.MathArrays;
024
025/**
026 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
027 *
028 * <p>The algorithm is described in: <pre>
029 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
030 * Author(s): Alan J. Miller
031 * Source: Journal of the Royal Statistical Society.
032 * Series C (Applied Statistics), Vol. 41, No. 2
033 * (1992), pp. 458-478
034 * Published by: Blackwell Publishing for the Royal Statistical Society
035 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
036 *
037 * <p>This method for multiple regression forms the solution to the OLS problem
038 * by updating the QR decomposition as described by Gentleman.</p>
039 *
040 * @since 3.0
041 */
042public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
043
044    /** number of variables in regression */
045    private final int nvars;
046    /** diagonals of cross products matrix */
047    private final double[] d;
048    /** the elements of the R`Y */
049    private final double[] rhs;
050    /** the off diagonal portion of the R matrix */
051    private final double[] r;
052    /** the tolerance for each of the variables */
053    private final double[] tol;
054    /** residual sum of squares for all nested regressions */
055    private final double[] rss;
056    /** order of the regressors */
057    private final int[] vorder;
058    /** scratch space for tolerance calc */
059    private final double[] work_tolset;
060    /** number of observations entered */
061    private long nobs = 0;
062    /** sum of squared errors of largest regression */
063    private double sserr = 0.0;
064    /** has rss been called? */
065    private boolean rss_set = false;
066    /** has the tolerance setting method been called */
067    private boolean tol_set = false;
068    /** flags for variables with linear dependency problems */
069    private final boolean[] lindep;
070    /** singular x values */
071    private final double[] x_sing;
072    /** workspace for singularity method */
073    private final double[] work_sing;
074    /** summation of Y variable */
075    private double sumy = 0.0;
076    /** summation of squared Y values */
077    private double sumsqy = 0.0;
078    /** boolean flag whether a regression constant is added */
079    private boolean hasIntercept;
080    /** zero tolerance */
081    private final double epsilon;
082    /**
083     *  Set the default constructor to private access
084     *  to prevent inadvertent instantiation
085     */
086    @SuppressWarnings("unused")
087    private MillerUpdatingRegression() {
088        this(-1, false, Double.NaN);
089    }
090
091    /**
092     * This is the augmented constructor for the MillerUpdatingRegression class.
093     *
094     * @param numberOfVariables number of regressors to expect, not including constant
095     * @param includeConstant include a constant automatically
096     * @param errorTolerance  zero tolerance, how machine zero is determined
097     * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
098     */
099    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
100    throws ModelSpecificationException {
101        if (numberOfVariables < 1) {
102            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
103        }
104        if (includeConstant) {
105            this.nvars = numberOfVariables + 1;
106        } else {
107            this.nvars = numberOfVariables;
108        }
109        this.hasIntercept = includeConstant;
110        this.nobs = 0;
111        this.d = new double[this.nvars];
112        this.rhs = new double[this.nvars];
113        this.r = new double[this.nvars * (this.nvars - 1) / 2];
114        this.tol = new double[this.nvars];
115        this.rss = new double[this.nvars];
116        this.vorder = new int[this.nvars];
117        this.x_sing = new double[this.nvars];
118        this.work_sing = new double[this.nvars];
119        this.work_tolset = new double[this.nvars];
120        this.lindep = new boolean[this.nvars];
121        for (int i = 0; i < this.nvars; i++) {
122            vorder[i] = i;
123        }
124        if (errorTolerance > 0) {
125            this.epsilon = errorTolerance;
126        } else {
127            this.epsilon = -errorTolerance;
128        }
129    }
130
131    /**
132     * Primary constructor for the MillerUpdatingRegression.
133     *
134     * @param numberOfVariables maximum number of potential regressors
135     * @param includeConstant include a constant automatically
136     * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
137     */
138    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
139    throws ModelSpecificationException {
140        this(numberOfVariables, includeConstant, Precision.EPSILON);
141    }
142
143    /**
144     * A getter method which determines whether a constant is included.
145     * @return true regression has an intercept, false no intercept
146     */
147    public boolean hasIntercept() {
148        return this.hasIntercept;
149    }
150
151    /**
152     * Gets the number of observations added to the regression model.
153     * @return number of observations
154     */
155    public long getN() {
156        return this.nobs;
157    }
158
159    /**
160     * Adds an observation to the regression model.
161     * @param x the array with regressor values
162     * @param y  the value of dependent variable given these regressors
163     * @exception ModelSpecificationException if the length of {@code x} does not equal
164     * the number of independent variables in the model
165     */
166    public void addObservation(final double[] x, final double y)
167    throws ModelSpecificationException {
168
169        if ((!this.hasIntercept && x.length != nvars) ||
170               (this.hasIntercept && x.length + 1 != nvars)) {
171            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
172                    x.length, nvars);
173        }
174        if (!this.hasIntercept) {
175            include(MathArrays.copyOf(x, x.length), 1.0, y);
176        } else {
177            final double[] tmp = new double[x.length + 1];
178            System.arraycopy(x, 0, tmp, 1, x.length);
179            tmp[0] = 1.0;
180            include(tmp, 1.0, y);
181        }
182        ++nobs;
183
184    }
185
186    /**
187     * Adds multiple observations to the model.
188     * @param x observations on the regressors
189     * @param y observations on the regressand
190     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
191     * the length of {@code y} or does not contain sufficient data to estimate the model
192     */
193    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
194        if ((x == null) || (y == null) || (x.length != y.length)) {
195            throw new ModelSpecificationException(
196                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
197                  (x == null) ? 0 : x.length,
198                  (y == null) ? 0 : y.length);
199        }
200        if (x.length == 0) {  // Must be no y data either
201            throw new ModelSpecificationException(
202                    LocalizedFormats.NO_DATA);
203        }
204        if (x[0].length + 1 > x.length) {
205            throw new ModelSpecificationException(
206                  LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
207                  x.length, x[0].length);
208        }
209        for (int i = 0; i < x.length; i++) {
210            addObservation(x[i], y[i]);
211        }
212    }
213
214    /**
215     * The include method is where the QR decomposition occurs. This statement forms all
216     * intermediate data which will be used for all derivative measures.
217     * According to the miller paper, note that in the original implementation the x vector
218     * is overwritten. In this implementation, the include method is passed a copy of the
219     * original data vector so that there is no contamination of the data. Additionally,
220     * this method differs slightly from Gentleman's method, in that the assumption is
221     * of dense design matrices, there is some advantage in using the original gentleman algorithm
222     * on sparse matrices.
223     *
224     * @param x observations on the regressors
225     * @param wi weight of the this observation (-1,1)
226     * @param yi observation on the regressand
227     */
228    private void include(final double[] x, final double wi, final double yi) {
229        int nextr = 0;
230        double w = wi;
231        double y = yi;
232        double xi;
233        double di;
234        double wxi;
235        double dpi;
236        double xk;
237        double _w;
238        this.rss_set = false;
239        sumy = smartAdd(yi, sumy);
240        sumsqy = smartAdd(sumsqy, yi * yi);
241        for (int i = 0; i < x.length; i++) {
242            if (w == 0.0) {
243                return;
244            }
245            xi = x[i];
246
247            if (xi == 0.0) {
248                nextr += nvars - i - 1;
249                continue;
250            }
251            di = d[i];
252            wxi = w * xi;
253            _w = w;
254            if (di != 0.0) {
255                dpi = smartAdd(di, wxi * xi);
256                final double tmp = wxi * xi / di;
257                if (FastMath.abs(tmp) > Precision.EPSILON) {
258                    w = (di * w) / dpi;
259                }
260            } else {
261                dpi = wxi * xi;
262                w = 0.0;
263            }
264            d[i] = dpi;
265            for (int k = i + 1; k < nvars; k++) {
266                xk = x[k];
267                x[k] = smartAdd(xk, -xi * r[nextr]);
268                if (di != 0.0) {
269                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
270                } else {
271                    r[nextr] = xk / xi;
272                }
273                ++nextr;
274            }
275            xk = y;
276            y = smartAdd(xk, -xi * rhs[i]);
277            if (di != 0.0) {
278                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
279            } else {
280                rhs[i] = xk / xi;
281            }
282        }
283        sserr = smartAdd(sserr, w * y * y);
284    }
285
286    /**
287     * Adds to number a and b such that the contamination due to
288     * numerical smallness of one addend does not corrupt the sum.
289     * @param a - an addend
290     * @param b - an addend
291     * @return the sum of the a and b
292     */
293    private double smartAdd(double a, double b) {
294        final double _a = FastMath.abs(a);
295        final double _b = FastMath.abs(b);
296        if (_a > _b) {
297            final double eps = _a * Precision.EPSILON;
298            if (_b > eps) {
299                return a + b;
300            }
301            return a;
302        } else {
303            final double eps = _b * Precision.EPSILON;
304            if (_a > eps) {
305                return a + b;
306            }
307            return b;
308        }
309    }
310
311    /**
312     * As the name suggests,  clear wipes the internals and reorders everything in the
313     * canonical order.
314     */
315    public void clear() {
316        Arrays.fill(this.d, 0.0);
317        Arrays.fill(this.rhs, 0.0);
318        Arrays.fill(this.r, 0.0);
319        Arrays.fill(this.tol, 0.0);
320        Arrays.fill(this.rss, 0.0);
321        Arrays.fill(this.work_tolset, 0.0);
322        Arrays.fill(this.work_sing, 0.0);
323        Arrays.fill(this.x_sing, 0.0);
324        Arrays.fill(this.lindep, false);
325        for (int i = 0; i < nvars; i++) {
326            this.vorder[i] = i;
327        }
328        this.nobs = 0;
329        this.sserr = 0.0;
330        this.sumy = 0.0;
331        this.sumsqy = 0.0;
332        this.rss_set = false;
333        this.tol_set = false;
334    }
335
336    /**
337     * This sets up tolerances for singularity testing.
338     */
339    private void tolset() {
340        int pos;
341        double total;
342        final double eps = this.epsilon;
343        for (int i = 0; i < nvars; i++) {
344            this.work_tolset[i] = FastMath.sqrt(d[i]);
345        }
346        tol[0] = eps * this.work_tolset[0];
347        for (int col = 1; col < nvars; col++) {
348            pos = col - 1;
349            total = work_tolset[col];
350            for (int row = 0; row < col; row++) {
351                total += FastMath.abs(r[pos]) * work_tolset[row];
352                pos += nvars - row - 2;
353            }
354            tol[col] = eps * total;
355        }
356        tol_set = true;
357    }
358
359    /**
360     * The regcf method conducts the linear regression and extracts the
361     * parameter vector. Notice that the algorithm can do subset regression
362     * with no alteration.
363     *
364     * @param nreq how many of the regressors to include (either in canonical
365     * order, or in the current reordered state)
366     * @return an array with the estimated slope coefficients
367     * @throws ModelSpecificationException if {@code nreq} is less than 1
368     * or greater than the number of independent variables
369     */
370    private double[] regcf(int nreq) throws ModelSpecificationException {
371        int nextr;
372        if (nreq < 1) {
373            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
374        }
375        if (nreq > this.nvars) {
376            throw new ModelSpecificationException(
377                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
378        }
379        if (!this.tol_set) {
380            tolset();
381        }
382        final double[] ret = new double[nreq];
383        boolean rankProblem = false;
384        for (int i = nreq - 1; i > -1; i--) {
385            if (FastMath.sqrt(d[i]) < tol[i]) {
386                ret[i] = 0.0;
387                d[i] = 0.0;
388                rankProblem = true;
389            } else {
390                ret[i] = rhs[i];
391                nextr = i * (nvars + nvars - i - 1) / 2;
392                for (int j = i + 1; j < nreq; j++) {
393                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
394                    ++nextr;
395                }
396            }
397        }
398        if (rankProblem) {
399            for (int i = 0; i < nreq; i++) {
400                if (this.lindep[i]) {
401                    ret[i] = Double.NaN;
402                }
403            }
404        }
405        return ret;
406    }
407
408    /**
409     * The method which checks for singularities and then eliminates the offending
410     * columns.
411     */
412    private void singcheck() {
413        int pos;
414        for (int i = 0; i < nvars; i++) {
415            work_sing[i] = FastMath.sqrt(d[i]);
416        }
417        for (int col = 0; col < nvars; col++) {
418            // Set elements within R to zero if they are less than tol(col) in
419            // absolute value after being scaled by the square root of their row
420            // multiplier
421            final double temp = tol[col];
422            pos = col - 1;
423            for (int row = 0; row < col - 1; row++) {
424                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
425                    r[pos] = 0.0;
426                }
427                pos += nvars - row - 2;
428            }
429            // If diagonal element is near zero, set it to zero, set appropriate
430            // element of LINDEP, and use INCLUD to augment the projections in
431            // the lower rows of the orthogonalization.
432            lindep[col] = false;
433            if (work_sing[col] < temp) {
434                lindep[col] = true;
435                if (col < nvars - 1) {
436                    Arrays.fill(x_sing, 0.0);
437                    int _pi = col * (nvars + nvars - col - 1) / 2;
438                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
439                        x_sing[_xi] = r[_pi];
440                        r[_pi] = 0.0;
441                    }
442                    final double y = rhs[col];
443                    final double weight = d[col];
444                    d[col] = 0.0;
445                    rhs[col] = 0.0;
446                    this.include(x_sing, weight, y);
447                } else {
448                    sserr += d[col] * rhs[col] * rhs[col];
449                }
450            }
451        }
452    }
453
454    /**
455     * Calculates the sum of squared errors for the full regression
456     * and all subsets in the following manner: <pre>
457     * rss[] ={
458     * ResidualSumOfSquares_allNvars,
459     * ResidualSumOfSquares_FirstNvars-1,
460     * ResidualSumOfSquares_FirstNvars-2,
461     * ..., ResidualSumOfSquares_FirstVariable} </pre>
462     */
463    private void ss() {
464        double total = sserr;
465        rss[nvars - 1] = sserr;
466        for (int i = nvars - 1; i > 0; i--) {
467            total += d[i] * rhs[i] * rhs[i];
468            rss[i - 1] = total;
469        }
470        rss_set = true;
471    }
472
473    /**
474     * Calculates the cov matrix assuming only the first nreq variables are
475     * included in the calculation. The returned array contains a symmetric
476     * matrix stored in lower triangular form. The matrix will have
477     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
478     * cov =
479     * {
480     *  cov_00,
481     *  cov_10, cov_11,
482     *  cov_20, cov_21, cov22,
483     *  ...
484     * } </pre>
485     *
486     * @param nreq how many of the regressors to include (either in canonical
487     * order, or in the current reordered state)
488     * @return an array with the variance covariance of the included
489     * regressors in lower triangular form
490     */
491    private double[] cov(int nreq) {
492        if (this.nobs <= nreq) {
493            return null;
494        }
495        double rnk = 0.0;
496        for (int i = 0; i < nreq; i++) {
497            if (!this.lindep[i]) {
498                rnk += 1.0;
499            }
500        }
501        final double var = rss[nreq - 1] / (nobs - rnk);
502        final double[] rinv = new double[nreq * (nreq - 1) / 2];
503        inverse(rinv, nreq);
504        final double[] covmat = new double[nreq * (nreq + 1) / 2];
505        Arrays.fill(covmat, Double.NaN);
506        int pos2;
507        int pos1;
508        int start = 0;
509        double total = 0;
510        for (int row = 0; row < nreq; row++) {
511            pos2 = start;
512            if (!this.lindep[row]) {
513                for (int col = row; col < nreq; col++) {
514                    if (!this.lindep[col]) {
515                        pos1 = start + col - row;
516                        if (row == col) {
517                            total = 1.0 / d[col];
518                        } else {
519                            total = rinv[pos1 - 1] / d[col];
520                        }
521                        for (int k = col + 1; k < nreq; k++) {
522                            if (!this.lindep[k]) {
523                                total += rinv[pos1] * rinv[pos2] / d[k];
524                            }
525                            ++pos1;
526                            ++pos2;
527                        }
528                        covmat[ (col + 1) * col / 2 + row] = total * var;
529                    } else {
530                        pos2 += nreq - col - 1;
531                    }
532                }
533            }
534            start += nreq - row - 1;
535        }
536        return covmat;
537    }
538
539    /**
540     * This internal method calculates the inverse of the upper-triangular portion
541     * of the R matrix.
542     * @param rinv  the storage for the inverse of r
543     * @param nreq how many of the regressors to include (either in canonical
544     * order, or in the current reordered state)
545     */
546    private void inverse(double[] rinv, int nreq) {
547        int pos = nreq * (nreq - 1) / 2 - 1;
548        int pos1 = -1;
549        int pos2 = -1;
550        double total = 0.0;
551        Arrays.fill(rinv, Double.NaN);
552        for (int row = nreq - 1; row > 0; --row) {
553            if (!this.lindep[row]) {
554                final int start = (row - 1) * (nvars + nvars - row) / 2;
555                for (int col = nreq; col > row; --col) {
556                    pos1 = start;
557                    pos2 = pos;
558                    total = 0.0;
559                    for (int k = row; k < col - 1; k++) {
560                        pos2 += nreq - k - 1;
561                        if (!this.lindep[k]) {
562                            total += -r[pos1] * rinv[pos2];
563                        }
564                        ++pos1;
565                    }
566                    rinv[pos] = total - r[pos1];
567                    --pos;
568                }
569            } else {
570                pos -= nreq - row;
571            }
572        }
573    }
574
575    /**
576     * In the original algorithm only the partial correlations of the regressors
577     * is returned to the user. In this implementation, we have <pre>
578     * corr =
579     * {
580     *   corrxx - lower triangular
581     *   corrxy - bottom row of the matrix
582     * }
583     * Replaces subroutines PCORR and COR of:
584     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
585     *
586     * <p>Calculate partial correlations after the variables in rows
587     * 1, 2, ..., IN have been forced into the regression.
588     * If IN = 1, and the first row of R represents a constant in the
589     * model, then the usual simple correlations are returned.</p>
590     *
591     * <p>If IN = 0, the value returned in array CORMAT for the correlation
592     * of variables Xi & Xj is: <pre>
593     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
594     *
595     * <p>On return, array CORMAT contains the upper triangle of the matrix of
596     * partial correlations stored by rows, excluding the 1's on the diagonal.
597     * e.g. if IN = 2, the consecutive elements returned are:
598     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
599     * Array YCORR stores the partial correlations with the Y-variable
600     * starting with YCORR(IN+1) = partial correlation with the variable in
601     * position (IN+1). </p>
602     *
603     * @param in how many of the regressors to include (either in canonical
604     * order, or in the current reordered state)
605     * @return an array with the partial correlations of the remainder of
606     * regressors with each other and the regressand, in lower triangular form
607     */
608    public double[] getPartialCorrelations(int in) {
609        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
610        int pos;
611        int pos1;
612        int pos2;
613        final int rms_off = -in;
614        final int wrk_off = -(in + 1);
615        final double[] rms = new double[nvars - in];
616        final double[] work = new double[nvars - in - 1];
617        double sumxx;
618        double sumxy;
619        double sumyy;
620        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
621        if (in < -1 || in >= nvars) {
622            return null;
623        }
624        final int nvm = nvars - 1;
625        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
626        if (d[in] > 0.0) {
627            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
628        }
629        for (int col = in + 1; col < nvars; col++) {
630            pos = base_pos + col - 1 - in;
631            sumxx = d[col];
632            for (int row = in; row < col; row++) {
633                sumxx += d[row] * r[pos] * r[pos];
634                pos += nvars - row - 2;
635            }
636            if (sumxx > 0.0) {
637                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
638            } else {
639                rms[col + rms_off] = 0.0;
640            }
641        }
642        sumyy = sserr;
643        for (int row = in; row < nvars; row++) {
644            sumyy += d[row] * rhs[row] * rhs[row];
645        }
646        if (sumyy > 0.0) {
647            sumyy = 1.0 / FastMath.sqrt(sumyy);
648        }
649        pos = 0;
650        for (int col1 = in; col1 < nvars; col1++) {
651            sumxy = 0.0;
652            Arrays.fill(work, 0.0);
653            pos1 = base_pos + col1 - in - 1;
654            for (int row = in; row < col1; row++) {
655                pos2 = pos1 + 1;
656                for (int col2 = col1 + 1; col2 < nvars; col2++) {
657                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
658                    pos2++;
659                }
660                sumxy += d[row] * r[pos1] * rhs[row];
661                pos1 += nvars - row - 2;
662            }
663            pos2 = pos1 + 1;
664            for (int col2 = col1 + 1; col2 < nvars; col2++) {
665                work[col2 + wrk_off] += d[col1] * r[pos2];
666                ++pos2;
667                output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
668                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
669                ++pos;
670            }
671            sumxy += d[col1] * rhs[col1];
672            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
673        }
674
675        return output;
676    }
677
678    /**
679     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
680     * Move variable from position FROM to position TO in an
681     * orthogonal reduction produced by AS75.1.
682     *
683     * @param from initial position
684     * @param to destination
685     */
686    private void vmove(int from, int to) {
687        double d1;
688        double d2;
689        double X;
690        double d1new;
691        double d2new;
692        double cbar;
693        double sbar;
694        double Y;
695        int first;
696        int inc;
697        int m1;
698        int m2;
699        int mp1;
700        int pos;
701        boolean bSkipTo40 = false;
702        if (from == to) {
703            return;
704        }
705        if (!this.rss_set) {
706            ss();
707        }
708        int count = 0;
709        if (from < to) {
710            first = from;
711            inc = 1;
712            count = to - from;
713        } else {
714            first = from - 1;
715            inc = -1;
716            count = from - to;
717        }
718
719        int m = first;
720        int idx = 0;
721        while (idx < count) {
722            m1 = m * (nvars + nvars - m - 1) / 2;
723            m2 = m1 + nvars - m - 1;
724            mp1 = m + 1;
725
726            d1 = d[m];
727            d2 = d[mp1];
728            // Special cases.
729            if (d1 > this.epsilon || d2 > this.epsilon) {
730                X = r[m1];
731                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
732                    X = 0.0;
733                }
734                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
735                    d[m] = d2;
736                    d[mp1] = d1;
737                    r[m1] = 0.0;
738                    for (int col = m + 2; col < nvars; col++) {
739                        ++m1;
740                        X = r[m1];
741                        r[m1] = r[m2];
742                        r[m2] = X;
743                        ++m2;
744                    }
745                    X = rhs[m];
746                    rhs[m] = rhs[mp1];
747                    rhs[mp1] = X;
748                    bSkipTo40 = true;
749                    //break;
750                } else if (d2 < this.epsilon) {
751                    d[m] = d1 * X * X;
752                    r[m1] = 1.0 / X;
753                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
754                        r[_i] /= X;
755                    }
756                    rhs[m] /= X;
757                    bSkipTo40 = true;
758                    //break;
759                }
760                if (!bSkipTo40) {
761                    d1new = d2 + d1 * X * X;
762                    cbar = d2 / d1new;
763                    sbar = X * d1 / d1new;
764                    d2new = d1 * cbar;
765                    d[m] = d1new;
766                    d[mp1] = d2new;
767                    r[m1] = sbar;
768                    for (int col = m + 2; col < nvars; col++) {
769                        ++m1;
770                        Y = r[m1];
771                        r[m1] = cbar * r[m2] + sbar * Y;
772                        r[m2] = Y - X * r[m2];
773                        ++m2;
774                    }
775                    Y = rhs[m];
776                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
777                    rhs[mp1] = Y - X * rhs[mp1];
778                }
779            }
780            if (m > 0) {
781                pos = m;
782                for (int row = 0; row < m; row++) {
783                    X = r[pos];
784                    r[pos] = r[pos - 1];
785                    r[pos - 1] = X;
786                    pos += nvars - row - 2;
787                }
788            }
789            // Adjust variable order (VORDER), the tolerances (TOL) and
790            // the vector of residual sums of squares (RSS).
791            m1 = vorder[m];
792            vorder[m] = vorder[mp1];
793            vorder[mp1] = m1;
794            X = tol[m];
795            tol[m] = tol[mp1];
796            tol[mp1] = X;
797            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
798
799            m += inc;
800            ++idx;
801        }
802    }
803
804    /**
805     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
806     *
807     * <p> Re-order the variables in an orthogonal reduction produced by
808     * AS75.1 so that the N variables in LIST start at position POS1,
809     * though will not necessarily be in the same order as in LIST.
810     * Any variables in VORDER before position POS1 are not moved.
811     * Auxiliary routine called: VMOVE. </p>
812     *
813     * <p>This internal method reorders the regressors.</p>
814     *
815     * @param list the regressors to move
816     * @param pos1 where the list will be placed
817     * @return -1 error, 0 everything ok
818     */
819    private int reorderRegressors(int[] list, int pos1) {
820        int next;
821        int i;
822        int l;
823        if (list.length < 1 || list.length > nvars + 1 - pos1) {
824            return -1;
825        }
826        next = pos1;
827        i = pos1;
828        while (i < nvars) {
829            l = vorder[i];
830            for (int j = 0; j < list.length; j++) {
831                if (l == list[j] && i > next) {
832                    this.vmove(i, next);
833                    ++next;
834                    if (next >= list.length + pos1) {
835                        return 0;
836                    } else {
837                        break;
838                    }
839                }
840            }
841            ++i;
842        }
843        return 0;
844    }
845
846    /**
847     * Gets the diagonal of the Hat matrix also known as the leverage matrix.
848     *
849     * @param  row_data returns the diagonal of the hat matrix for this observation
850     * @return the diagonal element of the hatmatrix
851     */
852    public double getDiagonalOfHatMatrix(double[] row_data) {
853        double[] wk = new double[this.nvars];
854        int pos;
855        double total;
856
857        if (row_data.length > nvars) {
858            return Double.NaN;
859        }
860        double[] xrow;
861        if (this.hasIntercept) {
862            xrow = new double[row_data.length + 1];
863            xrow[0] = 1.0;
864            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
865        } else {
866            xrow = row_data;
867        }
868        double hii = 0.0;
869        for (int col = 0; col < xrow.length; col++) {
870            if (FastMath.sqrt(d[col]) < tol[col]) {
871                wk[col] = 0.0;
872            } else {
873                pos = col - 1;
874                total = xrow[col];
875                for (int row = 0; row < col; row++) {
876                    total = smartAdd(total, -wk[row] * r[pos]);
877                    pos += nvars - row - 2;
878                }
879                wk[col] = total;
880                hii = smartAdd(hii, (total * total) / d[col]);
881            }
882        }
883        return hii;
884    }
885
886    /**
887     * Gets the order of the regressors, useful if some type of reordering
888     * has been called. Calling regress with int[]{} args will trigger
889     * a reordering.
890     *
891     * @return int[] with the current order of the regressors
892     */
893    public int[] getOrderOfRegressors(){
894        return MathArrays.copyOf(vorder);
895    }
896
897    /**
898     * Conducts a regression on the data in the model, using all regressors.
899     *
900     * @return RegressionResults the structure holding all regression results
901     * @exception  ModelSpecificationException - thrown if number of observations is
902     * less than the number of variables
903     */
904    public RegressionResults regress() throws ModelSpecificationException {
905        return regress(this.nvars);
906    }
907
908    /**
909     * Conducts a regression on the data in the model, using a subset of regressors.
910     *
911     * @param numberOfRegressors many of the regressors to include (either in canonical
912     * order, or in the current reordered state)
913     * @return RegressionResults the structure holding all regression results
914     * @exception  ModelSpecificationException - thrown if number of observations is
915     * less than the number of variables or number of regressors requested
916     * is greater than the regressors in the model
917     */
918    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
919        if (this.nobs <= numberOfRegressors) {
920           throw new ModelSpecificationException(
921                   LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
922                   this.nobs, numberOfRegressors);
923        }
924        if( numberOfRegressors > this.nvars ){
925            throw new ModelSpecificationException(
926                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
927        }
928
929        tolset();
930        singcheck();
931
932        double[] beta = this.regcf(numberOfRegressors);
933
934        ss();
935
936        double[] cov = this.cov(numberOfRegressors);
937
938        int rnk = 0;
939        for (int i = 0; i < this.lindep.length; i++) {
940            if (!this.lindep[i]) {
941                ++rnk;
942            }
943        }
944
945        boolean needsReorder = false;
946        for (int i = 0; i < numberOfRegressors; i++) {
947            if (this.vorder[i] != i) {
948                needsReorder = true;
949                break;
950            }
951        }
952        if (!needsReorder) {
953            return new RegressionResults(
954                    beta, new double[][]{cov}, true, this.nobs, rnk,
955                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
956        } else {
957            double[] betaNew = new double[beta.length];
958            double[] covNew = new double[cov.length];
959
960            int[] newIndices = new int[beta.length];
961            for (int i = 0; i < nvars; i++) {
962                for (int j = 0; j < numberOfRegressors; j++) {
963                    if (this.vorder[j] == i) {
964                        betaNew[i] = beta[ j];
965                        newIndices[i] = j;
966                    }
967                }
968            }
969
970            int idx1 = 0;
971            int idx2;
972            int _i;
973            int _j;
974            for (int i = 0; i < beta.length; i++) {
975                _i = newIndices[i];
976                for (int j = 0; j <= i; j++, idx1++) {
977                    _j = newIndices[j];
978                    if (_i > _j) {
979                        idx2 = _i * (_i + 1) / 2 + _j;
980                    } else {
981                        idx2 = _j * (_j + 1) / 2 + _i;
982                    }
983                    covNew[idx1] = cov[idx2];
984                }
985            }
986            return new RegressionResults(
987                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
988                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
989        }
990    }
991
992    /**
993     * Conducts a regression on the data in the model, using regressors in array
994     * Calling this method will change the internal order of the regressors
995     * and care is required in interpreting the hatmatrix.
996     *
997     * @param  variablesToInclude array of variables to include in regression
998     * @return RegressionResults the structure holding all regression results
999     * @exception  ModelSpecificationException - thrown if number of observations is
1000     * less than the number of variables, the number of regressors requested
1001     * is greater than the regressors in the model or a regressor index in
1002     * regressor array does not exist
1003     */
1004    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1005        if (variablesToInclude.length > this.nvars) {
1006            throw new ModelSpecificationException(
1007                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1008        }
1009        if (this.nobs <= this.nvars) {
1010            throw new ModelSpecificationException(
1011                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1012                    this.nobs, this.nvars);
1013        }
1014        Arrays.sort(variablesToInclude);
1015        int iExclude = 0;
1016        for (int i = 0; i < variablesToInclude.length; i++) {
1017            if (i >= this.nvars) {
1018                throw new ModelSpecificationException(
1019                        LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
1020            }
1021            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1022                variablesToInclude[i] = -1;
1023                ++iExclude;
1024            }
1025        }
1026        int[] series;
1027        if (iExclude > 0) {
1028            int j = 0;
1029            series = new int[variablesToInclude.length - iExclude];
1030            for (int i = 0; i < variablesToInclude.length; i++) {
1031                if (variablesToInclude[i] > -1) {
1032                    series[j] = variablesToInclude[i];
1033                    ++j;
1034                }
1035            }
1036        } else {
1037            series = variablesToInclude;
1038        }
1039
1040        reorderRegressors(series, 0);
1041        tolset();
1042        singcheck();
1043
1044        double[] beta = this.regcf(series.length);
1045
1046        ss();
1047
1048        double[] cov = this.cov(series.length);
1049
1050        int rnk = 0;
1051        for (int i = 0; i < this.lindep.length; i++) {
1052            if (!this.lindep[i]) {
1053                ++rnk;
1054            }
1055        }
1056
1057        boolean needsReorder = false;
1058        for (int i = 0; i < this.nvars; i++) {
1059            if (this.vorder[i] != series[i]) {
1060                needsReorder = true;
1061                break;
1062            }
1063        }
1064        if (!needsReorder) {
1065            return new RegressionResults(
1066                    beta, new double[][]{cov}, true, this.nobs, rnk,
1067                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1068        } else {
1069            double[] betaNew = new double[beta.length];
1070            int[] newIndices = new int[beta.length];
1071            for (int i = 0; i < series.length; i++) {
1072                for (int j = 0; j < this.vorder.length; j++) {
1073                    if (this.vorder[j] == series[i]) {
1074                        betaNew[i] = beta[ j];
1075                        newIndices[i] = j;
1076                    }
1077                }
1078            }
1079            double[] covNew = new double[cov.length];
1080            int idx1 = 0;
1081            int idx2;
1082            int _i;
1083            int _j;
1084            for (int i = 0; i < beta.length; i++) {
1085                _i = newIndices[i];
1086                for (int j = 0; j <= i; j++, idx1++) {
1087                    _j = newIndices[j];
1088                    if (_i > _j) {
1089                        idx2 = _i * (_i + 1) / 2 + _j;
1090                    } else {
1091                        idx2 = _j * (_j + 1) / 2 + _i;
1092                    }
1093                    covNew[idx1] = cov[idx2];
1094                }
1095            }
1096            return new RegressionResults(
1097                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1098                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1099        }
1100    }
1101}