/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.legacy.stat.regression;

import java.util.Arrays;

import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
import org.apache.commons.math4.core.jdkmath.JdkMath;
import org.apache.commons.numbers.core.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression. */
    private final int nvars;
    /** diagonals of cross products matrix. */
    private final double[] d;
    /** the elements of R'Y. */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix. */
    private final double[] r;
    /** the tolerance for each of the variables. */
    private final double[] tol;
    /** residual sum of squares for all nested regressions. */
    private final double[] rss;
    /** order of the regressors. */
    private final int[] vorder;
    /** scratch space for tolerance calc. */
    private final double[] workTolset;
    /** number of observations entered. */
    private long nobs;
    /** sum of squared errors of largest regression. */
    private double sserr;
    /** has rss been called? */
    private boolean rssSet;
    /** has the tolerance setting method been called? */
    private boolean tolSet;
    /** flags for variables with linear dependency problems. */
    private final boolean[] lindep;
    /** singular x values. */
    private final double[] xSing;
    /** workspace for singularity method. */
    private final double[] workSing;
    /** summation of Y variable. */
    private double sumy;
    /** summation of squared Y values. */
    private double sumsqy;
    /** boolean flag whether a regression constant is added. */
    private final boolean hasIntercept;
    /** zero tolerance. */
    private final double epsilon;

    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.xSing = new double[this.nvars];
        this.workSing = new double[this.nvars];
        this.workTolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }
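
    // A minimal usage sketch (the data values are illustrative only): two
    // regressors plus an automatically added constant, fitted after four
    // observations. regress() requires more observations than variables.
    //
    //   MillerUpdatingRegression mr = new MillerUpdatingRegression(2, true);
    //   mr.addObservation(new double[]{1.0, 3.0}, 2.5);
    //   mr.addObservation(new double[]{2.0, 5.0}, 4.1);
    //   mr.addObservation(new double[]{3.0, 4.0}, 5.2);
    //   mr.addObservation(new double[]{4.0, 6.0}, 6.9);
    //   RegressionResults fit = mr.regress();
    //   double[] beta = fit.getParameterEstimates();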

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
                (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(Arrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }
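
    // Intercept-handling sketch: with hasIntercept == true the call
    //
    //   mr.addObservation(new double[]{x1, x2}, y);
    //
    // internally updates the decomposition with the augmented row {1.0, x1, x2},
    // so callers never supply the constant column themselves.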

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if (x == null || y == null || x.length != y.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                    (x == null) ? 0 : x.length,
                    (y == null) ? 0 : y.length);
        }
        if (x.length == 0) { // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }
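
    // Batch-loading sketch (values illustrative): x must have at least
    // x[0].length + 1 rows, i.e. more observations than predictors.
    //
    //   double[][] x = {{1.0, 3.0}, {2.0, 5.0}, {3.0, 4.0}, {4.0, 6.0}};
    //   double[] y = {2.5, 4.1, 5.2, 6.9};
    //   mr.addObservations(x, y);  // equivalent to four addObservation calls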

    /**
     * The include method is where the QR decomposition occurs. It forms all
     * intermediate data which will be used for all derivative measures.
     * Note that in the original implementation described in the Miller paper
     * the x vector is overwritten; here the include method is passed a copy of
     * the original data vector, so there is no contamination of the data.
     * Additionally, this method differs slightly from Gentleman's method in
     * that it assumes dense design matrices; the original Gentleman algorithm
     * retains some advantage for sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double wPrev;
        this.rssSet = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            wPrev = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (JdkMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }
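
    // A sketch of the scale-free Givens update applied above for one nonzero
    // regressor value x_i, current row multiplier d_i and observation weight w
    // (after Gentleman, as used in Miller's AS 274):
    //
    //   d_i'  = d_i + w * x_i^2
    //   cbar  = d_i / d_i'          sbar = w * x_i / d_i'
    //   w'    = cbar * w
    //   r_ik' = cbar * r_ik + sbar * x_k
    //   x_k'  = x_k - x_i * r_ik     (for each trailing column k > i)
    //
    // The same rotation updates rhs[i] and the running y in place of r_ik and x_k.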

    /**
     * Adds two numbers a and b such that the contamination due to
     * numerical smallness of one addend does not corrupt the sum.
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double aa = JdkMath.abs(a);
        final double ba = JdkMath.abs(b);
        if (aa > ba) {
            final double eps = aa * Precision.EPSILON;
            if (ba > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = ba * Precision.EPSILON;
            if (aa > eps) {
                return a + b;
            }
            return b;
        }
    }
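
    // Behavior sketch: smartAdd(1.0, 1.0e-20) returns 1.0 unchanged, because
    // 1.0e-20 falls below 1.0 * Precision.EPSILON; the dominant addend is kept
    // rather than letting a negligible term perturb the accumulation.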

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.workTolset, 0.0);
        Arrays.fill(this.workSing, 0.0);
        Arrays.fill(this.xSing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rssSet = false;
        this.tolSet = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.workTolset[i] = JdkMath.sqrt(d[i]);
        }
        tol[0] = eps * this.workTolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = workTolset[col];
            for (int row = 0; row < col; row++) {
                total += JdkMath.abs(r[pos]) * workTolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tolSet = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tolSet) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (JdkMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            workSing[i] = JdkMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier.
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (JdkMath.abs(r[pos]) * workSing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If the diagonal element is near zero, set it to zero, set the
            // appropriate element of LINDEP, and use INCLUD to augment the
            // projections in the lower rows of the orthogonalization.
            lindep[col] = false;
            if (workSing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(xSing, 0.0);
                    int pi = col * (nvars + nvars - col - 1) / 2;
                    for (int xi = col + 1; xi < nvars; xi++, pi++) {
                        xSing[xi] = r[pi];
                        r[pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(xSing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all nested subsets in the following manner: <pre>
     * rss[nvars - 1] = ResidualSumOfSquares_allNvars,
     * rss[nvars - 2] = ResidualSumOfSquares_FirstNvars-1,
     * ...,
     * rss[0] = ResidualSumOfSquares_FirstVariable </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rssSet = true;
    }
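
    // Layout sketch: for nvars == 3, after ss() runs,
    //   rss[2] = SSE with all three regressors,
    //   rss[1] = SSE with the first two (in the current vorder),
    //   rss[0] = SSE with only the first regressor.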

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }
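
    // Packed lower-triangular indexing sketch for nreq == 3: the six entries are
    //   covmat[0] = cov(0,0)
    //   covmat[1] = cov(1,0),  covmat[2] = cov(1,1)
    //   covmat[3] = cov(2,0),  covmat[4] = cov(2,1),  covmat[5] = cov(2,2)
    // i.e. cov(i,j) with i >= j is stored at index i * (i + 1) / 2 + j.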

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rmsOff = -in;
        final int wrkOff = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        if (in < -1 || in >= nvars) {
            return null;
        }
        final int nvm = nvars - 1;
        final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = basePos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx);
            } else {
                rms[col + rmsOff] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / JdkMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = basePos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrkOff] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy;
        }

        return output;
    }
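
    // Sketch: for a model whose constant occupies column 0,
    //
    //   double[] corr = mr.getPartialCorrelations(1);
    //
    // returns the simple correlations among the remaining regressors in packed
    // lower-triangular order, followed by their correlations with Y in the
    // final nvars - 1 entries.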

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double x;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        if (from == to) {
            return;
        }
        if (!this.rssSet) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases. The flag stands in for the "go to 40" of the
            // original Fortran and must be reset on every pass through the loop.
            boolean bSkipTo40 = false;
            if (d1 > this.epsilon || d2 > this.epsilon) {
                x = r[m1];
                if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) {
                    x = 0.0;
                }
                if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        x = r[m1];
                        r[m1] = r[m2];
                        r[m2] = x;
                        ++m2;
                    }
                    x = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = x;
                    bSkipTo40 = true;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * x * x;
                    r[m1] = 1.0 / x;
                    for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) {
                        r[i] /= x;
                    }
                    rhs[m] /= x;
                    bSkipTo40 = true;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * x * x;
                    cbar = d2 / d1new;
                    sbar = x * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * y;
                        r[m2] = y - x * r[m2];
                        ++m2;
                    }
                    y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * y;
                    rhs[mp1] = y - x * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    x = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = x;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            x = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = x;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though they will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 on error, 0 if everything is ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }
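
    // Sketch: with vorder == {0, 1, 2, 3}, calling vmove(3, 1) walks variable 3
    // up through two adjacent swaps, leaving vorder == {0, 3, 1, 2}, with d, r,
    // rhs, tol and rss kept consistent at every step.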

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
     * @param rowData the regressor values for one observation
     * @return the diagonal element of the hat matrix for this observation
     */
    public double getDiagonalOfHatMatrix(double[] rowData) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (rowData.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[rowData.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(rowData, 0, xrow, 1, rowData.length);
        } else {
            xrow = rowData;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (JdkMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling a regress method with an array of indices
     * will trigger a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return Arrays.copyOf(vorder, vorder.length);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }
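
    // Sketch of the three regress flavors (values illustrative): regress() uses
    // all nvars columns, regress(k) (below) fits a nested model on the first k
    // columns in the current ordering, and regress(int[]) reorders internally.
    //
    //   RegressionResults full   = mr.regress();
    //   RegressionResults nested = mr.regress(2);
    //   RegressionResults subset = mr.regress(new int[]{0, 2});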

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, or the number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in
     * the given array. Calling this method will change the internal order of
     * the regressors, so care is required in interpreting the hat matrix.
     *
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model, or a regressor index in the
     * regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}