1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math3.stat.regression;
18  
19  import java.util.Arrays;
20  import org.apache.commons.math3.exception.util.LocalizedFormats;
21  import org.apache.commons.math3.util.FastMath;
22  import org.apache.commons.math3.util.Precision;
23  import org.apache.commons.math3.util.MathArrays;
24  
25  /**
26   * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
27   *
28   * <p>The algorithm is described in: <pre>
29   * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
30   * Author(s): Alan J. Miller
31   * Source: Journal of the Royal Statistical Society.
32   * Series C (Applied Statistics), Vol. 41, No. 2
33   * (1992), pp. 458-478
34   * Published by: Blackwell Publishing for the Royal Statistical Society
35   * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
36   *
37   * <p>This method for multiple regression forms the solution to the OLS problem
38   * by updating the QR decomposition as described by Gentleman.</p>
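 *
 * <p>A minimal usage sketch; the data values below are illustrative only:</p>
 * <pre>{@code
 * // two regressors plus an automatically added intercept
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[]{1.0, 2.0}, 3.0);
 * regression.addObservation(new double[]{2.0, 5.0}, 7.0);
 * regression.addObservation(new double[]{4.0, 3.0}, 8.0);
 * regression.addObservation(new double[]{5.0, 1.0}, 9.0);
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates(); // intercept first
 * }</pre>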
39   *
40   * @version $Id: MillerUpdatingRegression.java 1547633 2013-12-03 23:03:06Z tn $
41   * @since 3.0
42   */
43  public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
44  
45      /** number of variables in regression */
46      private final int nvars;
47      /** diagonals of cross products matrix */
48      private final double[] d;
49      /** the elements of R'Y */
50      private final double[] rhs;
51      /** the off diagonal portion of the R matrix */
52      private final double[] r;
53      /** the tolerance for each of the variables */
54      private final double[] tol;
55      /** residual sum of squares for all nested regressions */
56      private final double[] rss;
57      /** order of the regressors */
58      private final int[] vorder;
59      /** scratch space for tolerance calc */
60      private final double[] work_tolset;
61      /** number of observations entered */
62      private long nobs = 0;
63      /** sum of squared errors of largest regression */
64      private double sserr = 0.0;
65      /** has the rss array been computed? */
66      private boolean rss_set = false;
67      /** has the tolerance setting method been called */
68      private boolean tol_set = false;
69      /** flags for variables with linear dependency problems */
70      private final boolean[] lindep;
71      /** singular x values */
72      private final double[] x_sing;
73      /** workspace for singularity method */
74      private final double[] work_sing;
75      /** summation of Y variable */
76      private double sumy = 0.0;
77      /** summation of squared Y values */
78      private double sumsqy = 0.0;
79      /** boolean flag whether a regression constant is added */
80      private boolean hasIntercept;
81      /** zero tolerance */
82      private final double epsilon;
83      /**
84       *  Set the default constructor to private access
85       *  to prevent inadvertent instantiation
86       */
87      @SuppressWarnings("unused")
88      private MillerUpdatingRegression() {
89          this(-1, false, Double.NaN);
90      }
91  
92      /**
93       * This is the augmented constructor for the MillerUpdatingRegression class.
94       *
95       * @param numberOfVariables number of regressors to expect, not including constant
96       * @param includeConstant include a constant automatically
97       * @param errorTolerance  zero tolerance, how machine zero is determined
98      * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
99       */
100     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
101     throws ModelSpecificationException {
102         if (numberOfVariables < 1) {
103             throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
104         }
105         if (includeConstant) {
106             this.nvars = numberOfVariables + 1;
107         } else {
108             this.nvars = numberOfVariables;
109         }
110         this.hasIntercept = includeConstant;
111         this.nobs = 0;
112         this.d = new double[this.nvars];
113         this.rhs = new double[this.nvars];
114         this.r = new double[this.nvars * (this.nvars - 1) / 2];
115         this.tol = new double[this.nvars];
116         this.rss = new double[this.nvars];
117         this.vorder = new int[this.nvars];
118         this.x_sing = new double[this.nvars];
119         this.work_sing = new double[this.nvars];
120         this.work_tolset = new double[this.nvars];
121         this.lindep = new boolean[this.nvars];
122         for (int i = 0; i < this.nvars; i++) {
123             vorder[i] = i;
124         }
125         if (errorTolerance > 0) {
126             this.epsilon = errorTolerance;
127         } else {
128             this.epsilon = -errorTolerance;
129         }
130     }
131 
132     /**
133      * Primary constructor for the MillerUpdatingRegression.
134      *
135      * @param numberOfVariables maximum number of potential regressors
136      * @param includeConstant include a constant automatically
137      * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
138      */
139     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
140     throws ModelSpecificationException {
141         this(numberOfVariables, includeConstant, Precision.EPSILON);
142     }
143 
144     /**
145      * A getter method which determines whether a constant is included.
146      * @return true if the regression has an intercept, false otherwise
147      */
148     public boolean hasIntercept() {
149         return this.hasIntercept;
150     }
151 
152     /**
153      * Gets the number of observations added to the regression model.
154      * @return number of observations
155      */
156     public long getN() {
157         return this.nobs;
158     }
159 
160     /**
161      * Adds an observation to the regression model.
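     * <p>If the model was constructed with a constant, pass only the regressor
     * values; the leading 1.0 for the intercept is added internally. A sketch
     * with illustrative values:</p>
     * <pre>{@code
     * regression.addObservation(new double[]{1.5, 2.5}, 4.0);
     * }</pre>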
162      * @param x the array with regressor values
163      * @param y  the value of dependent variable given these regressors
164      * @exception ModelSpecificationException if the length of {@code x} does not equal
165      * the number of independent variables in the model
166      */
167     public void addObservation(final double[] x, final double y)
168     throws ModelSpecificationException {
169 
170         if ((!this.hasIntercept && x.length != nvars) ||
171                (this.hasIntercept && x.length + 1 != nvars)) {
172             throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
173                     x.length, nvars);
174         }
175         if (!this.hasIntercept) {
176             include(MathArrays.copyOf(x, x.length), 1.0, y);
177         } else {
178             final double[] tmp = new double[x.length + 1];
179             System.arraycopy(x, 0, tmp, 1, x.length);
180             tmp[0] = 1.0;
181             include(tmp, 1.0, y);
182         }
183         ++nobs;
184 
185     }
186 
187     /**
188      * Adds multiple observations to the model.
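     * <p>A sketch with illustrative values, where each row of {@code x} is one
     * observation:</p>
     * <pre>{@code
     * double[][] x = {{1.0, 2.0}, {2.0, 5.0}, {4.0, 3.0}, {5.0, 1.0}};
     * double[] y = {3.0, 7.0, 8.0, 9.0};
     * regression.addObservations(x, y);
     * }</pre>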
189      * @param x observations on the regressors
190      * @param y observations on the regressand
191      * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
192      * the length of {@code y} or does not contain sufficient data to estimate the model
193      */
194     public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
195         if ((x == null) || (y == null) || (x.length != y.length)) {
196             throw new ModelSpecificationException(
197                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
198                   (x == null) ? 0 : x.length,
199                   (y == null) ? 0 : y.length);
200         }
201         if (x.length == 0) {  // Must be no y data either
202             throw new ModelSpecificationException(
203                     LocalizedFormats.NO_DATA);
204         }
205         if (x[0].length + 1 > x.length) {
206             throw new ModelSpecificationException(
207                   LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
208                   x.length, x[0].length);
209         }
210         for (int i = 0; i < x.length; i++) {
211             addObservation(x[i], y[i]);
212         }
213     }
214 
215     /**
216      * The include method is where the QR decomposition occurs. This method forms all
217      * intermediate data which will be used for all derivative measures.
218      * As noted in the Miller paper, the original implementation overwrites the x vector.
219      * In this implementation, the include method is passed a copy of the
220      * original data vector so that there is no contamination of the data.
221      * This method also differs slightly from Gentleman's method in that it assumes
222      * dense design matrices; the original Gentleman algorithm retains some advantage
223      * on sparse matrices.
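     * <p>For reference, the {@code r} array stores the off-diagonal elements of
     * R packed by rows: element (i, k) with k &gt; i sits at index
     * {@code i * (2 * nvars - i - 1) / 2 + (k - i - 1)}, which is the indexing
     * maintained by {@code nextr} below.</p>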
224      *
225      * @param x observations on the regressors
226      * @param wi weight of this observation (-1,1)
227      * @param yi observation on the regressand
228      */
229     private void include(final double[] x, final double wi, final double yi) {
230         int nextr = 0;
231         double w = wi;
232         double y = yi;
233         double xi;
234         double di;
235         double wxi;
236         double dpi;
237         double xk;
238         double _w;
239         this.rss_set = false;
240         sumy = smartAdd(yi, sumy);
241         sumsqy = smartAdd(sumsqy, yi * yi);
242         for (int i = 0; i < x.length; i++) {
243             if (w == 0.0) {
244                 return;
245             }
246             xi = x[i];
247 
248             if (xi == 0.0) {
249                 nextr += nvars - i - 1;
250                 continue;
251             }
252             di = d[i];
253             wxi = w * xi;
254             _w = w;
255             if (di != 0.0) {
256                 dpi = smartAdd(di, wxi * xi);
257                 final double tmp = wxi * xi / di;
258                 if (FastMath.abs(tmp) > Precision.EPSILON) {
259                     w = (di * w) / dpi;
260                 }
261             } else {
262                 dpi = wxi * xi;
263                 w = 0.0;
264             }
265             d[i] = dpi;
266             for (int k = i + 1; k < nvars; k++) {
267                 xk = x[k];
268                 x[k] = smartAdd(xk, -xi * r[nextr]);
269                 if (di != 0.0) {
270                     r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
271                 } else {
272                     r[nextr] = xk / xi;
273                 }
274                 ++nextr;
275             }
276             xk = y;
277             y = smartAdd(xk, -xi * rhs[i]);
278             if (di != 0.0) {
279                 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
280             } else {
281                 rhs[i] = xk / xi;
282             }
283         }
284         sserr = smartAdd(sserr, w * y * y);
285     }
286 
287     /**
288      * Adds two numbers a and b such that contamination due to the
289      * numerical smallness of one addend does not corrupt the sum.
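     * <p>For example, with {@code Precision.EPSILON} near 2.2e-16,
     * {@code smartAdd(1.0e20, 1.0)} returns {@code 1.0e20}, since the smaller
     * addend falls below the larger one's rounding threshold, while
     * {@code smartAdd(1.0e20, 1.0e10)} returns the ordinary sum.</p>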
290      * @param a - an addend
291      * @param b - an addend
292      * @return the sum of a and b
293      */
294     private double smartAdd(double a, double b) {
295         final double _a = FastMath.abs(a);
296         final double _b = FastMath.abs(b);
297         if (_a > _b) {
298             final double eps = _a * Precision.EPSILON;
299             if (_b > eps) {
300                 return a + b;
301             }
302             return a;
303         } else {
304             final double eps = _b * Precision.EPSILON;
305             if (_a > eps) {
306                 return a + b;
307             }
308             return b;
309         }
310     }
311 
312     /**
313      * As the name suggests, clear wipes the internals and reorders everything into the
314      * canonical order.
315      */
316     public void clear() {
317         Arrays.fill(this.d, 0.0);
318         Arrays.fill(this.rhs, 0.0);
319         Arrays.fill(this.r, 0.0);
320         Arrays.fill(this.tol, 0.0);
321         Arrays.fill(this.rss, 0.0);
322         Arrays.fill(this.work_tolset, 0.0);
323         Arrays.fill(this.work_sing, 0.0);
324         Arrays.fill(this.x_sing, 0.0);
325         Arrays.fill(this.lindep, false);
326         for (int i = 0; i < nvars; i++) {
327             this.vorder[i] = i;
328         }
329         this.nobs = 0;
330         this.sserr = 0.0;
331         this.sumy = 0.0;
332         this.sumsqy = 0.0;
333         this.rss_set = false;
334         this.tol_set = false;
335     }
336 
337     /**
338      * This sets up tolerances for singularity testing.
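     * <p>For column {@code col}, the tolerance is
     * {@code eps * (sqrt(d[col]) + sum over row &lt; col of |r[row,col]| * sqrt(d[row]))},
     * i.e. a zero threshold scaled to the size of the column in the decomposition.</p>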
339      */
340     private void tolset() {
341         int pos;
342         double total;
343         final double eps = this.epsilon;
344         for (int i = 0; i < nvars; i++) {
345             this.work_tolset[i] = FastMath.sqrt(d[i]);
346         }
347         tol[0] = eps * this.work_tolset[0];
348         for (int col = 1; col < nvars; col++) {
349             pos = col - 1;
350             total = work_tolset[col];
351             for (int row = 0; row < col; row++) {
352                 total += FastMath.abs(r[pos]) * work_tolset[row];
353                 pos += nvars - row - 2;
354             }
355             tol[col] = eps * total;
356         }
357         tol_set = true;
358     }
359 
360     /**
361      * The regcf method conducts the linear regression and extracts the
362      * parameter vector. Notice that the algorithm can do subset regression
363      * with no alteration.
364      *
365      * @param nreq how many of the regressors to include (either in canonical
366      * order, or in the current reordered state)
367      * @return an array with the estimated slope coefficients
368      * @throws ModelSpecificationException if {@code nreq} is less than 1
369      * or greater than the number of independent variables
370      */
371     private double[] regcf(int nreq) throws ModelSpecificationException {
372         int nextr;
373         if (nreq < 1) {
374             throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
375         }
376         if (nreq > this.nvars) {
377             throw new ModelSpecificationException(
378                     LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
379         }
380         if (!this.tol_set) {
381             tolset();
382         }
383         final double[] ret = new double[nreq];
384         boolean rankProblem = false;
385         for (int i = nreq - 1; i > -1; i--) {
386             if (FastMath.sqrt(d[i]) < tol[i]) {
387                 ret[i] = 0.0;
388                 d[i] = 0.0;
389                 rankProblem = true;
390             } else {
391                 ret[i] = rhs[i];
392                 nextr = i * (nvars + nvars - i - 1) / 2;
393                 for (int j = i + 1; j < nreq; j++) {
394                     ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
395                     ++nextr;
396                 }
397             }
398         }
399         if (rankProblem) {
400             for (int i = 0; i < nreq; i++) {
401                 if (this.lindep[i]) {
402                     ret[i] = Double.NaN;
403                 }
404             }
405         }
406         return ret;
407     }
408 
409     /**
410      * The method which checks for singularities and then eliminates the offending
411      * columns.
412      */
413     private void singcheck() {
414         int pos;
415         for (int i = 0; i < nvars; i++) {
416             work_sing[i] = FastMath.sqrt(d[i]);
417         }
418         for (int col = 0; col < nvars; col++) {
419             // Set elements within R to zero if they are less than tol(col) in
420             // absolute value after being scaled by the square root of their row
421             // multiplier
422             final double temp = tol[col];
423             pos = col - 1;
424             for (int row = 0; row < col - 1; row++) {
425                 if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
426                     r[pos] = 0.0;
427                 }
428                 pos += nvars - row - 2;
429             }
430             // If diagonal element is near zero, set it to zero, set appropriate
431             // element of LINDEP, and use INCLUD to augment the projections in
432             // the lower rows of the orthogonalization.
433             lindep[col] = false;
434             if (work_sing[col] < temp) {
435                 lindep[col] = true;
436                 if (col < nvars - 1) {
437                     Arrays.fill(x_sing, 0.0);
438                     int _pi = col * (nvars + nvars - col - 1) / 2;
439                     for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
440                         x_sing[_xi] = r[_pi];
441                         r[_pi] = 0.0;
442                     }
443                     final double y = rhs[col];
444                     final double weight = d[col];
445                     d[col] = 0.0;
446                     rhs[col] = 0.0;
447                     this.include(x_sing, weight, y);
448                 } else {
449                     sserr += d[col] * rhs[col] * rhs[col];
450                 }
451             }
452         }
453     }
454 
455     /**
456      * Calculates the sum of squared errors for the full regression
457      * and all subsets in the following manner: <pre>
458      * rss[] ={
459      * ResidualSumOfSquares_allNvars,
460      * ResidualSumOfSquares_FirstNvars-1,
461      * ResidualSumOfSquares_FirstNvars-2,
462      * ..., ResidualSumOfSquares_FirstVariable} </pre>
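     * <p>Equivalently, {@code rss[k - 1]} is the residual sum of squares when only
     * the first {@code k} regressors (in the current order) are included, computed
     * as {@code sserr + sum of d[i] * rhs[i]^2 over i &gt;= k}.</p>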
463      */
464     private void ss() {
465         double total = sserr;
466         rss[nvars - 1] = sserr;
467         for (int i = nvars - 1; i > 0; i--) {
468             total += d[i] * rhs[i] * rhs[i];
469             rss[i - 1] = total;
470         }
471         rss_set = true;
472     }
473 
474     /**
475      * Calculates the cov matrix assuming only the first nreq variables are
476      * included in the calculation. The returned array contains a symmetric
477      * matrix stored in lower triangular form. The matrix will have
478      * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
479      * cov =
480      * {
481      *  cov_00,
482      *  cov_10, cov_11,
483      *  cov_20, cov_21, cov22,
484      *  ...
485      * } </pre>
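     * <p>The (i, j) entry with {@code j &lt;= i} is therefore found at index
     * {@code i * (i + 1) / 2 + j} of the returned array.</p>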
486      *
487      * @param nreq how many of the regressors to include (either in canonical
488      * order, or in the current reordered state)
489      * @return an array with the variance covariance of the included
490      * regressors in lower triangular form
491      */
492     private double[] cov(int nreq) {
493         if (this.nobs <= nreq) {
494             return null;
495         }
496         double rnk = 0.0;
497         for (int i = 0; i < nreq; i++) {
498             if (!this.lindep[i]) {
499                 rnk += 1.0;
500             }
501         }
502         final double var = rss[nreq - 1] / (nobs - rnk);
503         final double[] rinv = new double[nreq * (nreq - 1) / 2];
504         inverse(rinv, nreq);
505         final double[] covmat = new double[nreq * (nreq + 1) / 2];
506         Arrays.fill(covmat, Double.NaN);
507         int pos2;
508         int pos1;
509         int start = 0;
510         double total = 0;
511         for (int row = 0; row < nreq; row++) {
512             pos2 = start;
513             if (!this.lindep[row]) {
514                 for (int col = row; col < nreq; col++) {
515                     if (!this.lindep[col]) {
516                         pos1 = start + col - row;
517                         if (row == col) {
518                             total = 1.0 / d[col];
519                         } else {
520                             total = rinv[pos1 - 1] / d[col];
521                         }
522                         for (int k = col + 1; k < nreq; k++) {
523                             if (!this.lindep[k]) {
524                                 total += rinv[pos1] * rinv[pos2] / d[k];
525                             }
526                             ++pos1;
527                             ++pos2;
528                         }
529                         covmat[(col + 1) * col / 2 + row] = total * var;
530                     } else {
531                         pos2 += nreq - col - 1;
532                     }
533                 }
534             }
535             start += nreq - row - 1;
536         }
537         return covmat;
538     }
539 
540     /**
541      * This internal method calculates the inverse of the upper-triangular portion
542      * of the R matrix.
543      * @param rinv  the storage for the inverse of r
544      * @param nreq how many of the regressors to include (either in canonical
545      * order, or in the current reordered state)
546      */
547     private void inverse(double[] rinv, int nreq) {
548         int pos = nreq * (nreq - 1) / 2 - 1;
549         int pos1 = -1;
550         int pos2 = -1;
551         double total = 0.0;
552         Arrays.fill(rinv, Double.NaN);
553         for (int row = nreq - 1; row > 0; --row) {
554             if (!this.lindep[row]) {
555                 final int start = (row - 1) * (nvars + nvars - row) / 2;
556                 for (int col = nreq; col > row; --col) {
557                     pos1 = start;
558                     pos2 = pos;
559                     total = 0.0;
560                     for (int k = row; k < col - 1; k++) {
561                         pos2 += nreq - k - 1;
562                         if (!this.lindep[k]) {
563                             total += -r[pos1] * rinv[pos2];
564                         }
565                         ++pos1;
566                     }
567                     rinv[pos] = total - r[pos1];
568                     --pos;
569                 }
570             } else {
571                 pos -= nreq - row;
572             }
573         }
574     }
575 
576     /**
577      * In the original algorithm only the partial correlations of the regressors
578      * are returned to the user. In this implementation, we have <pre>
579      * corr =
580      * {
581      *   corrxx - lower triangular
582      *   corrxy - bottom row of the matrix
583      * }
584      * Replaces subroutines PCORR and COR of:
585      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
586      *
587      * <p>Calculate partial correlations after the variables in rows
588      * 1, 2, ..., IN have been forced into the regression.
589      * If IN = 1, and the first row of R represents a constant in the
590      * model, then the usual simple correlations are returned.</p>
591      *
592      * <p>If IN = 0, the value returned in array CORMAT for the correlation
593      * of variables Xi & Xj is: <pre>
594      * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
595      *
596      * <p>On return, array CORMAT contains the upper triangle of the matrix of
597      * partial correlations stored by rows, excluding the 1's on the diagonal.
598      * e.g. if IN = 2, the consecutive elements returned are:
599      * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
600      * Array YCORR stores the partial correlations with the Y-variable
601      * starting with YCORR(IN+1) = partial correlation with the variable in
602      * position (IN+1). </p>
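     * <p>In terms of this packed layout, the correlation of regressors {@code i}
     * and {@code j} (with {@code in &lt;= i &lt; j}) is stored at index
     * {@code (j - 1 - in) * (j - in) / 2 + (i - in)}, and the correlations with
     * the regressand occupy the final {@code nvars - in} positions of the array.</p>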
603      *
604      * @param in how many of the regressors to include (either in canonical
605      * order, or in the current reordered state)
606      * @return an array with the partial correlations of the remainder of
607      * regressors with each other and the regressand, in lower triangular form
608      */
609     public double[] getPartialCorrelations(int in) {
610         final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
611         int pos;
612         int pos1;
613         int pos2;
614         final int rms_off = -in;
615         final int wrk_off = -(in + 1);
616         final double[] rms = new double[nvars - in];
617         final double[] work = new double[nvars - in - 1];
618         double sumxx;
619         double sumxy;
620         double sumyy;
621         final int offXX = (nvars - in) * (nvars - in - 1) / 2;
622         if (in < -1 || in >= nvars) {
623             return null;
624         }
625         final int nvm = nvars - 1;
626         final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
627         if (d[in] > 0.0) {
628             rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
629         }
630         for (int col = in + 1; col < nvars; col++) {
631             pos = base_pos + col - 1 - in;
632             sumxx = d[col];
633             for (int row = in; row < col; row++) {
634                 sumxx += d[row] * r[pos] * r[pos];
635                 pos += nvars - row - 2;
636             }
637             if (sumxx > 0.0) {
638                 rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
639             } else {
640                 rms[col + rms_off] = 0.0;
641             }
642         }
643         sumyy = sserr;
644         for (int row = in; row < nvars; row++) {
645             sumyy += d[row] * rhs[row] * rhs[row];
646         }
647         if (sumyy > 0.0) {
648             sumyy = 1.0 / FastMath.sqrt(sumyy);
649         }
650         pos = 0;
651         for (int col1 = in; col1 < nvars; col1++) {
652             sumxy = 0.0;
653             Arrays.fill(work, 0.0);
654             pos1 = base_pos + col1 - in - 1;
655             for (int row = in; row < col1; row++) {
656                 pos2 = pos1 + 1;
657                 for (int col2 = col1 + 1; col2 < nvars; col2++) {
658                     work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
659                     pos2++;
660                 }
661                 sumxy += d[row] * r[pos1] * rhs[row];
662                 pos1 += nvars - row - 2;
663             }
664             pos2 = pos1 + 1;
665             for (int col2 = col1 + 1; col2 < nvars; col2++) {
666                 work[col2 + wrk_off] += d[col1] * r[pos2];
667                 ++pos2;
668                 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
669                         work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
670                 ++pos;
671             }
672             sumxy += d[col1] * rhs[col1];
673             output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
674         }
675 
676         return output;
677     }
678 
679     /**
680      * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
681      * Move variable from position FROM to position TO in an
682      * orthogonal reduction produced by AS75.1.
683      *
684      * @param from initial position
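     * <p>Each adjacent swap applies a scaled planar rotation: with
     * {@code d1 = d[m]}, {@code d2 = d[m + 1]} and {@code X = r[m1]}, the updated
     * row multipliers are {@code d1new = d2 + d1 * X * X} and
     * {@code d2new = d1 * d2 / d1new}, using {@code cbar = d2 / d1new} and
     * {@code sbar = X * d1 / d1new}.</p>
     *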
685      * @param to destination
686      */
687     private void vmove(int from, int to) {
688         double d1;
689         double d2;
690         double X;
691         double d1new;
692         double d2new;
693         double cbar;
694         double sbar;
695         double Y;
696         int first;
697         int inc;
698         int m1;
699         int m2;
700         int mp1;
701         int pos;
702         boolean bSkipTo40 = false;
703         if (from == to) {
704             return;
705         }
706         if (!this.rss_set) {
707             ss();
708         }
709         int count = 0;
710         if (from < to) {
711             first = from;
712             inc = 1;
713             count = to - from;
714         } else {
715             first = from - 1;
716             inc = -1;
717             count = from - to;
718         }
719 
720         int m = first;
721         int idx = 0;
722         while (idx < count) {
723             m1 = m * (nvars + nvars - m - 1) / 2;
724             m2 = m1 + nvars - m - 1;
725             mp1 = m + 1;
726 
727             d1 = d[m];
728             d2 = d[mp1];
729             // Special cases.
730             if (d1 > this.epsilon || d2 > this.epsilon) {
731                 X = r[m1];
732                 if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
733                     X = 0.0;
734                 }
735                 if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
736                     d[m] = d2;
737                     d[mp1] = d1;
738                     r[m1] = 0.0;
739                     for (int col = m + 2; col < nvars; col++) {
740                         ++m1;
741                         X = r[m1];
742                         r[m1] = r[m2];
743                         r[m2] = X;
744                         ++m2;
745                     }
746                     X = rhs[m];
747                     rhs[m] = rhs[mp1];
748                     rhs[mp1] = X;
749                     bSkipTo40 = true;
750                     //break;
751                 } else if (d2 < this.epsilon) {
752                     d[m] = d1 * X * X;
753                     r[m1] = 1.0 / X;
754                     for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
755                         r[_i] /= X;
756                     }
757                     rhs[m] /= X;
758                     bSkipTo40 = true;
759                     //break;
760                 }
761                 if (!bSkipTo40) {
762                     d1new = d2 + d1 * X * X;
763                     cbar = d2 / d1new;
764                     sbar = X * d1 / d1new;
765                     d2new = d1 * cbar;
766                     d[m] = d1new;
767                     d[mp1] = d2new;
768                     r[m1] = sbar;
769                     for (int col = m + 2; col < nvars; col++) {
770                         ++m1;
771                         Y = r[m1];
772                         r[m1] = cbar * r[m2] + sbar * Y;
773                         r[m2] = Y - X * r[m2];
774                         ++m2;
775                     }
776                     Y = rhs[m];
777                     rhs[m] = cbar * rhs[mp1] + sbar * Y;
778                     rhs[mp1] = Y - X * rhs[mp1];
779                 }
780             }
781             if (m > 0) {
782                 pos = m;
783                 for (int row = 0; row < m; row++) {
784                     X = r[pos];
785                     r[pos] = r[pos - 1];
786                     r[pos - 1] = X;
787                     pos += nvars - row - 2;
788                 }
789             }
790             // Adjust variable order (VORDER), the tolerances (TOL) and
791             // the vector of residual sums of squares (RSS).
792             m1 = vorder[m];
793             vorder[m] = vorder[mp1];
794             vorder[mp1] = m1;
795             X = tol[m];
796             tol[m] = tol[mp1];
797             tol[mp1] = X;
798             rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
799 
800             m += inc;
801             ++idx;
802         }
803     }
804 
805     /**
806      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
807      *
808      * <p> Re-order the variables in an orthogonal reduction produced by
809      * AS75.1 so that the N variables in LIST start at position POS1,
810      * though will not necessarily be in the same order as in LIST.
811      * Any variables in VORDER before position POS1 are not moved.
812      * Auxiliary routine called: VMOVE. </p>
813      *
814      * <p>This internal method reorders the regressors.</p>
815      *
816      * @param list the regressors to move
817      * @param pos1 where the list will be placed
818      * @return -1 on error, 0 if everything is ok
819      */
820     private int reorderRegressors(int[] list, int pos1) {
821         int next;
822         int i;
823         int l;
824         if (list.length < 1 || list.length > nvars + 1 - pos1) {
825             return -1;
826         }
827         next = pos1;
828         i = pos1;
829         while (i < nvars) {
830             l = vorder[i];
831             for (int j = 0; j < list.length; j++) {
832                 if (l == list[j] && i > next) {
833                     this.vmove(i, next);
834                     ++next;
835                     if (next >= list.length + pos1) {
836                         return 0;
837                     } else {
838                         break;
839                     }
840                 }
841             }
842             ++i;
843         }
844         return 0;
845     }
846 
847     /**
848      * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
849      *
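     * <p>A short usage sketch; the row values are illustrative and follow the same
     * layout as {@link #addObservation(double[], double)} (the constant, if any,
     * is added internally):</p>
     * <pre>{@code
     * double leverage = regression.getDiagonalOfHatMatrix(new double[]{1.5, 2.5});
     * }</pre>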
850      * @param  row_data the regressor values for the observation of interest
851      * @return the diagonal element of the hat matrix
852      */
853     public double getDiagonalOfHatMatrix(double[] row_data) {
854         double[] wk = new double[this.nvars];
855         int pos;
856         double total;
857 
858         if (row_data.length > nvars) {
859             return Double.NaN;
860         }
861         double[] xrow;
862         if (this.hasIntercept) {
863             xrow = new double[row_data.length + 1];
864             xrow[0] = 1.0;
865             System.arraycopy(row_data, 0, xrow, 1, row_data.length);
866         } else {
867             xrow = row_data;
868         }
869         double hii = 0.0;
870         for (int col = 0; col < xrow.length; col++) {
871             if (FastMath.sqrt(d[col]) < tol[col]) {
872                 wk[col] = 0.0;
873             } else {
874                 pos = col - 1;
875                 total = xrow[col];
876                 for (int row = 0; row < col; row++) {
877                     total = smartAdd(total, -wk[row] * r[pos]);
878                     pos += nvars - row - 2;
879                 }
880                 wk[col] = total;
881                 hii = smartAdd(hii, (total * total) / d[col]);
882             }
883         }
884         return hii;
885     }
886 
887     /**
888      * Gets the order of the regressors, useful if some type of reordering
889      * has been called. Calling {@code regress} with an {@code int[]} argument
890      * will trigger a reordering.
891      *
892      * @return int[] with the current order of the regressors
893      */
894     public int[] getOrderOfRegressors(){
895         return MathArrays.copyOf(vorder);
896     }
897 
898     /**
899      * Conducts a regression on the data in the model, using all regressors.
900      *
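     * <p>A short sketch of reading back results; the accessors shown are part of
     * {@link RegressionResults}:</p>
     * <pre>{@code
     * RegressionResults results = regression.regress();
     * double[] beta = results.getParameterEstimates();
     * double rSquared = results.getRSquared();
     * }</pre>
     *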
901      * @return RegressionResults the structure holding all regression results
902      * @exception  ModelSpecificationException - thrown if number of observations is
903      * less than the number of variables
904      */
905     public RegressionResults regress() throws ModelSpecificationException {
906         return regress(this.nvars);
907     }
908 
909     /**
910      * Conducts a regression on the data in the model, using a subset of regressors.
911      *
912      * @param numberOfRegressors how many of the regressors to include (either in canonical
913      * order, or in the current reordered state)
914      * @return RegressionResults the structure holding all regression results
915      * @exception  ModelSpecificationException - thrown if number of observations is
916      * less than the number of variables or number of regressors requested
917      * is greater than the regressors in the model
918      */
919     public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
920         if (this.nobs <= numberOfRegressors) {
921            throw new ModelSpecificationException(
922                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
923                    this.nobs, numberOfRegressors);
924         }
925         if (numberOfRegressors > this.nvars) {
926             throw new ModelSpecificationException(
927                     LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
928         }
929 
930         tolset();
931         singcheck();
932 
933         double[] beta = this.regcf(numberOfRegressors);
934 
935         ss();
936 
937         double[] cov = this.cov(numberOfRegressors);
938 
939         int rnk = 0;
940         for (int i = 0; i < this.lindep.length; i++) {
941             if (!this.lindep[i]) {
942                 ++rnk;
943             }
944         }
945 
946         boolean needsReorder = false;
947         for (int i = 0; i < numberOfRegressors; i++) {
948             if (this.vorder[i] != i) {
949                 needsReorder = true;
950                 break;
951             }
952         }
953         if (!needsReorder) {
954             return new RegressionResults(
955                     beta, new double[][]{cov}, true, this.nobs, rnk,
956                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
957         } else {
958             double[] betaNew = new double[beta.length];
959             double[] covNew = new double[cov.length];
960 
961             int[] newIndices = new int[beta.length];
962             for (int i = 0; i < nvars; i++) {
963                 for (int j = 0; j < numberOfRegressors; j++) {
964                     if (this.vorder[j] == i) {
965                         betaNew[i] = beta[j];
966                         newIndices[i] = j;
967                     }
968                 }
969             }
970 
971             int idx1 = 0;
972             int idx2;
973             int _i;
974             int _j;
975             for (int i = 0; i < beta.length; i++) {
976                 _i = newIndices[i];
977                 for (int j = 0; j <= i; j++, idx1++) {
978                     _j = newIndices[j];
979                     if (_i > _j) {
980                         idx2 = _i * (_i + 1) / 2 + _j;
981                     } else {
982                         idx2 = _j * (_j + 1) / 2 + _i;
983                     }
984                     covNew[idx1] = cov[idx2];
985                 }
986             }
987             return new RegressionResults(
988                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
989                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
990         }
991     }
992 
993     /**
994      * Conducts a regression on the data in the model, using the regressors in
995      * array {@code variablesToInclude}. Calling this method will change the internal
996      * order of the regressors, and care is required in interpreting the hat matrix.
997      *
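     * <p>For example, to regress on only the first and third regressors (canonical
     * indices; index 0 is the constant when the model includes one):</p>
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[]{0, 2});
     * }</pre>
     *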
998      * @param  variablesToInclude array of variables to include in regression
999      * @return RegressionResults the structure holding all regression results
1000      * @exception  ModelSpecificationException - thrown if number of observations is
1001      * less than the number of variables, the number of regressors requested
1002      * is greater than the regressors in the model or a regressor index in
1003      * regressor array does not exist
1004      */
1005     public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1006         if (variablesToInclude.length > this.nvars) {
1007             throw new ModelSpecificationException(
1008                     LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1009         }
1010         if (this.nobs <= this.nvars) {
1011             throw new ModelSpecificationException(
1012                     LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1013                     this.nobs, this.nvars);
1014         }
1015         Arrays.sort(variablesToInclude);
1016         int iExclude = 0;
1017         for (int i = 0; i < variablesToInclude.length; i++) {
1018             if (variablesToInclude[i] >= this.nvars) {
1019                 throw new ModelSpecificationException(
1020                         LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
1021             }
1022             if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1023                 variablesToInclude[i] = -1;
1024                 ++iExclude;
1025             }
1026         }
1027         int[] series;
1028         if (iExclude > 0) {
1029             int j = 0;
1030             series = new int[variablesToInclude.length - iExclude];
1031             for (int i = 0; i < variablesToInclude.length; i++) {
1032                 if (variablesToInclude[i] > -1) {
1033                     series[j] = variablesToInclude[i];
1034                     ++j;
1035                 }
1036             }
1037         } else {
1038             series = variablesToInclude;
1039         }
1040 
1041         reorderRegressors(series, 0);
1042         tolset();
1043         singcheck();
1044 
1045         double[] beta = this.regcf(series.length);
1046 
1047         ss();
1048 
1049         double[] cov = this.cov(series.length);
1050 
1051         int rnk = 0;
1052         for (int i = 0; i < this.lindep.length; i++) {
1053             if (!this.lindep[i]) {
1054                 ++rnk;
1055             }
1056         }
1057 
1058         boolean needsReorder = false;
1059         for (int i = 0; i < series.length; i++) {
1060             if (this.vorder[i] != series[i]) {
1061                 needsReorder = true;
1062                 break;
1063             }
1064         }
1065         if (!needsReorder) {
1066             return new RegressionResults(
1067                     beta, new double[][]{cov}, true, this.nobs, rnk,
1068                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1069         } else {
1070             double[] betaNew = new double[beta.length];
1071             int[] newIndices = new int[beta.length];
1072             for (int i = 0; i < series.length; i++) {
1073                 for (int j = 0; j < this.vorder.length; j++) {
1074                     if (this.vorder[j] == series[i]) {
1075                         betaNew[i] = beta[j];
1076                         newIndices[i] = j;
1077                     }
1078                 }
1079             }
1080             double[] covNew = new double[cov.length];
1081             int idx1 = 0;
1082             int idx2;
1083             int _i;
1084             int _j;
1085             for (int i = 0; i < beta.length; i++) {
1086                 _i = newIndices[i];
1087                 for (int j = 0; j <= i; j++, idx1++) {
1088                     _j = newIndices[j];
1089                     if (_i > _j) {
1090                         idx2 = _i * (_i + 1) / 2 + _j;
1091                     } else {
1092                         idx2 = _j * (_j + 1) / 2 + _i;
1093                     }
1094                     covNew[idx1] = cov[idx2];
1095                 }
1096             }
1097             return new RegressionResults(
1098                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1099                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1100         }
1101     }
1102 }