/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.legacy.stat.regression;

import java.util.Arrays;

import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
import org.apache.commons.math4.core.jdkmath.JdkMath;
import org.apache.commons.numbers.core.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
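 * <p>A minimal usage sketch (the data values are illustrative only):</p>
 * <pre>{@code
 * // two regressors plus an automatically added intercept
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[] {1.0, 2.0}, 3.0);
 * regression.addObservation(new double[] {2.0, 5.0}, 8.0);
 * regression.addObservation(new double[] {3.0, 1.0}, 5.0);
 * regression.addObservation(new double[] {4.0, 4.0}, 9.0);
 *
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates(); // intercept first
 * }</pre>
 *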
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression. */
    private final int nvars;
    /** diagonals of cross products matrix. */
    private final double[] d;
    /** the elements of R'y. */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix. */
    private final double[] r;
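    // Note: r holds the strict upper triangle of the R matrix packed row by
    // row, i.e. (0,1), (0,2), ..., (0,nvars-1), (1,2), ..., so the
    // off-diagonal entries of row i begin at index i * (2 * nvars - i - 1) / 2.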
    /** the tolerance for each of the variables. */
    private final double[] tol;
    /** residual sum of squares for all nested regressions. */
    private final double[] rss;
    /** order of the regressors. */
    private final int[] vorder;
    /** scratch space for tolerance calc. */
    private final double[] workTolset;
    /** number of observations entered. */
    private long nobs;
    /** sum of squared errors of largest regression. */
    private double sserr;
    /** have the residual sums of squares been computed (has ss() been called)? */
    private boolean rssSet;
    /** has the tolerance setting method been called. */
    private boolean tolSet;
    /** flags for variables with linear dependency problems. */
    private final boolean[] lindep;
    /** singular x values. */
    private final double[] xSing;
    /** workspace for singularity method. */
    private final double[] workSing;
    /** summation of Y variable. */
    private double sumy;
    /** summation of squared Y values. */
    private double sumsqy;
    /** boolean flag whether a regression constant is added. */
    private final boolean hasIntercept;
    /** zero tolerance. */
    private final double epsilon;
    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.xSing = new double[this.nvars];
        this.workSing = new double[this.nvars];
        this.workTolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
               (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(Arrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }

    /**
     * Adds multiple observations to the model.
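     * <p>For example (values are illustrative only), for a two-regressor model:</p>
     * <pre>{@code
     * double[][] x = {{1.0, 2.0}, {2.0, 5.0}, {3.0, 1.0}, {4.0, 4.0}};
     * double[] y = {3.0, 8.0, 5.0, 9.0};
     * regression.addObservations(x, y);
     * }</pre>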
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if (x == null || y == null || x.length != y.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                  (x == null) ? 0 : x.length,
                  (y == null) ? 0 : y.length);
        }
        if (x.length == 0) {  // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                  LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This statement forms all
     * intermediate data which will be used for all derivative measures.
     * According to the Miller paper, note that in the original implementation the x vector
     * is overwritten. In this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes dense
     * design matrices; the original Gentleman algorithm retains some advantage on
     * sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation (-1,1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double wPrev;
        this.rssSet = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            wPrev = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (JdkMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers a and b such that contamination due to the
     * numerical smallness of one addend does not corrupt the sum.
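     * For example, {@code smartAdd(1.0, 1.0e-20)} returns {@code 1.0}: the smaller
     * addend is below the relative precision of the larger one and would only
     * contribute rounding noise.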
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double aa = JdkMath.abs(a);
        final double ba = JdkMath.abs(b);
        if (aa > ba) {
            final double eps = aa * Precision.EPSILON;
            if (ba > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = ba * Precision.EPSILON;
            if (aa > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything into the
     * canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.workTolset, 0.0);
        Arrays.fill(this.workSing, 0.0);
        Arrays.fill(this.xSing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rssSet = false;
        this.tolSet = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.workTolset[i] = JdkMath.sqrt(d[i]);
        }
        tol[0] = eps * this.workTolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = workTolset[col];
            for (int row = 0; row < col; row++) {
                total += JdkMath.abs(r[pos]) * workTolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tolSet = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tolSet) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (JdkMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            workSing[i] = JdkMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (JdkMath.abs(r[pos]) * workSing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (workSing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(xSing, 0.0);
                    int pi = col * (nvars + nvars - col - 1) / 2;
                    for (int xi = col + 1; xi < nvars; xi++, pi++) {
                        xSing[xi] = r[pi];
                        r[pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(xSing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] ={
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rssSet = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
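     * <p>A usage sketch, assuming a model built with an intercept (so the
     * constant occupies row 0): forcing only the constant into the regression
     * yields the usual simple correlations of the remaining regressors and
     * the regressand:</p>
     * <pre>{@code
     * double[] corr = regression.getPartialCorrelations(1);
     * // corrxx packed in lower triangular form, followed by corrxy
     * }</pre>
     *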
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rmsOff = -in;
        final int wrkOff = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = basePos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx);
            } else {
                rms[col + rmsOff] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / JdkMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = basePos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrkOff] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double x;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        if (from == to) {
            return;
        }
        if (!this.rssSet) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            // Emulates the "go to 40" of the Fortran original; it must start
            // out false for each pair of adjacent variables being swapped.
            boolean bSkipTo40 = false;
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                x = r[m1];
                if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) {
                    x = 0.0;
                }
                if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        x = r[m1];
                        r[m1] = r[m2];
                        r[m2] = x;
                        ++m2;
                    }
                    x = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = x;
                    bSkipTo40 = true;
                    //break;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * x * x;
                    r[m1] = 1.0 / x;
                    for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) {
                        r[i] /= x;
                    }
                    rhs[m] /= x;
                    bSkipTo40 = true;
                    //break;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * x * x;
                    cbar = d2 / d1new;
                    sbar = x * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * y;
                        r[m2] = y - x * r[m2];
                        ++m2;
                    }
                    y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * y;
                    rhs[mp1] = y - x * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    x = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = x;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            x = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = x;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
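     * <p>A usage sketch (values are illustrative only), for a model built with
     * an intercept and two regressors:</p>
     * <pre>{@code
     * double leverage = regression.getDiagonalOfHatMatrix(new double[] {2.0, 5.0});
     * }</pre>
     *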
     * @param rowData the regressor values for the observation of interest
     * @return the corresponding diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] rowData) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (rowData.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[rowData.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(rowData, 0, xrow, 1, rowData.length);
        } else {
            xrow = rowData;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (JdkMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling {@link #regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return Arrays.copyOf(vorder, vorder.length);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables or the number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the given array.
     * Calling this method will change the internal order of the regressors
     * and care is required in interpreting the hat matrix.
     *
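     * <p>A usage sketch (indices are illustrative): with an intercept present the
     * constant occupies index 0, so the call below regresses on the constant and
     * the second regressor only:</p>
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[] {0, 2});
     * }</pre>
     *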
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model or a regressor index in the
     * regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}