View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.regression;
18  
19  import java.util.Arrays;
20  import org.apache.commons.math.exception.util.LocalizedFormats;
21  import org.apache.commons.math.util.FastMath;
22  import org.apache.commons.math.util.MathUtils;
23  import org.apache.commons.math.util.MathArrays;
24  
25  /**
26   * <p>This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.</p>
27   *
28   * <p>The algorithm is described in: <pre>
29   * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
30   * Author(s): Alan J. Miller
31   * Source: Journal of the Royal Statistical Society.
32   * Series C (Applied Statistics), Vol. 41, No. 2
33   * (1992), pp. 458-478
34   * Published by: Blackwell Publishing for the Royal Statistical Society
35   * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
36   *
37   * <p>This method for multiple regression forms the solution to the OLS problem
38   * by updating the QR decomposition as described by Gentleman.</p>
39   *
40   * @version $Id: MillerUpdatingRegression.java 1182134 2011-10-11 22:55:08Z erans $
41   * @since 3.0
42   */
43  public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
44  
45      /** number of variables in regression */
46      private final int nvars;
47      /** diagonals of cross products matrix */
48      private final double[] d;
49      /** the elements of the R`Y */
50      private final double[] rhs;
51      /** the off diagonal portion of the R matrix */
52      private final double[] r;
53      /** the tolerance for each of the variables */
54      private final double[] tol;
55      /** residual sum of squares for all nested regressions */
56      private final double[] rss;
57      /** order of the regressors */
58      private final int[] vorder;
59      /** scratch space for tolerance calc */
60      private final double[] work_tolset;
61      /** number of observations entered */
62      private long nobs = 0;
63      /** sum of squared errors of largest regression */
64      private double sserr = 0.0;
65      /** has rss been called? */
66      private boolean rss_set = false;
67      /** has the tolerance setting method been called */
68      private boolean tol_set = false;
69      /** flags for variables with linear dependency problems */
70      private final boolean[] lindep;
71      /** singular x values */
72      private final double[] x_sing;
73      /** workspace for singularity method */
74      private final double[] work_sing;
75      /** summation of Y variable */
76      private double sumy = 0.0;
77      /** summation of squared Y values */
78      private double sumsqy = 0.0;
79      /** boolean flag whether a regression constant is added */
80      private boolean hasIntercept;
81      /** zero tolerance */
82      private final double epsilon;
83      /**
84       *  Set the default constructor to private access
85       *  to prevent inadvertent instantiation
86       */
87      @SuppressWarnings("unused")
88      private MillerUpdatingRegression() {
89          this.d = null;
90          this.hasIntercept = false;
91          this.lindep = null;
92          this.nobs = -1;
93          this.nvars = -1;
94          this.r = null;
95          this.rhs = null;
96          this.rss = null;
97          this.rss_set = false;
98          this.sserr = Double.NaN;
99          this.sumsqy = Double.NaN;
100         this.sumy = Double.NaN;
101         this.tol = null;
102         this.tol_set = false;
103         this.vorder = null;
104         this.work_sing = null;
105         this.work_tolset = null;
106         this.x_sing = null;
107         this.epsilon = Double.NaN;
108     }
109 
110     /**
111      * This is the augmented constructor for the MillerUpdatingRegression class
112      *
113      * @param numberOfVariables number of regressors to expect, not including constant
114      * @param includeConstant include a constant automatically
115      * @param errorTolerance  zero tolerance, how machine zero is determined
116      */
117     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance) {
118         if (numberOfVariables < 1) {
119             throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
120         }
121         if (includeConstant) {
122             this.nvars = numberOfVariables + 1;
123         } else {
124             this.nvars = numberOfVariables;
125         }
126         this.hasIntercept = includeConstant;
127         this.nobs = 0;
128         this.d = new double[this.nvars];
129         this.rhs = new double[this.nvars];
130         this.r = new double[this.nvars * (this.nvars - 1) / 2];
131         this.tol = new double[this.nvars];
132         this.rss = new double[this.nvars];
133         this.vorder = new int[this.nvars];
134         this.x_sing = new double[this.nvars];
135         this.work_sing = new double[this.nvars];
136         this.work_tolset = new double[this.nvars];
137         this.lindep = new boolean[this.nvars];
138         for (int i = 0; i < this.nvars; i++) {
139             vorder[i] = i;
140         }
141         if (errorTolerance > 0) {
142             this.epsilon = errorTolerance;
143         } else {
144             this.epsilon = -errorTolerance;
145         }
146         return;
147     }
148 
149     /**
150      * Primary constructor for the MillerUpdatingRegression
151      *
152      * @param numberOfVariables maximum number of potential regressors
153      * @param includeConstant include a constant automatically
154      */
155     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant) {
156         this(numberOfVariables, includeConstant, MathUtils.EPSILON);
157     }
158 
159     /**
160      * A getter method which determines whether a constant is included
161      * @return true regression has an intercept, false no intercept
162      */
163     public boolean hasIntercept() {
164         return this.hasIntercept;
165     }
166 
167     /**
168      * Gets the number of observations added to the regression model
169      * @return number of observations
170      */
171     public long getN() {
172         return this.nobs;
173     }
174 
175     /**
176      * Adds an observation to the regression model
177      * @param x the array with regressor values
178      * @param y  the value of dependent variable given these regressors
179      * @exception ModelSpecificationException if the length of {@code x} does not equal
180      * the number of independent variables in the model
181      */
182     public void addObservation(final double[] x, final double y) {
183 
184         if ((!this.hasIntercept && x.length != nvars) ||
185                (this.hasIntercept && x.length + 1 != nvars)) {
186             throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
187                     x.length, nvars);
188         }
189         if (!this.hasIntercept) {
190             include(MathArrays.copyOf(x, x.length), 1.0, y);
191         } else {
192             double[] tmp = new double[x.length + 1];
193             System.arraycopy(x, 0, tmp, 1, x.length);
194             tmp[0] = 1.0;
195             include(tmp, 1.0, y);
196         }
197         ++nobs;
198         return;
199 
200     }
201 
202     /**
203      * Adds multiple observations to the model
204      * @param x observations on the regressors
205      * @param y observations on the regressand
206      * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
207      * the length of {@code y} or does not contain sufficient data to estimate the model
208      */
209     public void addObservations(double[][] x, double[] y) {
210         if ((x == null) || (y == null) || (x.length != y.length)) {
211             throw new ModelSpecificationException(
212                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
213                   (x == null) ? 0 : x.length,
214                   (y == null) ? 0 : y.length);
215         }
216         if (x.length == 0) {  // Must be no y data either
217             throw new ModelSpecificationException(
218                     LocalizedFormats.NO_DATA);
219         }
220         if (x[0].length + 1 > x.length) {
221             throw new ModelSpecificationException(
222                   LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
223                   x.length, x[0].length);
224         }
225         for (int i = 0; i < x.length; i++) {
226             this.addObservation(x[i], y[i]);
227         }
228         return;
229     }
230 
231     /**
232      * The include method is where the QR decomposition occurs. This statement forms all
233      * intermediate data which will be used for all derivative measures.
234      * According to the miller paper, note that in the original implementation the x vector
235      * is overwritten. In this implementation, the include method is passed a copy of the
236      * original data vector so that there is no contamination of the data. Additionally,
237      * this method differs slightly from Gentleman's method, in that the assumption is
238      * of dense design matrices, there is some advantage in using the original gentleman algorithm
239      * on sparse matrices.
240      *
241      * @param x observations on the regressors
242      * @param wi weight of the this observation (-1,1)
243      * @param yi observation on the regressand
244      */
245     private void include(final double[] x, final double wi, final double yi) {
246         int nextr = 0;
247         double w = wi;
248         double y = yi;
249         double xi;
250         double di;
251         double wxi;
252         double dpi;
253         double xk;
254         double _w;
255         this.rss_set = false;
256         sumy = smartAdd(yi, sumy);
257         sumsqy = smartAdd(sumsqy, yi * yi);
258         for (int i = 0; i < x.length; i++) {
259             if (w == 0.0) {
260                 return;
261             }
262             xi = x[i];
263 
264             if (xi == 0.0) {
265                 nextr += nvars - i - 1;
266                 continue;
267             }
268             di = d[i];
269             wxi = w * xi;
270             _w = w;
271             if (di != 0.0) {
272                 dpi = smartAdd(di, wxi * xi);
273                 double tmp = wxi * xi / di;
274                 if (FastMath.abs(tmp) > MathUtils.EPSILON) {
275                     w = (di * w) / dpi;
276                 }
277             } else {
278                 dpi = wxi * xi;
279                 w = 0.0;
280             }
281             d[i] = dpi;
282             for (int k = i + 1; k < nvars; k++) {
283                 xk = x[k];
284                 x[k] = smartAdd(xk, -xi * r[nextr]);
285                 if (di != 0.0) {
286                     r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
287                 } else {
288                     r[nextr] = xk / xi;
289                 }
290                 ++nextr;
291             }
292             xk = y;
293             y = smartAdd(xk, -xi * rhs[i]);
294             if (di != 0.0) {
295                 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
296             } else {
297                 rhs[i] = xk / xi;
298             }
299         }
300         sserr = smartAdd(sserr, w * y * y);
301         return;
302     }
303 
304     /**
305      * Adds to number a and b such that the contamination due to
306      * numerical smallness of one addend does not corrupt the sum
307      * @param a - an addend
308      * @param b - an addend
309      * @return the sum of the a and b
310      */
311     private double smartAdd(double a, double b) {
312         double _a = FastMath.abs(a);
313         double _b = FastMath.abs(b);
314         if (_a > _b) {
315             double eps = _a * MathUtils.EPSILON;
316             if (_b > eps) {
317                 return a + b;
318             }
319             return a;
320         } else {
321             double eps = _b * MathUtils.EPSILON;
322             if (_a > eps) {
323                 return a + b;
324             }
325             return b;
326         }
327     }
328 
329     /**
330      * As the name suggests,  clear wipes the internals and reorders everything in the
331      * canonical order.
332      */
333     public void clear() {
334         Arrays.fill(this.d, 0.0);
335         Arrays.fill(this.rhs, 0.0);
336         Arrays.fill(this.r, 0.0);
337         Arrays.fill(this.tol, 0.0);
338         Arrays.fill(this.rss, 0.0);
339         Arrays.fill(this.work_tolset, 0.0);
340         Arrays.fill(this.work_sing, 0.0);
341         Arrays.fill(this.x_sing, 0.0);
342         Arrays.fill(this.lindep, false);
343         for (int i = 0; i < nvars; i++) {
344             this.vorder[i] = i;
345         }
346         this.nobs = 0;
347         this.sserr = 0.0;
348         this.sumy = 0.0;
349         this.sumsqy = 0.0;
350         this.rss_set = false;
351         this.tol_set = false;
352         return;
353     }
354 
355     /**
356      * This sets up tolerances for singularity testing.
357      */
358     private void tolset() {
359         int pos;
360         double total;
361         final double eps = this.epsilon;
362         for (int i = 0; i < nvars; i++) {
363             this.work_tolset[i] = Math.sqrt(d[i]);
364         }
365         tol[0] = eps * this.work_tolset[0];
366         for (int col = 1; col < nvars; col++) {
367             pos = col - 1;
368             total = work_tolset[col];
369             for (int row = 0; row < col; row++) {
370                 total += Math.abs(r[pos]) * work_tolset[row];
371                 pos += nvars - row - 2;
372             }
373             tol[col] = eps * total;
374         }
375         tol_set = true;
376         return;
377     }
378 
379     /**
380      * The regcf method conducts the linear regression and extracts the
381      * parameter vector. Notice that the algorithm can do subset regression
382      * with no alteration.
383      *
384      * @param nreq how many of the regressors to include (either in canonical
385      * order, or in the current reordered state)
386      * @return an array with the estimated slope coefficients
387      */
388     private double[] regcf(int nreq) {
389         int nextr;
390         if (nreq < 1) {
391             throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
392         }
393         if (nreq > this.nvars) {
394             throw new ModelSpecificationException(
395                     LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
396         }
397         if (!this.tol_set) {
398             tolset();
399         }
400         double[] ret = new double[nreq];
401         boolean rankProblem = false;
402         for (int i = nreq - 1; i > -1; i--) {
403             if (Math.sqrt(d[i]) < tol[i]) {
404                 ret[i] = 0.0;
405                 d[i] = 0.0;
406                 rankProblem = true;
407             } else {
408                 ret[i] = rhs[i];
409                 nextr = i * (nvars + nvars - i - 1) / 2;
410                 for (int j = i + 1; j < nreq; j++) {
411                     ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
412                     ++nextr;
413                 }
414             }
415         }
416         if (rankProblem) {
417             for (int i = 0; i < nreq; i++) {
418                 if (this.lindep[i]) {
419                     ret[i] = Double.NaN;
420                 }
421             }
422         }
423         return ret;
424     }
425 
426     /**
427      * The method which checks for singularities and then eliminates the offending
428      * columns.
429      */
430     private void singcheck() {
431         double temp;
432         double y;
433         double weight;
434         int pos;
435         for (int i = 0; i < nvars; i++) {
436             work_sing[i] = Math.sqrt(d[i]);
437         }
438         for (int col = 0; col < nvars; col++) {
439             // Set elements within R to zero if they are less than tol(col) in
440             // absolute value after being scaled by the square root of their row
441             // multiplier
442             temp = tol[col];
443             pos = col - 1;
444             for (int row = 0; row < col - 1; row++) {
445                 if (Math.abs(r[pos]) * work_sing[row] < temp) {
446                     r[pos] = 0.0;
447                 }
448                 pos += nvars - row - 2;
449             }
450             // If diagonal element is near zero, set it to zero, set appropriate
451             // element of LINDEP, and use INCLUD to augment the projections in
452             // the lower rows of the orthogonalization.
453             lindep[col] = false;
454             if (work_sing[col] < temp) {
455                 lindep[col] = true;
456                 if (col < nvars - 1) {
457                     Arrays.fill(x_sing, 0.0);
458                     int _pi = col * (nvars + nvars - col - 1) / 2;
459                     for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
460                         x_sing[_xi] = r[_pi];
461                         r[_pi] = 0.0;
462                     }
463                     y = rhs[col];
464                     weight = d[col];
465                     d[col] = 0.0;
466                     rhs[col] = 0.0;
467                     this.include(x_sing, weight, y);
468                 } else {
469                     sserr += d[col] * rhs[col] * rhs[col];
470                 }
471             }
472         }
473         return;
474     }
475 
476     /**
477      * Calculates the sum of squared errors for the full regression
478      * and all subsets in the following manner: <pre>
479      * rss[] ={
480      * ResidualSumOfSquares_allNvars,
481      * ResidualSumOfSquares_FirstNvars-1,
482      * ResidualSumOfSquares_FirstNvars-2,
483      * ..., ResidualSumOfSquares_FirstVariable} </pre>
484      */
485     private void ss() {
486         double total = sserr;
487         rss[nvars - 1] = sserr;
488         for (int i = nvars - 1; i > 0; i--) {
489             total += d[i] * rhs[i] * rhs[i];
490             rss[i - 1] = total;
491         }
492         rss_set = true;
493         return;
494     }
495 
496     /**
497      * Calculates the cov matrix assuming only the first nreq variables are
498      * included in the calculation. The returned array contains a symmetric
499      * matrix stored in lower triangular form. The matrix will have
500      * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
501      * cov =
502      * {
503      *  cov_00,
504      *  cov_10, cov_11,
505      *  cov_20, cov_21, cov22,
506      *  ...
507      * } </pre>
508      *
509      * @param nreq how many of the regressors to include (either in canonical
510      * order, or in the current reordered state)
511      * @return an array with the variance covariance of the included
512      * regressors in lower triangular form
513      */
514     private double[] cov(int nreq) {
515         if (this.nobs <= nreq) {
516             return null;
517         }
518         double rnk = 0.0;
519         for (int i = 0; i < nreq; i++) {
520             if (!this.lindep[i]) {
521                 rnk += 1.0;
522             }
523         }
524         double var = rss[nreq - 1] / (nobs - rnk);
525         double[] rinv = new double[nreq * (nreq - 1) / 2];
526         inverse(rinv, nreq);
527         double[] covmat = new double[nreq * (nreq + 1) / 2];
528         Arrays.fill(covmat, Double.NaN);
529         int pos2;
530         int pos1;
531         int start = 0;
532         double total = 0;
533         for (int row = 0; row < nreq; row++) {
534             pos2 = start;
535             if (!this.lindep[row]) {
536                 for (int col = row; col < nreq; col++) {
537                     if (!this.lindep[col]) {
538                         pos1 = start + col - row;
539                         if (row == col) {
540                             total = 1.0 / d[col];
541                         } else {
542                             total = rinv[pos1 - 1] / d[col];
543                         }
544                         for (int k = col + 1; k < nreq; k++) {
545                             if (!this.lindep[k]) {
546                                 total += rinv[pos1] * rinv[pos2] / d[k];
547                             }
548                             ++pos1;
549                             ++pos2;
550                         }
551                         covmat[ (col + 1) * col / 2 + row] = total * var;
552                     } else {
553                         pos2 += nreq - col - 1;
554                     }
555                 }
556             }
557             start += nreq - row - 1;
558         }
559         return covmat;
560     }
561 
562     /**
563      * This internal method calculates the inverse of the upper-triangular portion
564      * of the R matrix.
565      * @param rinv  the storage for the inverse of r
566      * @param nreq how many of the regressors to include (either in canonical
567      * order, or in the current reordered state)
568      */
569     private void inverse(double[] rinv, int nreq) {
570         int pos = nreq * (nreq - 1) / 2 - 1;
571         int pos1 = -1;
572         int pos2 = -1;
573         double total = 0.0;
574         int start;
575         Arrays.fill(rinv, Double.NaN);
576         for (int row = nreq - 1; row > 0; --row) {
577             if (!this.lindep[row]) {
578                 start = (row - 1) * (nvars + nvars - row) / 2;
579                 for (int col = nreq; col > row; --col) {
580                     pos1 = start;
581                     pos2 = pos;
582                     total = 0.0;
583                     for (int k = row; k < col - 1; k++) {
584                         pos2 += nreq - k - 1;
585                         if (!this.lindep[k]) {
586                             total += -r[pos1] * rinv[pos2];
587                         }
588                         ++pos1;
589                     }
590                     rinv[pos] = total - r[pos1];
591                     --pos;
592                 }
593             } else {
594                 pos -= nreq - row;
595             }
596         }
597         return;
598     }
599 
600     /**
601      * <p>In the original algorithm only the partial correlations of the regressors
602      * is returned to the user. In this implementation, we have <pre>
603      * corr =
604      * {
605      *   corrxx - lower triangular
606      *   corrxy - bottom row of the matrix
607      * }
608      * Replaces subroutines PCORR and COR of:
609      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre></p>
610      *
611      * <p>Calculate partial correlations after the variables in rows
612      * 1, 2, ..., IN have been forced into the regression.
613      * If IN = 1, and the first row of R represents a constant in the
614      * model, then the usual simple correlations are returned.</p>
615      *
616      * <p>If IN = 0, the value returned in array CORMAT for the correlation
617      * of variables Xi & Xj is: <pre>
618      * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
619      *
620      * <p>On return, array CORMAT contains the upper triangle of the matrix of
621      * partial correlations stored by rows, excluding the 1's on the diagonal.
622      * e.g. if IN = 2, the consecutive elements returned are:
623      * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
624      * Array YCORR stores the partial correlations with the Y-variable
625      * starting with YCORR(IN+1) = partial correlation with the variable in
626      * position (IN+1). </p>
627      *
628      * @param in how many of the regressors to include (either in canonical
629      * order, or in the current reordered state)
630      * @return an array with the partial correlations of the remainder of
631      * regressors with each other and the regressand, in lower triangular form
632      */
633     public double[] getPartialCorrelations(int in) {
634         double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
635         int base_pos;
636         int pos;
637         int pos1;
638         int pos2;
639         int rms_off = -in;
640         int wrk_off = -(in + 1);
641         double[] rms = new double[nvars - in];
642         double[] work = new double[nvars - in - 1];
643         double sumxx;
644         double sumxy;
645         double sumyy;
646         int offXX = (nvars - in) * (nvars - in - 1) / 2;
647         if (in < -1 || in >= nvars) {
648             return null;
649         }
650         int nvm = nvars - 1;
651         base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
652         if (d[in] > 0.0) {
653             rms[in + rms_off] = 1.0 / Math.sqrt(d[in]);
654         }
655         for (int col = in + 1; col < nvars; col++) {
656             pos = base_pos + col - 1 - in;
657             sumxx = d[col];
658             for (int row = in; row < col; row++) {
659                 sumxx += d[row] * r[pos] * r[pos];
660                 pos += nvars - row - 2;
661             }
662             if (sumxx > 0.0) {
663                 rms[col + rms_off] = 1.0 / Math.sqrt(sumxx);
664             } else {
665                 rms[col + rms_off] = 0.0;
666             }
667         }
668         sumyy = sserr;
669         for (int row = in; row < nvars; row++) {
670             sumyy += d[row] * rhs[row] * rhs[row];
671         }
672         if (sumyy > 0.0) {
673             sumyy = 1.0 / Math.sqrt(sumyy);
674         }
675         pos = 0;
676         for (int col1 = in; col1 < nvars; col1++) {
677             sumxy = 0.0;
678             Arrays.fill(work, 0.0);
679             pos1 = base_pos + col1 - in - 1;
680             for (int row = in; row < col1; row++) {
681                 pos2 = pos1 + 1;
682                 for (int col2 = col1 + 1; col2 < nvars; col2++) {
683                     work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
684                     pos2++;
685                 }
686                 sumxy += d[row] * r[pos1] * rhs[row];
687                 pos1 += nvars - row - 2;
688             }
689             pos2 = pos1 + 1;
690             for (int col2 = col1 + 1; col2 < nvars; col2++) {
691                 work[col2 + wrk_off] += d[col1] * r[pos2];
692                 ++pos2;
693                 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
694                         work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
695                 ++pos;
696             }
697             sumxy += d[col1] * rhs[col1];
698             output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
699         }
700 
701         return output;
702     }
703 
704     /**
705      * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
706      * Move variable from position FROM to position TO in an
707      * orthogonal reduction produced by AS75.1.
708      *
709      * @param from initial position
710      * @param to destination
711      */
712     private void vmove(int from, int to) {
713         double d1;
714         double d2;
715         double X;
716         double d1new;
717         double d2new;
718         double cbar;
719         double sbar;
720         double Y;
721         int first;
722         int inc;
723         int m1;
724         int m2;
725         int mp1;
726         int pos;
727         boolean bSkipTo40 = false;
728         if (from == to) {
729             return;
730         }
731         if (!this.rss_set) {
732             ss();
733         }
734         int count = 0;
735         if (from < to) {
736             first = from;
737             inc = 1;
738             count = to - from;
739         } else {
740             first = from - 1;
741             inc = -1;
742             count = from - to;
743         }
744 
745         int m = first;
746         int idx = 0;
747         while (idx < count) {
748             m1 = m * (nvars + nvars - m - 1) / 2;
749             m2 = m1 + nvars - m - 1;
750             mp1 = m + 1;
751 
752             d1 = d[m];
753             d2 = d[mp1];
754             // Special cases.
755             if (d1 > this.epsilon || d2 > this.epsilon) {
756                 X = r[m1];
757                 if (Math.abs(X) * Math.sqrt(d1) < tol[mp1]) {
758                     X = 0.0;
759                 }
760                 if (d1 < this.epsilon || Math.abs(X) < this.epsilon) {
761                     d[m] = d2;
762                     d[mp1] = d1;
763                     r[m1] = 0.0;
764                     for (int col = m + 2; col < nvars; col++) {
765                         ++m1;
766                         X = r[m1];
767                         r[m1] = r[m2];
768                         r[m2] = X;
769                         ++m2;
770                     }
771                     X = rhs[m];
772                     rhs[m] = rhs[mp1];
773                     rhs[mp1] = X;
774                     bSkipTo40 = true;
775                     //break;
776                 } else if (d2 < this.epsilon) {
777                     d[m] = d1 * X * X;
778                     r[m1] = 1.0 / X;
779                     for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
780                         r[_i] /= X;
781                     }
782                     rhs[m] = rhs[m] / X;
783                     bSkipTo40 = true;
784                     //break;
785                 }
786                 if (!bSkipTo40) {
787                     d1new = d2 + d1 * X * X;
788                     cbar = d2 / d1new;
789                     sbar = X * d1 / d1new;
790                     d2new = d1 * cbar;
791                     d[m] = d1new;
792                     d[mp1] = d2new;
793                     r[m1] = sbar;
794                     for (int col = m + 2; col < nvars; col++) {
795                         ++m1;
796                         Y = r[m1];
797                         r[m1] = cbar * r[m2] + sbar * Y;
798                         r[m2] = Y - X * r[m2];
799                         ++m2;
800                     }
801                     Y = rhs[m];
802                     rhs[m] = cbar * rhs[mp1] + sbar * Y;
803                     rhs[mp1] = Y - X * rhs[mp1];
804                 }
805             }
806             if (m > 0) {
807                 pos = m;
808                 for (int row = 0; row < m; row++) {
809                     X = r[pos];
810                     r[pos] = r[pos - 1];
811                     r[pos - 1] = X;
812                     pos += nvars - row - 2;
813                 }
814             }
815             // Adjust variable order (VORDER), the tolerances (TOL) and
816             // the vector of residual sums of squares (RSS).
817             m1 = vorder[m];
818             vorder[m] = vorder[mp1];
819             vorder[mp1] = m1;
820             X = tol[m];
821             tol[m] = tol[mp1];
822             tol[mp1] = X;
823             rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
824 
825             m += inc;
826             ++idx;
827         }
828     }
829 
830     /**
831      * <p>ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2</p>
832      *
833      * <p> Re-order the variables in an orthogonal reduction produced by
834      * AS75.1 so that the N variables in LIST start at position POS1,
835      * though will not necessarily be in the same order as in LIST.
836      * Any variables in VORDER before position POS1 are not moved.
837      * Auxiliary routine called: VMOVE. </p>
838      *
839      * <p>This internal method reorders the regressors.</p>
840      *
841      * @param list the regressors to move
842      * @param pos1 where the list will be placed
843      * @return -1 error, 0 everything ok
844      */
845     private int reorderRegressors(int[] list, int pos1) {
846         int next;
847         int i;
848         int l;
849         if (list.length < 1 || list.length > nvars + 1 - pos1) {
850             return -1;
851         }
852         next = pos1;
853         i = pos1;
854         while (i < nvars) {
855             l = vorder[i];
856             for (int j = 0; j < list.length; j++) {
857                 if (l == list[j]) {
858                     if (i > next) {
859                         this.vmove(i, next);
860                         ++next;
861                         if (next >= list.length + pos1) {
862                             return 0;
863                         } else {
864                             break;
865                         }
866                     }
867                 }
868             }
869             ++i;
870         }
871         return 0;
872     }
873 
874     /**
875      * Gets the diagonal of the Hat matrix also known as the leverage matrix.
876      *
877      * @param  row_data returns the diagonal of the hat matrix for this observation
878      * @return the diagonal element of the hatmatrix
879      */
880     public double getDiagonalOfHatMatrix(double[] row_data) {
881         double[] wk = new double[this.nvars];
882         int pos;
883         double total;
884 
885         if (row_data.length > nvars) {
886             return Double.NaN;
887         }
888         double[] xrow;
889         if (this.hasIntercept) {
890             xrow = new double[row_data.length + 1];
891             xrow[0] = 1.0;
892             System.arraycopy(row_data, 0, xrow, 1, row_data.length);
893         } else {
894             xrow = row_data;
895         }
896         double hii = 0.0;
897         for (int col = 0; col < xrow.length; col++) {
898             if (Math.sqrt(d[col]) < tol[col]) {
899                 wk[col] = 0.0;
900             } else {
901                 pos = col - 1;
902                 total = xrow[col];
903                 for (int row = 0; row < col; row++) {
904                     total = smartAdd(total, -wk[row] * r[pos]);
905                     pos += nvars - row - 2;
906                 }
907                 wk[col] = total;
908                 hii = smartAdd(hii, (total * total) / d[col]);
909             }
910         }
911         return hii;
912     }
913 
914     /**
915      * Gets the order of the regressors, useful if some type of reordering
916      * has been called. Calling regress with int[]{} args will trigger
917      * a reordering.
918      *
919      * @return int[] with the current order of the regressors
920      */
921     public int[] getOrderOfRegressors(){
922         return MathArrays.copyOf(vorder);
923     }
924 
925     /**
926      * Conducts a regression on the data in the model, using all regressors.
927      *
928      * @return RegressionResults the structure holding all regression results
929      * @exception  ModelSpecificationException - thrown if number of observations is
930      * less than the number of variables
931      */
932     public RegressionResults regress() throws ModelSpecificationException {
933         return regress(this.nvars);
934     }
935 
936     /**
937      * Conducts a regression on the data in the model, using a subset of regressors.
938      *
939      * @param numberOfRegressors many of the regressors to include (either in canonical
940      * order, or in the current reordered state)
941      * @return RegressionResults the structure holding all regression results
942      * @exception  ModelSpecificationException - thrown if number of observations is
943      * less than the number of variables or number of regressors requested
944      * is greater than the regressors in the model
945      */
946     public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
947         if (this.nobs <= numberOfRegressors) {
948            throw new ModelSpecificationException(
949                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
950                    this.nobs, numberOfRegressors);
951         }
952         if( numberOfRegressors > this.nvars ){
953             throw new ModelSpecificationException(
954                     LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
955         }
956         this.tolset();
957 
958         this.singcheck();
959 
960         double[] beta = this.regcf(numberOfRegressors);
961 
962         this.ss();
963 
964         double[] cov = this.cov(numberOfRegressors);
965 
966         int rnk = 0;
967         for (int i = 0; i < this.lindep.length; i++) {
968             if (!this.lindep[i]) {
969                 ++rnk;
970             }
971         }
972 
973         boolean needsReorder = false;
974         for (int i = 0; i < numberOfRegressors; i++) {
975             if (this.vorder[i] != i) {
976                 needsReorder = true;
977                 break;
978             }
979         }
980         if (!needsReorder) {
981             return new RegressionResults(
982                     beta, new double[][]{cov}, true, this.nobs, rnk,
983                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
984         } else {
985             double[] betaNew = new double[beta.length];
986             double[] covNew = new double[cov.length];
987 
988             int[] newIndices = new int[beta.length];
989             for (int i = 0; i < nvars; i++) {
990                 for (int j = 0; j < numberOfRegressors; j++) {
991                     if (this.vorder[j] == i) {
992                         betaNew[i] = beta[ j];
993                         newIndices[i] = j;
994                     }
995                 }
996             }
997 
998             int idx1 = 0;
999             int idx2;
1000             int _i;
1001             int _j;
1002             for (int i = 0; i < beta.length; i++) {
1003                 _i = newIndices[i];
1004                 for (int j = 0; j <= i; j++, idx1++) {
1005                     _j = newIndices[j];
1006                     if (_i > _j) {
1007                         idx2 = _i * (_i + 1) / 2 + _j;
1008                     } else {
1009                         idx2 = _j * (_j + 1) / 2 + _i;
1010                     }
1011                     covNew[idx1] = cov[idx2];
1012                 }
1013             }
1014             return new RegressionResults(
1015                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1016                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1017         }
1018     }
1019 
1020     /**
1021      * Conducts a regression on the data in the model, using regressors in array
1022      * Calling this method will change the internal order of the regressors
1023      * and care is required in interpreting the hatmatrix.
1024      *
1025      * @param  variablesToInclude array of variables to include in regression
1026      * @return RegressionResults the structure holding all regression results
1027      * @exception  ModelSpecificationException - thrown if number of observations is
1028      * less than the number of variables, the number of regressors requested
1029      * is greater than the regressors in the model or a regressor index in
1030      * regressor array does not exist
1031      */
1032     public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1033         if (variablesToInclude.length > this.nvars) {
1034             throw new ModelSpecificationException(
1035                     LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1036         }
1037         if (this.nobs <= this.nvars) {
1038             throw new ModelSpecificationException(
1039                     LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1040                     this.nobs, this.nvars);
1041         }
1042         Arrays.sort(variablesToInclude);
1043         int iExclude = 0;
1044         for (int i = 0; i < variablesToInclude.length; i++) {
1045             if (i >= this.nvars) {
1046                 throw new ModelSpecificationException(
1047                         LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
1048             }
1049             if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1050                 variablesToInclude[i] = -1;
1051                 ++iExclude;
1052             }
1053         }
1054         int[] series;
1055         if (iExclude > 0) {
1056             int j = 0;
1057             series = new int[variablesToInclude.length - iExclude];
1058             for (int i = 0; i < variablesToInclude.length; i++) {
1059                 if (variablesToInclude[i] > -1) {
1060                     series[j] = variablesToInclude[i];
1061                     ++j;
1062                 }
1063             }
1064         } else {
1065             series = variablesToInclude;
1066         }
1067 
1068         this.reorderRegressors(series, 0);
1069 
1070         this.tolset();
1071 
1072         this.singcheck();
1073 
1074         double[] beta = this.regcf(series.length);
1075 
1076         this.ss();
1077 
1078         double[] cov = this.cov(series.length);
1079 
1080         int rnk = 0;
1081         for (int i = 0; i < this.lindep.length; i++) {
1082             if (!this.lindep[i]) {
1083                 ++rnk;
1084             }
1085         }
1086 
1087         boolean needsReorder = false;
1088         for (int i = 0; i < this.nvars; i++) {
1089             if (this.vorder[i] != series[i]) {
1090                 needsReorder = true;
1091                 break;
1092             }
1093         }
1094         if (!needsReorder) {
1095             return new RegressionResults(
1096                     beta, new double[][]{cov}, true, this.nobs, rnk,
1097                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1098         } else {
1099             double[] betaNew = new double[beta.length];
1100             int[] newIndices = new int[beta.length];
1101             for (int i = 0; i < series.length; i++) {
1102                 for (int j = 0; j < this.vorder.length; j++) {
1103                     if (this.vorder[j] == series[i]) {
1104                         betaNew[i] = beta[ j];
1105                         newIndices[i] = j;
1106                     }
1107                 }
1108             }
1109             double[] covNew = new double[cov.length];
1110             int idx1 = 0;
1111             int idx2;
1112             int _i;
1113             int _j;
1114             for (int i = 0; i < beta.length; i++) {
1115                 _i = newIndices[i];
1116                 for (int j = 0; j <= i; j++, idx1++) {
1117                     _j = newIndices[j];
1118                     if (_i > _j) {
1119                         idx2 = _i * (_i + 1) / 2 + _j;
1120                     } else {
1121                         idx2 = _j * (_j + 1) / 2 + _i;
1122                     }
1123                     covNew[idx1] = cov[idx2];
1124                 }
1125             }
1126             return new RegressionResults(
1127                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1128                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1129         }
1130     }
1131 }