1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math.stat.regression;
18
19 import java.util.Arrays;
20 import org.apache.commons.math.exception.util.LocalizedFormats;
21 import org.apache.commons.math.util.FastMath;
22 import org.apache.commons.math.util.MathUtils;
23 import org.apache.commons.math.util.MathArrays;
24
25 /**
26 * <p>This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.</p>
27 *
28 * <p>The algorithm is described in: <pre>
29 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
30 * Author(s): Alan J. Miller
31 * Source: Journal of the Royal Statistical Society.
32 * Series C (Applied Statistics), Vol. 41, No. 2
33 * (1992), pp. 458-478
34 * Published by: Blackwell Publishing for the Royal Statistical Society
35 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
36 *
37 * <p>This method for multiple regression forms the solution to the OLS problem
38 * by updating the QR decomposition as described by Gentleman.</p>
39 *
40 * @version $Id: MillerUpdatingRegression.java 1182134 2011-10-11 22:55:08Z erans $
41 * @since 3.0
42 */
43 public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
44
45 /** number of variables in regression */
46 private final int nvars;
47 /** diagonals of cross products matrix */
48 private final double[] d;
49 /** the elements of the R`Y */
50 private final double[] rhs;
51 /** the off diagonal portion of the R matrix */
52 private final double[] r;
53 /** the tolerance for each of the variables */
54 private final double[] tol;
55 /** residual sum of squares for all nested regressions */
56 private final double[] rss;
57 /** order of the regressors */
58 private final int[] vorder;
59 /** scratch space for tolerance calc */
60 private final double[] work_tolset;
61 /** number of observations entered */
62 private long nobs = 0;
63 /** sum of squared errors of largest regression */
64 private double sserr = 0.0;
65 /** has rss been called? */
66 private boolean rss_set = false;
67 /** has the tolerance setting method been called */
68 private boolean tol_set = false;
69 /** flags for variables with linear dependency problems */
70 private final boolean[] lindep;
71 /** singular x values */
72 private final double[] x_sing;
73 /** workspace for singularity method */
74 private final double[] work_sing;
75 /** summation of Y variable */
76 private double sumy = 0.0;
77 /** summation of squared Y values */
78 private double sumsqy = 0.0;
79 /** boolean flag whether a regression constant is added */
80 private boolean hasIntercept;
81 /** zero tolerance */
82 private final double epsilon;
83 /**
84 * Set the default constructor to private access
85 * to prevent inadvertent instantiation
86 */
87 @SuppressWarnings("unused")
88 private MillerUpdatingRegression() {
89 this.d = null;
90 this.hasIntercept = false;
91 this.lindep = null;
92 this.nobs = -1;
93 this.nvars = -1;
94 this.r = null;
95 this.rhs = null;
96 this.rss = null;
97 this.rss_set = false;
98 this.sserr = Double.NaN;
99 this.sumsqy = Double.NaN;
100 this.sumy = Double.NaN;
101 this.tol = null;
102 this.tol_set = false;
103 this.vorder = null;
104 this.work_sing = null;
105 this.work_tolset = null;
106 this.x_sing = null;
107 this.epsilon = Double.NaN;
108 }
109
110 /**
111 * This is the augmented constructor for the MillerUpdatingRegression class
112 *
113 * @param numberOfVariables number of regressors to expect, not including constant
114 * @param includeConstant include a constant automatically
115 * @param errorTolerance zero tolerance, how machine zero is determined
116 */
117 public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance) {
118 if (numberOfVariables < 1) {
119 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
120 }
121 if (includeConstant) {
122 this.nvars = numberOfVariables + 1;
123 } else {
124 this.nvars = numberOfVariables;
125 }
126 this.hasIntercept = includeConstant;
127 this.nobs = 0;
128 this.d = new double[this.nvars];
129 this.rhs = new double[this.nvars];
130 this.r = new double[this.nvars * (this.nvars - 1) / 2];
131 this.tol = new double[this.nvars];
132 this.rss = new double[this.nvars];
133 this.vorder = new int[this.nvars];
134 this.x_sing = new double[this.nvars];
135 this.work_sing = new double[this.nvars];
136 this.work_tolset = new double[this.nvars];
137 this.lindep = new boolean[this.nvars];
138 for (int i = 0; i < this.nvars; i++) {
139 vorder[i] = i;
140 }
141 if (errorTolerance > 0) {
142 this.epsilon = errorTolerance;
143 } else {
144 this.epsilon = -errorTolerance;
145 }
146 return;
147 }
148
149 /**
150 * Primary constructor for the MillerUpdatingRegression
151 *
152 * @param numberOfVariables maximum number of potential regressors
153 * @param includeConstant include a constant automatically
154 */
155 public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant) {
156 this(numberOfVariables, includeConstant, MathUtils.EPSILON);
157 }
158
159 /**
160 * A getter method which determines whether a constant is included
161 * @return true regression has an intercept, false no intercept
162 */
163 public boolean hasIntercept() {
164 return this.hasIntercept;
165 }
166
167 /**
168 * Gets the number of observations added to the regression model
169 * @return number of observations
170 */
171 public long getN() {
172 return this.nobs;
173 }
174
175 /**
176 * Adds an observation to the regression model
177 * @param x the array with regressor values
178 * @param y the value of dependent variable given these regressors
179 * @exception ModelSpecificationException if the length of {@code x} does not equal
180 * the number of independent variables in the model
181 */
182 public void addObservation(final double[] x, final double y) {
183
184 if ((!this.hasIntercept && x.length != nvars) ||
185 (this.hasIntercept && x.length + 1 != nvars)) {
186 throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
187 x.length, nvars);
188 }
189 if (!this.hasIntercept) {
190 include(MathArrays.copyOf(x, x.length), 1.0, y);
191 } else {
192 double[] tmp = new double[x.length + 1];
193 System.arraycopy(x, 0, tmp, 1, x.length);
194 tmp[0] = 1.0;
195 include(tmp, 1.0, y);
196 }
197 ++nobs;
198 return;
199
200 }
201
202 /**
203 * Adds multiple observations to the model
204 * @param x observations on the regressors
205 * @param y observations on the regressand
206 * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
207 * the length of {@code y} or does not contain sufficient data to estimate the model
208 */
209 public void addObservations(double[][] x, double[] y) {
210 if ((x == null) || (y == null) || (x.length != y.length)) {
211 throw new ModelSpecificationException(
212 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
213 (x == null) ? 0 : x.length,
214 (y == null) ? 0 : y.length);
215 }
216 if (x.length == 0) { // Must be no y data either
217 throw new ModelSpecificationException(
218 LocalizedFormats.NO_DATA);
219 }
220 if (x[0].length + 1 > x.length) {
221 throw new ModelSpecificationException(
222 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
223 x.length, x[0].length);
224 }
225 for (int i = 0; i < x.length; i++) {
226 this.addObservation(x[i], y[i]);
227 }
228 return;
229 }
230
231 /**
232 * The include method is where the QR decomposition occurs. This statement forms all
233 * intermediate data which will be used for all derivative measures.
234 * According to the miller paper, note that in the original implementation the x vector
235 * is overwritten. In this implementation, the include method is passed a copy of the
236 * original data vector so that there is no contamination of the data. Additionally,
237 * this method differs slightly from Gentleman's method, in that the assumption is
238 * of dense design matrices, there is some advantage in using the original gentleman algorithm
239 * on sparse matrices.
240 *
241 * @param x observations on the regressors
242 * @param wi weight of the this observation (-1,1)
243 * @param yi observation on the regressand
244 */
245 private void include(final double[] x, final double wi, final double yi) {
246 int nextr = 0;
247 double w = wi;
248 double y = yi;
249 double xi;
250 double di;
251 double wxi;
252 double dpi;
253 double xk;
254 double _w;
255 this.rss_set = false;
256 sumy = smartAdd(yi, sumy);
257 sumsqy = smartAdd(sumsqy, yi * yi);
258 for (int i = 0; i < x.length; i++) {
259 if (w == 0.0) {
260 return;
261 }
262 xi = x[i];
263
264 if (xi == 0.0) {
265 nextr += nvars - i - 1;
266 continue;
267 }
268 di = d[i];
269 wxi = w * xi;
270 _w = w;
271 if (di != 0.0) {
272 dpi = smartAdd(di, wxi * xi);
273 double tmp = wxi * xi / di;
274 if (FastMath.abs(tmp) > MathUtils.EPSILON) {
275 w = (di * w) / dpi;
276 }
277 } else {
278 dpi = wxi * xi;
279 w = 0.0;
280 }
281 d[i] = dpi;
282 for (int k = i + 1; k < nvars; k++) {
283 xk = x[k];
284 x[k] = smartAdd(xk, -xi * r[nextr]);
285 if (di != 0.0) {
286 r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
287 } else {
288 r[nextr] = xk / xi;
289 }
290 ++nextr;
291 }
292 xk = y;
293 y = smartAdd(xk, -xi * rhs[i]);
294 if (di != 0.0) {
295 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
296 } else {
297 rhs[i] = xk / xi;
298 }
299 }
300 sserr = smartAdd(sserr, w * y * y);
301 return;
302 }
303
304 /**
305 * Adds to number a and b such that the contamination due to
306 * numerical smallness of one addend does not corrupt the sum
307 * @param a - an addend
308 * @param b - an addend
309 * @return the sum of the a and b
310 */
311 private double smartAdd(double a, double b) {
312 double _a = FastMath.abs(a);
313 double _b = FastMath.abs(b);
314 if (_a > _b) {
315 double eps = _a * MathUtils.EPSILON;
316 if (_b > eps) {
317 return a + b;
318 }
319 return a;
320 } else {
321 double eps = _b * MathUtils.EPSILON;
322 if (_a > eps) {
323 return a + b;
324 }
325 return b;
326 }
327 }
328
329 /**
330 * As the name suggests, clear wipes the internals and reorders everything in the
331 * canonical order.
332 */
333 public void clear() {
334 Arrays.fill(this.d, 0.0);
335 Arrays.fill(this.rhs, 0.0);
336 Arrays.fill(this.r, 0.0);
337 Arrays.fill(this.tol, 0.0);
338 Arrays.fill(this.rss, 0.0);
339 Arrays.fill(this.work_tolset, 0.0);
340 Arrays.fill(this.work_sing, 0.0);
341 Arrays.fill(this.x_sing, 0.0);
342 Arrays.fill(this.lindep, false);
343 for (int i = 0; i < nvars; i++) {
344 this.vorder[i] = i;
345 }
346 this.nobs = 0;
347 this.sserr = 0.0;
348 this.sumy = 0.0;
349 this.sumsqy = 0.0;
350 this.rss_set = false;
351 this.tol_set = false;
352 return;
353 }
354
355 /**
356 * This sets up tolerances for singularity testing.
357 */
358 private void tolset() {
359 int pos;
360 double total;
361 final double eps = this.epsilon;
362 for (int i = 0; i < nvars; i++) {
363 this.work_tolset[i] = Math.sqrt(d[i]);
364 }
365 tol[0] = eps * this.work_tolset[0];
366 for (int col = 1; col < nvars; col++) {
367 pos = col - 1;
368 total = work_tolset[col];
369 for (int row = 0; row < col; row++) {
370 total += Math.abs(r[pos]) * work_tolset[row];
371 pos += nvars - row - 2;
372 }
373 tol[col] = eps * total;
374 }
375 tol_set = true;
376 return;
377 }
378
379 /**
380 * The regcf method conducts the linear regression and extracts the
381 * parameter vector. Notice that the algorithm can do subset regression
382 * with no alteration.
383 *
384 * @param nreq how many of the regressors to include (either in canonical
385 * order, or in the current reordered state)
386 * @return an array with the estimated slope coefficients
387 */
388 private double[] regcf(int nreq) {
389 int nextr;
390 if (nreq < 1) {
391 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
392 }
393 if (nreq > this.nvars) {
394 throw new ModelSpecificationException(
395 LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
396 }
397 if (!this.tol_set) {
398 tolset();
399 }
400 double[] ret = new double[nreq];
401 boolean rankProblem = false;
402 for (int i = nreq - 1; i > -1; i--) {
403 if (Math.sqrt(d[i]) < tol[i]) {
404 ret[i] = 0.0;
405 d[i] = 0.0;
406 rankProblem = true;
407 } else {
408 ret[i] = rhs[i];
409 nextr = i * (nvars + nvars - i - 1) / 2;
410 for (int j = i + 1; j < nreq; j++) {
411 ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
412 ++nextr;
413 }
414 }
415 }
416 if (rankProblem) {
417 for (int i = 0; i < nreq; i++) {
418 if (this.lindep[i]) {
419 ret[i] = Double.NaN;
420 }
421 }
422 }
423 return ret;
424 }
425
426 /**
427 * The method which checks for singularities and then eliminates the offending
428 * columns.
429 */
430 private void singcheck() {
431 double temp;
432 double y;
433 double weight;
434 int pos;
435 for (int i = 0; i < nvars; i++) {
436 work_sing[i] = Math.sqrt(d[i]);
437 }
438 for (int col = 0; col < nvars; col++) {
439 // Set elements within R to zero if they are less than tol(col) in
440 // absolute value after being scaled by the square root of their row
441 // multiplier
442 temp = tol[col];
443 pos = col - 1;
444 for (int row = 0; row < col - 1; row++) {
445 if (Math.abs(r[pos]) * work_sing[row] < temp) {
446 r[pos] = 0.0;
447 }
448 pos += nvars - row - 2;
449 }
450 // If diagonal element is near zero, set it to zero, set appropriate
451 // element of LINDEP, and use INCLUD to augment the projections in
452 // the lower rows of the orthogonalization.
453 lindep[col] = false;
454 if (work_sing[col] < temp) {
455 lindep[col] = true;
456 if (col < nvars - 1) {
457 Arrays.fill(x_sing, 0.0);
458 int _pi = col * (nvars + nvars - col - 1) / 2;
459 for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
460 x_sing[_xi] = r[_pi];
461 r[_pi] = 0.0;
462 }
463 y = rhs[col];
464 weight = d[col];
465 d[col] = 0.0;
466 rhs[col] = 0.0;
467 this.include(x_sing, weight, y);
468 } else {
469 sserr += d[col] * rhs[col] * rhs[col];
470 }
471 }
472 }
473 return;
474 }
475
476 /**
477 * Calculates the sum of squared errors for the full regression
478 * and all subsets in the following manner: <pre>
479 * rss[] ={
480 * ResidualSumOfSquares_allNvars,
481 * ResidualSumOfSquares_FirstNvars-1,
482 * ResidualSumOfSquares_FirstNvars-2,
483 * ..., ResidualSumOfSquares_FirstVariable} </pre>
484 */
485 private void ss() {
486 double total = sserr;
487 rss[nvars - 1] = sserr;
488 for (int i = nvars - 1; i > 0; i--) {
489 total += d[i] * rhs[i] * rhs[i];
490 rss[i - 1] = total;
491 }
492 rss_set = true;
493 return;
494 }
495
496 /**
497 * Calculates the cov matrix assuming only the first nreq variables are
498 * included in the calculation. The returned array contains a symmetric
499 * matrix stored in lower triangular form. The matrix will have
500 * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
501 * cov =
502 * {
503 * cov_00,
504 * cov_10, cov_11,
505 * cov_20, cov_21, cov22,
506 * ...
507 * } </pre>
508 *
509 * @param nreq how many of the regressors to include (either in canonical
510 * order, or in the current reordered state)
511 * @return an array with the variance covariance of the included
512 * regressors in lower triangular form
513 */
514 private double[] cov(int nreq) {
515 if (this.nobs <= nreq) {
516 return null;
517 }
518 double rnk = 0.0;
519 for (int i = 0; i < nreq; i++) {
520 if (!this.lindep[i]) {
521 rnk += 1.0;
522 }
523 }
524 double var = rss[nreq - 1] / (nobs - rnk);
525 double[] rinv = new double[nreq * (nreq - 1) / 2];
526 inverse(rinv, nreq);
527 double[] covmat = new double[nreq * (nreq + 1) / 2];
528 Arrays.fill(covmat, Double.NaN);
529 int pos2;
530 int pos1;
531 int start = 0;
532 double total = 0;
533 for (int row = 0; row < nreq; row++) {
534 pos2 = start;
535 if (!this.lindep[row]) {
536 for (int col = row; col < nreq; col++) {
537 if (!this.lindep[col]) {
538 pos1 = start + col - row;
539 if (row == col) {
540 total = 1.0 / d[col];
541 } else {
542 total = rinv[pos1 - 1] / d[col];
543 }
544 for (int k = col + 1; k < nreq; k++) {
545 if (!this.lindep[k]) {
546 total += rinv[pos1] * rinv[pos2] / d[k];
547 }
548 ++pos1;
549 ++pos2;
550 }
551 covmat[ (col + 1) * col / 2 + row] = total * var;
552 } else {
553 pos2 += nreq - col - 1;
554 }
555 }
556 }
557 start += nreq - row - 1;
558 }
559 return covmat;
560 }
561
562 /**
563 * This internal method calculates the inverse of the upper-triangular portion
564 * of the R matrix.
565 * @param rinv the storage for the inverse of r
566 * @param nreq how many of the regressors to include (either in canonical
567 * order, or in the current reordered state)
568 */
569 private void inverse(double[] rinv, int nreq) {
570 int pos = nreq * (nreq - 1) / 2 - 1;
571 int pos1 = -1;
572 int pos2 = -1;
573 double total = 0.0;
574 int start;
575 Arrays.fill(rinv, Double.NaN);
576 for (int row = nreq - 1; row > 0; --row) {
577 if (!this.lindep[row]) {
578 start = (row - 1) * (nvars + nvars - row) / 2;
579 for (int col = nreq; col > row; --col) {
580 pos1 = start;
581 pos2 = pos;
582 total = 0.0;
583 for (int k = row; k < col - 1; k++) {
584 pos2 += nreq - k - 1;
585 if (!this.lindep[k]) {
586 total += -r[pos1] * rinv[pos2];
587 }
588 ++pos1;
589 }
590 rinv[pos] = total - r[pos1];
591 --pos;
592 }
593 } else {
594 pos -= nreq - row;
595 }
596 }
597 return;
598 }
599
600 /**
601 * <p>In the original algorithm only the partial correlations of the regressors
602 * is returned to the user. In this implementation, we have <pre>
603 * corr =
604 * {
605 * corrxx - lower triangular
606 * corrxy - bottom row of the matrix
607 * }
608 * Replaces subroutines PCORR and COR of:
609 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre></p>
610 *
611 * <p>Calculate partial correlations after the variables in rows
612 * 1, 2, ..., IN have been forced into the regression.
613 * If IN = 1, and the first row of R represents a constant in the
614 * model, then the usual simple correlations are returned.</p>
615 *
616 * <p>If IN = 0, the value returned in array CORMAT for the correlation
617 * of variables Xi & Xj is: <pre>
618 * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
619 *
620 * <p>On return, array CORMAT contains the upper triangle of the matrix of
621 * partial correlations stored by rows, excluding the 1's on the diagonal.
622 * e.g. if IN = 2, the consecutive elements returned are:
623 * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
624 * Array YCORR stores the partial correlations with the Y-variable
625 * starting with YCORR(IN+1) = partial correlation with the variable in
626 * position (IN+1). </p>
627 *
628 * @param in how many of the regressors to include (either in canonical
629 * order, or in the current reordered state)
630 * @return an array with the partial correlations of the remainder of
631 * regressors with each other and the regressand, in lower triangular form
632 */
633 public double[] getPartialCorrelations(int in) {
634 double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
635 int base_pos;
636 int pos;
637 int pos1;
638 int pos2;
639 int rms_off = -in;
640 int wrk_off = -(in + 1);
641 double[] rms = new double[nvars - in];
642 double[] work = new double[nvars - in - 1];
643 double sumxx;
644 double sumxy;
645 double sumyy;
646 int offXX = (nvars - in) * (nvars - in - 1) / 2;
647 if (in < -1 || in >= nvars) {
648 return null;
649 }
650 int nvm = nvars - 1;
651 base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
652 if (d[in] > 0.0) {
653 rms[in + rms_off] = 1.0 / Math.sqrt(d[in]);
654 }
655 for (int col = in + 1; col < nvars; col++) {
656 pos = base_pos + col - 1 - in;
657 sumxx = d[col];
658 for (int row = in; row < col; row++) {
659 sumxx += d[row] * r[pos] * r[pos];
660 pos += nvars - row - 2;
661 }
662 if (sumxx > 0.0) {
663 rms[col + rms_off] = 1.0 / Math.sqrt(sumxx);
664 } else {
665 rms[col + rms_off] = 0.0;
666 }
667 }
668 sumyy = sserr;
669 for (int row = in; row < nvars; row++) {
670 sumyy += d[row] * rhs[row] * rhs[row];
671 }
672 if (sumyy > 0.0) {
673 sumyy = 1.0 / Math.sqrt(sumyy);
674 }
675 pos = 0;
676 for (int col1 = in; col1 < nvars; col1++) {
677 sumxy = 0.0;
678 Arrays.fill(work, 0.0);
679 pos1 = base_pos + col1 - in - 1;
680 for (int row = in; row < col1; row++) {
681 pos2 = pos1 + 1;
682 for (int col2 = col1 + 1; col2 < nvars; col2++) {
683 work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
684 pos2++;
685 }
686 sumxy += d[row] * r[pos1] * rhs[row];
687 pos1 += nvars - row - 2;
688 }
689 pos2 = pos1 + 1;
690 for (int col2 = col1 + 1; col2 < nvars; col2++) {
691 work[col2 + wrk_off] += d[col1] * r[pos2];
692 ++pos2;
693 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
694 work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
695 ++pos;
696 }
697 sumxy += d[col1] * rhs[col1];
698 output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
699 }
700
701 return output;
702 }
703
704 /**
705 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
706 * Move variable from position FROM to position TO in an
707 * orthogonal reduction produced by AS75.1.
708 *
709 * @param from initial position
710 * @param to destination
711 */
712 private void vmove(int from, int to) {
713 double d1;
714 double d2;
715 double X;
716 double d1new;
717 double d2new;
718 double cbar;
719 double sbar;
720 double Y;
721 int first;
722 int inc;
723 int m1;
724 int m2;
725 int mp1;
726 int pos;
727 boolean bSkipTo40 = false;
728 if (from == to) {
729 return;
730 }
731 if (!this.rss_set) {
732 ss();
733 }
734 int count = 0;
735 if (from < to) {
736 first = from;
737 inc = 1;
738 count = to - from;
739 } else {
740 first = from - 1;
741 inc = -1;
742 count = from - to;
743 }
744
745 int m = first;
746 int idx = 0;
747 while (idx < count) {
748 m1 = m * (nvars + nvars - m - 1) / 2;
749 m2 = m1 + nvars - m - 1;
750 mp1 = m + 1;
751
752 d1 = d[m];
753 d2 = d[mp1];
754 // Special cases.
755 if (d1 > this.epsilon || d2 > this.epsilon) {
756 X = r[m1];
757 if (Math.abs(X) * Math.sqrt(d1) < tol[mp1]) {
758 X = 0.0;
759 }
760 if (d1 < this.epsilon || Math.abs(X) < this.epsilon) {
761 d[m] = d2;
762 d[mp1] = d1;
763 r[m1] = 0.0;
764 for (int col = m + 2; col < nvars; col++) {
765 ++m1;
766 X = r[m1];
767 r[m1] = r[m2];
768 r[m2] = X;
769 ++m2;
770 }
771 X = rhs[m];
772 rhs[m] = rhs[mp1];
773 rhs[mp1] = X;
774 bSkipTo40 = true;
775 //break;
776 } else if (d2 < this.epsilon) {
777 d[m] = d1 * X * X;
778 r[m1] = 1.0 / X;
779 for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
780 r[_i] /= X;
781 }
782 rhs[m] = rhs[m] / X;
783 bSkipTo40 = true;
784 //break;
785 }
786 if (!bSkipTo40) {
787 d1new = d2 + d1 * X * X;
788 cbar = d2 / d1new;
789 sbar = X * d1 / d1new;
790 d2new = d1 * cbar;
791 d[m] = d1new;
792 d[mp1] = d2new;
793 r[m1] = sbar;
794 for (int col = m + 2; col < nvars; col++) {
795 ++m1;
796 Y = r[m1];
797 r[m1] = cbar * r[m2] + sbar * Y;
798 r[m2] = Y - X * r[m2];
799 ++m2;
800 }
801 Y = rhs[m];
802 rhs[m] = cbar * rhs[mp1] + sbar * Y;
803 rhs[mp1] = Y - X * rhs[mp1];
804 }
805 }
806 if (m > 0) {
807 pos = m;
808 for (int row = 0; row < m; row++) {
809 X = r[pos];
810 r[pos] = r[pos - 1];
811 r[pos - 1] = X;
812 pos += nvars - row - 2;
813 }
814 }
815 // Adjust variable order (VORDER), the tolerances (TOL) and
816 // the vector of residual sums of squares (RSS).
817 m1 = vorder[m];
818 vorder[m] = vorder[mp1];
819 vorder[mp1] = m1;
820 X = tol[m];
821 tol[m] = tol[mp1];
822 tol[mp1] = X;
823 rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
824
825 m += inc;
826 ++idx;
827 }
828 }
829
830 /**
831 * <p>ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2</p>
832 *
833 * <p> Re-order the variables in an orthogonal reduction produced by
834 * AS75.1 so that the N variables in LIST start at position POS1,
835 * though will not necessarily be in the same order as in LIST.
836 * Any variables in VORDER before position POS1 are not moved.
837 * Auxiliary routine called: VMOVE. </p>
838 *
839 * <p>This internal method reorders the regressors.</p>
840 *
841 * @param list the regressors to move
842 * @param pos1 where the list will be placed
843 * @return -1 error, 0 everything ok
844 */
845 private int reorderRegressors(int[] list, int pos1) {
846 int next;
847 int i;
848 int l;
849 if (list.length < 1 || list.length > nvars + 1 - pos1) {
850 return -1;
851 }
852 next = pos1;
853 i = pos1;
854 while (i < nvars) {
855 l = vorder[i];
856 for (int j = 0; j < list.length; j++) {
857 if (l == list[j]) {
858 if (i > next) {
859 this.vmove(i, next);
860 ++next;
861 if (next >= list.length + pos1) {
862 return 0;
863 } else {
864 break;
865 }
866 }
867 }
868 }
869 ++i;
870 }
871 return 0;
872 }
873
874 /**
875 * Gets the diagonal of the Hat matrix also known as the leverage matrix.
876 *
877 * @param row_data returns the diagonal of the hat matrix for this observation
878 * @return the diagonal element of the hatmatrix
879 */
880 public double getDiagonalOfHatMatrix(double[] row_data) {
881 double[] wk = new double[this.nvars];
882 int pos;
883 double total;
884
885 if (row_data.length > nvars) {
886 return Double.NaN;
887 }
888 double[] xrow;
889 if (this.hasIntercept) {
890 xrow = new double[row_data.length + 1];
891 xrow[0] = 1.0;
892 System.arraycopy(row_data, 0, xrow, 1, row_data.length);
893 } else {
894 xrow = row_data;
895 }
896 double hii = 0.0;
897 for (int col = 0; col < xrow.length; col++) {
898 if (Math.sqrt(d[col]) < tol[col]) {
899 wk[col] = 0.0;
900 } else {
901 pos = col - 1;
902 total = xrow[col];
903 for (int row = 0; row < col; row++) {
904 total = smartAdd(total, -wk[row] * r[pos]);
905 pos += nvars - row - 2;
906 }
907 wk[col] = total;
908 hii = smartAdd(hii, (total * total) / d[col]);
909 }
910 }
911 return hii;
912 }
913
914 /**
915 * Gets the order of the regressors, useful if some type of reordering
916 * has been called. Calling regress with int[]{} args will trigger
917 * a reordering.
918 *
919 * @return int[] with the current order of the regressors
920 */
921 public int[] getOrderOfRegressors(){
922 return MathArrays.copyOf(vorder);
923 }
924
925 /**
926 * Conducts a regression on the data in the model, using all regressors.
927 *
928 * @return RegressionResults the structure holding all regression results
929 * @exception ModelSpecificationException - thrown if number of observations is
930 * less than the number of variables
931 */
932 public RegressionResults regress() throws ModelSpecificationException {
933 return regress(this.nvars);
934 }
935
936 /**
937 * Conducts a regression on the data in the model, using a subset of regressors.
938 *
939 * @param numberOfRegressors many of the regressors to include (either in canonical
940 * order, or in the current reordered state)
941 * @return RegressionResults the structure holding all regression results
942 * @exception ModelSpecificationException - thrown if number of observations is
943 * less than the number of variables or number of regressors requested
944 * is greater than the regressors in the model
945 */
946 public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
947 if (this.nobs <= numberOfRegressors) {
948 throw new ModelSpecificationException(
949 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
950 this.nobs, numberOfRegressors);
951 }
952 if( numberOfRegressors > this.nvars ){
953 throw new ModelSpecificationException(
954 LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
955 }
956 this.tolset();
957
958 this.singcheck();
959
960 double[] beta = this.regcf(numberOfRegressors);
961
962 this.ss();
963
964 double[] cov = this.cov(numberOfRegressors);
965
966 int rnk = 0;
967 for (int i = 0; i < this.lindep.length; i++) {
968 if (!this.lindep[i]) {
969 ++rnk;
970 }
971 }
972
973 boolean needsReorder = false;
974 for (int i = 0; i < numberOfRegressors; i++) {
975 if (this.vorder[i] != i) {
976 needsReorder = true;
977 break;
978 }
979 }
980 if (!needsReorder) {
981 return new RegressionResults(
982 beta, new double[][]{cov}, true, this.nobs, rnk,
983 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
984 } else {
985 double[] betaNew = new double[beta.length];
986 double[] covNew = new double[cov.length];
987
988 int[] newIndices = new int[beta.length];
989 for (int i = 0; i < nvars; i++) {
990 for (int j = 0; j < numberOfRegressors; j++) {
991 if (this.vorder[j] == i) {
992 betaNew[i] = beta[ j];
993 newIndices[i] = j;
994 }
995 }
996 }
997
998 int idx1 = 0;
999 int idx2;
1000 int _i;
1001 int _j;
1002 for (int i = 0; i < beta.length; i++) {
1003 _i = newIndices[i];
1004 for (int j = 0; j <= i; j++, idx1++) {
1005 _j = newIndices[j];
1006 if (_i > _j) {
1007 idx2 = _i * (_i + 1) / 2 + _j;
1008 } else {
1009 idx2 = _j * (_j + 1) / 2 + _i;
1010 }
1011 covNew[idx1] = cov[idx2];
1012 }
1013 }
1014 return new RegressionResults(
1015 betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1016 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1017 }
1018 }
1019
1020 /**
1021 * Conducts a regression on the data in the model, using regressors in array
1022 * Calling this method will change the internal order of the regressors
1023 * and care is required in interpreting the hatmatrix.
1024 *
1025 * @param variablesToInclude array of variables to include in regression
1026 * @return RegressionResults the structure holding all regression results
1027 * @exception ModelSpecificationException - thrown if number of observations is
1028 * less than the number of variables, the number of regressors requested
1029 * is greater than the regressors in the model or a regressor index in
1030 * regressor array does not exist
1031 */
1032 public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1033 if (variablesToInclude.length > this.nvars) {
1034 throw new ModelSpecificationException(
1035 LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1036 }
1037 if (this.nobs <= this.nvars) {
1038 throw new ModelSpecificationException(
1039 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1040 this.nobs, this.nvars);
1041 }
1042 Arrays.sort(variablesToInclude);
1043 int iExclude = 0;
1044 for (int i = 0; i < variablesToInclude.length; i++) {
1045 if (i >= this.nvars) {
1046 throw new ModelSpecificationException(
1047 LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
1048 }
1049 if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1050 variablesToInclude[i] = -1;
1051 ++iExclude;
1052 }
1053 }
1054 int[] series;
1055 if (iExclude > 0) {
1056 int j = 0;
1057 series = new int[variablesToInclude.length - iExclude];
1058 for (int i = 0; i < variablesToInclude.length; i++) {
1059 if (variablesToInclude[i] > -1) {
1060 series[j] = variablesToInclude[i];
1061 ++j;
1062 }
1063 }
1064 } else {
1065 series = variablesToInclude;
1066 }
1067
1068 this.reorderRegressors(series, 0);
1069
1070 this.tolset();
1071
1072 this.singcheck();
1073
1074 double[] beta = this.regcf(series.length);
1075
1076 this.ss();
1077
1078 double[] cov = this.cov(series.length);
1079
1080 int rnk = 0;
1081 for (int i = 0; i < this.lindep.length; i++) {
1082 if (!this.lindep[i]) {
1083 ++rnk;
1084 }
1085 }
1086
1087 boolean needsReorder = false;
1088 for (int i = 0; i < this.nvars; i++) {
1089 if (this.vorder[i] != series[i]) {
1090 needsReorder = true;
1091 break;
1092 }
1093 }
1094 if (!needsReorder) {
1095 return new RegressionResults(
1096 beta, new double[][]{cov}, true, this.nobs, rnk,
1097 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1098 } else {
1099 double[] betaNew = new double[beta.length];
1100 int[] newIndices = new int[beta.length];
1101 for (int i = 0; i < series.length; i++) {
1102 for (int j = 0; j < this.vorder.length; j++) {
1103 if (this.vorder[j] == series[i]) {
1104 betaNew[i] = beta[ j];
1105 newIndices[i] = j;
1106 }
1107 }
1108 }
1109 double[] covNew = new double[cov.length];
1110 int idx1 = 0;
1111 int idx2;
1112 int _i;
1113 int _j;
1114 for (int i = 0; i < beta.length; i++) {
1115 _i = newIndices[i];
1116 for (int j = 0; j <= i; j++, idx1++) {
1117 _j = newIndices[j];
1118 if (_i > _j) {
1119 idx2 = _i * (_i + 1) / 2 + _j;
1120 } else {
1121 idx2 = _j * (_j + 1) / 2 + _i;
1122 }
1123 covNew[idx1] = cov[idx2];
1124 }
1125 }
1126 return new RegressionResults(
1127 betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1128 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1129 }
1130 }
1131 }