1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.stat.regression;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
22 import org.apache.commons.math4.core.jdkmath.JdkMath;
23 import org.apache.commons.numbers.core.Precision;
24
25 /**
26 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
27 *
28 * <p>The algorithm is described in: <pre>
29 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
30 * Author(s): Alan J. Miller
31 * Source: Journal of the Royal Statistical Society.
32 * Series C (Applied Statistics), Vol. 41, No. 2
33 * (1992), pp. 458-478
34 * Published by: Blackwell Publishing for the Royal Statistical Society
35 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
36 *
37 * <p>This method for multiple regression forms the solution to the OLS problem
38 * by updating the QR decomposition as described by Gentleman.</p>
39 *
40 * @since 3.0
41 */
42 public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
43
44 /** number of variables in regression. */
45 private final int nvars;
46 /** diagonals of cross products matrix. */
47 private final double[] d;
48 /** the elements of the R`Y. */
49 private final double[] rhs;
50 /** the off diagonal portion of the R matrix. */
51 private final double[] r;
52 /** the tolerance for each of the variables. */
53 private final double[] tol;
54 /** residual sum of squares for all nested regressions. */
55 private final double[] rss;
56 /** order of the regressors. */
57 private final int[] vorder;
58 /** scratch space for tolerance calc. */
59 private final double[] workTolset;
60 /** number of observations entered. */
61 private long nobs;
62 /** sum of squared errors of largest regression. */
63 private double sserr;
64 /** has rss been called? */
65 private boolean rssSet;
66 /** has the tolerance setting method been called. */
67 private boolean tolSet;
68 /** flags for variables with linear dependency problems. */
69 private final boolean[] lindep;
70 /** singular x values. */
71 private final double[] xSing;
72 /** workspace for singularity method. */
73 private final double[] workSing;
74 /** summation of Y variable. */
75 private double sumy;
76 /** summation of squared Y values. */
77 private double sumsqy;
78 /** boolean flag whether a regression constant is added. */
79 private final boolean hasIntercept;
80 /** zero tolerance. */
81 private final double epsilon;
82 /**
83 * Set the default constructor to private access.
84 * to prevent inadvertent instantiation
85 */
86 @SuppressWarnings("unused")
87 private MillerUpdatingRegression() {
88 this(-1, false, Double.NaN);
89 }
90
91 /**
92 * This is the augmented constructor for the MillerUpdatingRegression class.
93 *
94 * @param numberOfVariables number of regressors to expect, not including constant
95 * @param includeConstant include a constant automatically
96 * @param errorTolerance zero tolerance, how machine zero is determined
97 * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
98 */
99 public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
100 throws ModelSpecificationException {
101 if (numberOfVariables < 1) {
102 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
103 }
104 if (includeConstant) {
105 this.nvars = numberOfVariables + 1;
106 } else {
107 this.nvars = numberOfVariables;
108 }
109 this.hasIntercept = includeConstant;
110 this.nobs = 0;
111 this.d = new double[this.nvars];
112 this.rhs = new double[this.nvars];
113 this.r = new double[this.nvars * (this.nvars - 1) / 2];
114 this.tol = new double[this.nvars];
115 this.rss = new double[this.nvars];
116 this.vorder = new int[this.nvars];
117 this.xSing = new double[this.nvars];
118 this.workSing = new double[this.nvars];
119 this.workTolset = new double[this.nvars];
120 this.lindep = new boolean[this.nvars];
121 for (int i = 0; i < this.nvars; i++) {
122 vorder[i] = i;
123 }
124 if (errorTolerance > 0) {
125 this.epsilon = errorTolerance;
126 } else {
127 this.epsilon = -errorTolerance;
128 }
129 }
130
131 /**
132 * Primary constructor for the MillerUpdatingRegression.
133 *
134 * @param numberOfVariables maximum number of potential regressors
135 * @param includeConstant include a constant automatically
136 * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
137 */
138 public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
139 throws ModelSpecificationException {
140 this(numberOfVariables, includeConstant, Precision.EPSILON);
141 }
142
143 /**
144 * A getter method which determines whether a constant is included.
145 * @return true regression has an intercept, false no intercept
146 */
147 @Override
148 public boolean hasIntercept() {
149 return this.hasIntercept;
150 }
151
152 /**
153 * Gets the number of observations added to the regression model.
154 * @return number of observations
155 */
156 @Override
157 public long getN() {
158 return this.nobs;
159 }
160
161 /**
162 * Adds an observation to the regression model.
163 * @param x the array with regressor values
164 * @param y the value of dependent variable given these regressors
165 * @exception ModelSpecificationException if the length of {@code x} does not equal
166 * the number of independent variables in the model
167 */
168 @Override
169 public void addObservation(final double[] x, final double y)
170 throws ModelSpecificationException {
171
172 if ((!this.hasIntercept && x.length != nvars) ||
173 (this.hasIntercept && x.length + 1 != nvars)) {
174 throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
175 x.length, nvars);
176 }
177 if (!this.hasIntercept) {
178 include(Arrays.copyOf(x, x.length), 1.0, y);
179 } else {
180 final double[] tmp = new double[x.length + 1];
181 System.arraycopy(x, 0, tmp, 1, x.length);
182 tmp[0] = 1.0;
183 include(tmp, 1.0, y);
184 }
185 ++nobs;
186 }
187
188 /**
189 * Adds multiple observations to the model.
190 * @param x observations on the regressors
191 * @param y observations on the regressand
192 * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
193 * the length of {@code y} or does not contain sufficient data to estimate the model
194 */
195 @Override
196 public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
197 if (x == null || y == null || x.length != y.length) {
198 throw new ModelSpecificationException(
199 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
200 (x == null) ? 0 : x.length,
201 (y == null) ? 0 : y.length);
202 }
203 if (x.length == 0) { // Must be no y data either
204 throw new ModelSpecificationException(
205 LocalizedFormats.NO_DATA);
206 }
207 if (x[0].length + 1 > x.length) {
208 throw new ModelSpecificationException(
209 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
210 x.length, x[0].length);
211 }
212 for (int i = 0; i < x.length; i++) {
213 addObservation(x[i], y[i]);
214 }
215 }
216
217 /**
218 * The include method is where the QR decomposition occurs. This statement forms all
219 * intermediate data which will be used for all derivative measures.
220 * According to the miller paper, note that in the original implementation the x vector
221 * is overwritten. In this implementation, the include method is passed a copy of the
222 * original data vector so that there is no contamination of the data. Additionally,
223 * this method differs slightly from Gentleman's method, in that the assumption is
224 * of dense design matrices, there is some advantage in using the original gentleman algorithm
225 * on sparse matrices.
226 *
227 * @param x observations on the regressors
228 * @param wi weight of the this observation (-1,1)
229 * @param yi observation on the regressand
230 */
231 private void include(final double[] x, final double wi, final double yi) {
232 int nextr = 0;
233 double w = wi;
234 double y = yi;
235 double xi;
236 double di;
237 double wxi;
238 double dpi;
239 double xk;
240 double wPrev;
241 this.rssSet = false;
242 sumy = smartAdd(yi, sumy);
243 sumsqy = smartAdd(sumsqy, yi * yi);
244 for (int i = 0; i < x.length; i++) {
245 if (w == 0.0) {
246 return;
247 }
248 xi = x[i];
249
250 if (xi == 0.0) {
251 nextr += nvars - i - 1;
252 continue;
253 }
254 di = d[i];
255 wxi = w * xi;
256 wPrev = w;
257 if (di != 0.0) {
258 dpi = smartAdd(di, wxi * xi);
259 final double tmp = wxi * xi / di;
260 if (JdkMath.abs(tmp) > Precision.EPSILON) {
261 w = (di * w) / dpi;
262 }
263 } else {
264 dpi = wxi * xi;
265 w = 0.0;
266 }
267 d[i] = dpi;
268 for (int k = i + 1; k < nvars; k++) {
269 xk = x[k];
270 x[k] = smartAdd(xk, -xi * r[nextr]);
271 if (di != 0.0) {
272 r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi;
273 } else {
274 r[nextr] = xk / xi;
275 }
276 ++nextr;
277 }
278 xk = y;
279 y = smartAdd(xk, -xi * rhs[i]);
280 if (di != 0.0) {
281 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
282 } else {
283 rhs[i] = xk / xi;
284 }
285 }
286 sserr = smartAdd(sserr, w * y * y);
287 }
288
289 /**
290 * Adds to number a and b such that the contamination due to
291 * numerical smallness of one addend does not corrupt the sum.
292 * @param a - an addend
293 * @param b - an addend
294 * @return the sum of the a and b
295 */
296 private double smartAdd(double a, double b) {
297 final double aa = JdkMath.abs(a);
298 final double ba = JdkMath.abs(b);
299 if (aa > ba) {
300 final double eps = aa * Precision.EPSILON;
301 if (ba > eps) {
302 return a + b;
303 }
304 return a;
305 } else {
306 final double eps = ba * Precision.EPSILON;
307 if (aa > eps) {
308 return a + b;
309 }
310 return b;
311 }
312 }
313
314 /**
315 * As the name suggests, clear wipes the internals and reorders everything in the
316 * canonical order.
317 */
318 @Override
319 public void clear() {
320 Arrays.fill(this.d, 0.0);
321 Arrays.fill(this.rhs, 0.0);
322 Arrays.fill(this.r, 0.0);
323 Arrays.fill(this.tol, 0.0);
324 Arrays.fill(this.rss, 0.0);
325 Arrays.fill(this.workTolset, 0.0);
326 Arrays.fill(this.workSing, 0.0);
327 Arrays.fill(this.xSing, 0.0);
328 Arrays.fill(this.lindep, false);
329 for (int i = 0; i < nvars; i++) {
330 this.vorder[i] = i;
331 }
332 this.nobs = 0;
333 this.sserr = 0.0;
334 this.sumy = 0.0;
335 this.sumsqy = 0.0;
336 this.rssSet = false;
337 this.tolSet = false;
338 }
339
340 /**
341 * This sets up tolerances for singularity testing.
342 */
343 private void tolset() {
344 int pos;
345 double total;
346 final double eps = this.epsilon;
347 for (int i = 0; i < nvars; i++) {
348 this.workTolset[i] = JdkMath.sqrt(d[i]);
349 }
350 tol[0] = eps * this.workTolset[0];
351 for (int col = 1; col < nvars; col++) {
352 pos = col - 1;
353 total = workTolset[col];
354 for (int row = 0; row < col; row++) {
355 total += JdkMath.abs(r[pos]) * workTolset[row];
356 pos += nvars - row - 2;
357 }
358 tol[col] = eps * total;
359 }
360 tolSet = true;
361 }
362
363 /**
364 * The regcf method conducts the linear regression and extracts the
365 * parameter vector. Notice that the algorithm can do subset regression
366 * with no alteration.
367 *
368 * @param nreq how many of the regressors to include (either in canonical
369 * order, or in the current reordered state)
370 * @return an array with the estimated slope coefficients
371 * @throws ModelSpecificationException if {@code nreq} is less than 1
372 * or greater than the number of independent variables
373 */
374 private double[] regcf(int nreq) throws ModelSpecificationException {
375 int nextr;
376 if (nreq < 1) {
377 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
378 }
379 if (nreq > this.nvars) {
380 throw new ModelSpecificationException(
381 LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
382 }
383 if (!this.tolSet) {
384 tolset();
385 }
386 final double[] ret = new double[nreq];
387 boolean rankProblem = false;
388 for (int i = nreq - 1; i > -1; i--) {
389 if (JdkMath.sqrt(d[i]) < tol[i]) {
390 ret[i] = 0.0;
391 d[i] = 0.0;
392 rankProblem = true;
393 } else {
394 ret[i] = rhs[i];
395 nextr = i * (nvars + nvars - i - 1) / 2;
396 for (int j = i + 1; j < nreq; j++) {
397 ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
398 ++nextr;
399 }
400 }
401 }
402 if (rankProblem) {
403 for (int i = 0; i < nreq; i++) {
404 if (this.lindep[i]) {
405 ret[i] = Double.NaN;
406 }
407 }
408 }
409 return ret;
410 }
411
412 /**
413 * The method which checks for singularities and then eliminates the offending
414 * columns.
415 */
416 private void singcheck() {
417 int pos;
418 for (int i = 0; i < nvars; i++) {
419 workSing[i] = JdkMath.sqrt(d[i]);
420 }
421 for (int col = 0; col < nvars; col++) {
422 // Set elements within R to zero if they are less than tol(col) in
423 // absolute value after being scaled by the square root of their row
424 // multiplier
425 final double temp = tol[col];
426 pos = col - 1;
427 for (int row = 0; row < col - 1; row++) {
428 if (JdkMath.abs(r[pos]) * workSing[row] < temp) {
429 r[pos] = 0.0;
430 }
431 pos += nvars - row - 2;
432 }
433 // If diagonal element is near zero, set it to zero, set appropriate
434 // element of LINDEP, and use INCLUD to augment the projections in
435 // the lower rows of the orthogonalization.
436 lindep[col] = false;
437 if (workSing[col] < temp) {
438 lindep[col] = true;
439 if (col < nvars - 1) {
440 Arrays.fill(xSing, 0.0);
441 int pi = col * (nvars + nvars - col - 1) / 2;
442 for (int xi = col + 1; xi < nvars; xi++, pi++) {
443 xSing[xi] = r[pi];
444 r[pi] = 0.0;
445 }
446 final double y = rhs[col];
447 final double weight = d[col];
448 d[col] = 0.0;
449 rhs[col] = 0.0;
450 this.include(xSing, weight, y);
451 } else {
452 sserr += d[col] * rhs[col] * rhs[col];
453 }
454 }
455 }
456 }
457
458 /**
459 * Calculates the sum of squared errors for the full regression.
460 * and all subsets in the following manner: <pre>
461 * rss[] ={
462 * ResidualSumOfSquares_allNvars,
463 * ResidualSumOfSquares_FirstNvars-1,
464 * ResidualSumOfSquares_FirstNvars-2,
465 * ..., ResidualSumOfSquares_FirstVariable} </pre>
466 */
467 private void ss() {
468 double total = sserr;
469 rss[nvars - 1] = sserr;
470 for (int i = nvars - 1; i > 0; i--) {
471 total += d[i] * rhs[i] * rhs[i];
472 rss[i - 1] = total;
473 }
474 rssSet = true;
475 }
476
477 /**
478 * Calculates the cov matrix assuming only the first nreq variables are
479 * included in the calculation. The returned array contains a symmetric
480 * matrix stored in lower triangular form. The matrix will have
481 * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
482 * cov =
483 * {
484 * cov_00,
485 * cov_10, cov_11,
486 * cov_20, cov_21, cov22,
487 * ...
488 * } </pre>
489 *
490 * @param nreq how many of the regressors to include (either in canonical
491 * order, or in the current reordered state)
492 * @return an array with the variance covariance of the included
493 * regressors in lower triangular form
494 */
495 private double[] cov(int nreq) {
496 if (this.nobs <= nreq) {
497 return null;
498 }
499 double rnk = 0.0;
500 for (int i = 0; i < nreq; i++) {
501 if (!this.lindep[i]) {
502 rnk += 1.0;
503 }
504 }
505 final double var = rss[nreq - 1] / (nobs - rnk);
506 final double[] rinv = new double[nreq * (nreq - 1) / 2];
507 inverse(rinv, nreq);
508 final double[] covmat = new double[nreq * (nreq + 1) / 2];
509 Arrays.fill(covmat, Double.NaN);
510 int pos2;
511 int pos1;
512 int start = 0;
513 double total = 0;
514 for (int row = 0; row < nreq; row++) {
515 pos2 = start;
516 if (!this.lindep[row]) {
517 for (int col = row; col < nreq; col++) {
518 if (!this.lindep[col]) {
519 pos1 = start + col - row;
520 if (row == col) {
521 total = 1.0 / d[col];
522 } else {
523 total = rinv[pos1 - 1] / d[col];
524 }
525 for (int k = col + 1; k < nreq; k++) {
526 if (!this.lindep[k]) {
527 total += rinv[pos1] * rinv[pos2] / d[k];
528 }
529 ++pos1;
530 ++pos2;
531 }
532 covmat[ (col + 1) * col / 2 + row] = total * var;
533 } else {
534 pos2 += nreq - col - 1;
535 }
536 }
537 }
538 start += nreq - row - 1;
539 }
540 return covmat;
541 }
542
543 /**
544 * This internal method calculates the inverse of the upper-triangular portion
545 * of the R matrix.
546 * @param rinv the storage for the inverse of r
547 * @param nreq how many of the regressors to include (either in canonical
548 * order, or in the current reordered state)
549 */
550 private void inverse(double[] rinv, int nreq) {
551 int pos = nreq * (nreq - 1) / 2 - 1;
552 int pos1 = -1;
553 int pos2 = -1;
554 double total = 0.0;
555 Arrays.fill(rinv, Double.NaN);
556 for (int row = nreq - 1; row > 0; --row) {
557 if (!this.lindep[row]) {
558 final int start = (row - 1) * (nvars + nvars - row) / 2;
559 for (int col = nreq; col > row; --col) {
560 pos1 = start;
561 pos2 = pos;
562 total = 0.0;
563 for (int k = row; k < col - 1; k++) {
564 pos2 += nreq - k - 1;
565 if (!this.lindep[k]) {
566 total += -r[pos1] * rinv[pos2];
567 }
568 ++pos1;
569 }
570 rinv[pos] = total - r[pos1];
571 --pos;
572 }
573 } else {
574 pos -= nreq - row;
575 }
576 }
577 }
578
579 /**
580 * In the original algorithm only the partial correlations of the regressors
581 * is returned to the user. In this implementation, we have <pre>
582 * corr =
583 * {
584 * corrxx - lower triangular
585 * corrxy - bottom row of the matrix
586 * }
587 * Replaces subroutines PCORR and COR of:
588 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
589 *
590 * <p>Calculate partial correlations after the variables in rows
591 * 1, 2, ..., IN have been forced into the regression.
592 * If IN = 1, and the first row of R represents a constant in the
593 * model, then the usual simple correlations are returned.</p>
594 *
595 * <p>If IN = 0, the value returned in array CORMAT for the correlation
596 * of variables Xi & Xj is: <pre>
597 * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
598 *
599 * <p>On return, array CORMAT contains the upper triangle of the matrix of
600 * partial correlations stored by rows, excluding the 1's on the diagonal.
601 * e.g. if IN = 2, the consecutive elements returned are:
602 * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
603 * Array YCORR stores the partial correlations with the Y-variable
604 * starting with YCORR(IN+1) = partial correlation with the variable in
605 * position (IN+1). </p>
606 *
607 * @param in how many of the regressors to include (either in canonical
608 * order, or in the current reordered state)
609 * @return an array with the partial correlations of the remainder of
610 * regressors with each other and the regressand, in lower triangular form
611 */
612 public double[] getPartialCorrelations(int in) {
613 final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
614 int pos;
615 int pos1;
616 int pos2;
617 final int rmsOff = -in;
618 final int wrkOff = -(in + 1);
619 final double[] rms = new double[nvars - in];
620 final double[] work = new double[nvars - in - 1];
621 double sumxx;
622 double sumxy;
623 double sumyy;
624 final int offXX = (nvars - in) * (nvars - in - 1) / 2;
625 if (in < -1 || in >= nvars) {
626 return null;
627 }
628 final int nvm = nvars - 1;
629 final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2;
630 if (d[in] > 0.0) {
631 rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]);
632 }
633 for (int col = in + 1; col < nvars; col++) {
634 pos = basePos + col - 1 - in;
635 sumxx = d[col];
636 for (int row = in; row < col; row++) {
637 sumxx += d[row] * r[pos] * r[pos];
638 pos += nvars - row - 2;
639 }
640 if (sumxx > 0.0) {
641 rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx);
642 } else {
643 rms[col + rmsOff] = 0.0;
644 }
645 }
646 sumyy = sserr;
647 for (int row = in; row < nvars; row++) {
648 sumyy += d[row] * rhs[row] * rhs[row];
649 }
650 if (sumyy > 0.0) {
651 sumyy = 1.0 / JdkMath.sqrt(sumyy);
652 }
653 pos = 0;
654 for (int col1 = in; col1 < nvars; col1++) {
655 sumxy = 0.0;
656 Arrays.fill(work, 0.0);
657 pos1 = basePos + col1 - in - 1;
658 for (int row = in; row < col1; row++) {
659 pos2 = pos1 + 1;
660 for (int col2 = col1 + 1; col2 < nvars; col2++) {
661 work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2];
662 pos2++;
663 }
664 sumxy += d[row] * r[pos1] * rhs[row];
665 pos1 += nvars - row - 2;
666 }
667 pos2 = pos1 + 1;
668 for (int col2 = col1 + 1; col2 < nvars; col2++) {
669 work[col2 + wrkOff] += d[col1] * r[pos2];
670 ++pos2;
671 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
672 work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff];
673 ++pos;
674 }
675 sumxy += d[col1] * rhs[col1];
676 output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy;
677 }
678
679 return output;
680 }
681
682 /**
683 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
684 * Move variable from position FROM to position TO in an
685 * orthogonal reduction produced by AS75.1.
686 *
687 * @param from initial position
688 * @param to destination
689 */
690 private void vmove(int from, int to) {
691 double d1;
692 double d2;
693 double x;
694 double d1new;
695 double d2new;
696 double cbar;
697 double sbar;
698 double y;
699 int first;
700 int inc;
701 int m1;
702 int m2;
703 int mp1;
704 int pos;
705 boolean bSkipTo40 = false;
706 if (from == to) {
707 return;
708 }
709 if (!this.rssSet) {
710 ss();
711 }
712 int count = 0;
713 if (from < to) {
714 first = from;
715 inc = 1;
716 count = to - from;
717 } else {
718 first = from - 1;
719 inc = -1;
720 count = from - to;
721 }
722
723 int m = first;
724 int idx = 0;
725 while (idx < count) {
726 m1 = m * (nvars + nvars - m - 1) / 2;
727 m2 = m1 + nvars - m - 1;
728 mp1 = m + 1;
729
730 d1 = d[m];
731 d2 = d[mp1];
732 // Special cases.
733 if (d1 > this.epsilon || d2 > this.epsilon) {
734 x = r[m1];
735 if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) {
736 x = 0.0;
737 }
738 if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) {
739 d[m] = d2;
740 d[mp1] = d1;
741 r[m1] = 0.0;
742 for (int col = m + 2; col < nvars; col++) {
743 ++m1;
744 x = r[m1];
745 r[m1] = r[m2];
746 r[m2] = x;
747 ++m2;
748 }
749 x = rhs[m];
750 rhs[m] = rhs[mp1];
751 rhs[mp1] = x;
752 bSkipTo40 = true;
753 //break;
754 } else if (d2 < this.epsilon) {
755 d[m] = d1 * x * x;
756 r[m1] = 1.0 / x;
757 for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) {
758 r[i] /= x;
759 }
760 rhs[m] /= x;
761 bSkipTo40 = true;
762 //break;
763 }
764 if (!bSkipTo40) {
765 d1new = d2 + d1 * x * x;
766 cbar = d2 / d1new;
767 sbar = x * d1 / d1new;
768 d2new = d1 * cbar;
769 d[m] = d1new;
770 d[mp1] = d2new;
771 r[m1] = sbar;
772 for (int col = m + 2; col < nvars; col++) {
773 ++m1;
774 y = r[m1];
775 r[m1] = cbar * r[m2] + sbar * y;
776 r[m2] = y - x * r[m2];
777 ++m2;
778 }
779 y = rhs[m];
780 rhs[m] = cbar * rhs[mp1] + sbar * y;
781 rhs[mp1] = y - x * rhs[mp1];
782 }
783 }
784 if (m > 0) {
785 pos = m;
786 for (int row = 0; row < m; row++) {
787 x = r[pos];
788 r[pos] = r[pos - 1];
789 r[pos - 1] = x;
790 pos += nvars - row - 2;
791 }
792 }
793 // Adjust variable order (VORDER), the tolerances (TOL) and
794 // the vector of residual sums of squares (RSS).
795 m1 = vorder[m];
796 vorder[m] = vorder[mp1];
797 vorder[mp1] = m1;
798 x = tol[m];
799 tol[m] = tol[mp1];
800 tol[mp1] = x;
801 rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
802
803 m += inc;
804 ++idx;
805 }
806 }
807
808 /**
809 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
810 *
811 * <p> Re-order the variables in an orthogonal reduction produced by
812 * AS75.1 so that the N variables in LIST start at position POS1,
813 * though will not necessarily be in the same order as in LIST.
814 * Any variables in VORDER before position POS1 are not moved.
815 * Auxiliary routine called: VMOVE. </p>
816 *
817 * <p>This internal method reorders the regressors.</p>
818 *
819 * @param list the regressors to move
820 * @param pos1 where the list will be placed
821 * @return -1 error, 0 everything ok
822 */
823 private int reorderRegressors(int[] list, int pos1) {
824 int next;
825 int i;
826 int l;
827 if (list.length < 1 || list.length > nvars + 1 - pos1) {
828 return -1;
829 }
830 next = pos1;
831 i = pos1;
832 while (i < nvars) {
833 l = vorder[i];
834 for (int j = 0; j < list.length; j++) {
835 if (l == list[j] && i > next) {
836 this.vmove(i, next);
837 ++next;
838 if (next >= list.length + pos1) {
839 return 0;
840 } else {
841 break;
842 }
843 }
844 }
845 ++i;
846 }
847 return 0;
848 }
849
850 /**
851 * Gets the diagonal of the Hat matrix also known as the leverage matrix.
852 *
853 * @param rowData returns the diagonal of the hat matrix for this observation
854 * @return the diagonal element of the hatmatrix
855 */
856 public double getDiagonalOfHatMatrix(double[] rowData) {
857 double[] wk = new double[this.nvars];
858 int pos;
859 double total;
860
861 if (rowData.length > nvars) {
862 return Double.NaN;
863 }
864 double[] xrow;
865 if (this.hasIntercept) {
866 xrow = new double[rowData.length + 1];
867 xrow[0] = 1.0;
868 System.arraycopy(rowData, 0, xrow, 1, rowData.length);
869 } else {
870 xrow = rowData;
871 }
872 double hii = 0.0;
873 for (int col = 0; col < xrow.length; col++) {
874 if (JdkMath.sqrt(d[col]) < tol[col]) {
875 wk[col] = 0.0;
876 } else {
877 pos = col - 1;
878 total = xrow[col];
879 for (int row = 0; row < col; row++) {
880 total = smartAdd(total, -wk[row] * r[pos]);
881 pos += nvars - row - 2;
882 }
883 wk[col] = total;
884 hii = smartAdd(hii, (total * total) / d[col]);
885 }
886 }
887 return hii;
888 }
889
890 /**
891 * Gets the order of the regressors, useful if some type of reordering
892 * has been called. Calling regress with int[]{} args will trigger
893 * a reordering.
894 *
895 * @return int[] with the current order of the regressors
896 */
897 public int[] getOrderOfRegressors(){
898 return Arrays.copyOf(vorder, vorder.length);
899 }
900
901 /**
902 * Conducts a regression on the data in the model, using all regressors.
903 *
904 * @return RegressionResults the structure holding all regression results
905 * @exception ModelSpecificationException - thrown if number of observations is
906 * less than the number of variables
907 */
908 @Override
909 public RegressionResults regress() throws ModelSpecificationException {
910 return regress(this.nvars);
911 }
912
913 /**
914 * Conducts a regression on the data in the model, using a subset of regressors.
915 *
916 * @param numberOfRegressors many of the regressors to include (either in canonical
917 * order, or in the current reordered state)
918 * @return RegressionResults the structure holding all regression results
919 * @exception ModelSpecificationException - thrown if number of observations is
920 * less than the number of variables or number of regressors requested
921 * is greater than the regressors in the model
922 */
923 public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
924 if (this.nobs <= numberOfRegressors) {
925 throw new ModelSpecificationException(
926 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
927 this.nobs, numberOfRegressors);
928 }
929 if( numberOfRegressors > this.nvars ){
930 throw new ModelSpecificationException(
931 LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
932 }
933
934 tolset();
935 singcheck();
936
937 double[] beta = this.regcf(numberOfRegressors);
938
939 ss();
940
941 double[] cov = this.cov(numberOfRegressors);
942
943 int rnk = 0;
944 for (int i = 0; i < this.lindep.length; i++) {
945 if (!this.lindep[i]) {
946 ++rnk;
947 }
948 }
949
950 boolean needsReorder = false;
951 for (int i = 0; i < numberOfRegressors; i++) {
952 if (this.vorder[i] != i) {
953 needsReorder = true;
954 break;
955 }
956 }
957 if (!needsReorder) {
958 return new RegressionResults(
959 beta, new double[][]{cov}, true, this.nobs, rnk,
960 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
961 } else {
962 double[] betaNew = new double[beta.length];
963 double[] covNew = new double[cov.length];
964
965 int[] newIndices = new int[beta.length];
966 for (int i = 0; i < nvars; i++) {
967 for (int j = 0; j < numberOfRegressors; j++) {
968 if (this.vorder[j] == i) {
969 betaNew[i] = beta[ j];
970 newIndices[i] = j;
971 }
972 }
973 }
974
975 int idx1 = 0;
976 int idx2;
977 int ii;
978 int jj;
979 for (int i = 0; i < beta.length; i++) {
980 ii = newIndices[i];
981 for (int j = 0; j <= i; j++, idx1++) {
982 jj = newIndices[j];
983 if (ii > jj) {
984 idx2 = ii * (ii + 1) / 2 + jj;
985 } else {
986 idx2 = jj * (jj + 1) / 2 + ii;
987 }
988 covNew[idx1] = cov[idx2];
989 }
990 }
991 return new RegressionResults(
992 betaNew, new double[][]{covNew}, true, this.nobs, rnk,
993 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
994 }
995 }
996
997 /**
998 * Conducts a regression on the data in the model, using regressors in array
999 * Calling this method will change the internal order of the regressors
1000 * and care is required in interpreting the hatmatrix.
1001 *
1002 * @param variablesToInclude array of variables to include in regression
1003 * @return RegressionResults the structure holding all regression results
1004 * @exception ModelSpecificationException - thrown if number of observations is
1005 * less than the number of variables, the number of regressors requested
1006 * is greater than the regressors in the model or a regressor index in
1007 * regressor array does not exist
1008 */
1009 @Override
1010 public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
1011 if (variablesToInclude.length > this.nvars) {
1012 throw new ModelSpecificationException(
1013 LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1014 }
1015 if (this.nobs <= this.nvars) {
1016 throw new ModelSpecificationException(
1017 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1018 this.nobs, this.nvars);
1019 }
1020 Arrays.sort(variablesToInclude);
1021 int iExclude = 0;
1022 for (int i = 0; i < variablesToInclude.length; i++) {
1023 if (i >= this.nvars) {
1024 throw new ModelSpecificationException(
1025 LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
1026 }
1027 if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1028 variablesToInclude[i] = -1;
1029 ++iExclude;
1030 }
1031 }
1032 int[] series;
1033 if (iExclude > 0) {
1034 int j = 0;
1035 series = new int[variablesToInclude.length - iExclude];
1036 for (int i = 0; i < variablesToInclude.length; i++) {
1037 if (variablesToInclude[i] > -1) {
1038 series[j] = variablesToInclude[i];
1039 ++j;
1040 }
1041 }
1042 } else {
1043 series = variablesToInclude;
1044 }
1045
1046 reorderRegressors(series, 0);
1047 tolset();
1048 singcheck();
1049
1050 double[] beta = this.regcf(series.length);
1051
1052 ss();
1053
1054 double[] cov = this.cov(series.length);
1055
1056 int rnk = 0;
1057 for (int i = 0; i < this.lindep.length; i++) {
1058 if (!this.lindep[i]) {
1059 ++rnk;
1060 }
1061 }
1062
1063 boolean needsReorder = false;
1064 for (int i = 0; i < series.length; i++) {
1065 if (this.vorder[i] != series[i]) {
1066 needsReorder = true;
1067 break;
1068 }
1069 }
1070 if (!needsReorder) {
1071 return new RegressionResults(
1072 beta, new double[][]{cov}, true, this.nobs, rnk,
1073 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1074 } else {
1075 double[] betaNew = new double[beta.length];
1076 int[] newIndices = new int[beta.length];
1077 for (int i = 0; i < series.length; i++) {
1078 for (int j = 0; j < this.vorder.length; j++) {
1079 if (this.vorder[j] == series[i]) {
1080 betaNew[i] = beta[ j];
1081 newIndices[i] = j;
1082 }
1083 }
1084 }
1085 double[] covNew = new double[cov.length];
1086 int idx1 = 0;
1087 int idx2;
1088 int ii;
1089 int jj;
1090 for (int i = 0; i < beta.length; i++) {
1091 ii = newIndices[i];
1092 for (int j = 0; j <= i; j++, idx1++) {
1093 jj = newIndices[j];
1094 if (ii > jj) {
1095 idx2 = ii * (ii + 1) / 2 + jj;
1096 } else {
1097 idx2 = jj * (jj + 1) / 2 + ii;
1098 }
1099 covNew[idx1] = cov[idx2];
1100 }
1101 }
1102 return new RegressionResults(
1103 betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1104 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1105 }
1106 }
1107 }