/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.regression;

import java.util.Arrays;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.MathArrays;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
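 *
 * <p>A minimal usage sketch (the numeric values are hypothetical): three
 * regressors plus an automatically added constant, so more observations than
 * the four resulting variables are needed before regress() can be called:</p>
 * <pre>
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(3, true);
 * regression.addObservation(new double[]{1.0, 4.0, 9.0}, 12.5);
 * regression.addObservation(new double[]{2.0, 5.0, 8.0}, 14.1);
 * // ... further observations ...
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates();
 * </pre>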
 *
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of R'Y */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs = 0;
    /** sum of squared errors of largest regression */
    private double sserr = 0.0;
    /** whether ss() has been called and rss[] is current */
    private boolean rss_set = false;
    /** has the tolerance setting method been called */
    private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy = 0.0;
    /** summation of squared Y values */
    private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
    private boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;

    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
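     *
     * <p>For example, for a model declared with two regressors and a constant,
     * a single (hypothetical) data point is entered as
     * {@code regression.addObservation(new double[]{11.5, 7.0}, 21.2);}.
     * The constant term, when present, is prepended internally and must not
     * be supplied in {@code x}.</p>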
     * @param x the array with regressor values
     * @param y the value of dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
                (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(MathArrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if ((x == null) || (y == null) || (x.length != y.length)) {
            throw new ModelSpecificationException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                    (x == null) ? 0 : x.length,
                    (y == null) ? 0 : y.length);
        }
        if (x.length == 0) { // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This statement forms all
     * intermediate data which will be used for all derivative measures.
     * Note that, per the Miller paper, the original implementation overwrites the x
     * vector; in this implementation the include method is passed a copy of the
     * original data vector, so the caller's data are not contaminated. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes dense
     * design matrices; the original Gentleman algorithm retains some advantage on
     * sparse matrices.
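     *
     * <p>For each nonzero element x_i of the (copied) observation vector, the loop
     * below applies Gentleman's scaled update; in scalar form (w is the running
     * weight, d_i, r_ik and rhs_i the stored decomposition, primes the updated
     * values, and all right-hand sides the pre-update quantities):</p>
     * <pre>
     * d_i'  = d_i + w * x_i^2
     * w'    = d_i * w / d_i'
     * x_k'  = x_k - x_i * r_ik                       for k = i+1, ..., nvars-1
     * r_ik' = (d_i * r_ik + w * x_i * x_k) / d_i'
     * </pre>
     * <p>The same update is applied to rhs_i with the running y value in place
     * of x_k.</p>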
223 * 224 * @param x observations on the regressors 225 * @param wi weight of the this observation (-1,1) 226 * @param yi observation on the regressand 227 */ 228 private void include(final double[] x, final double wi, final double yi) { 229 int nextr = 0; 230 double w = wi; 231 double y = yi; 232 double xi; 233 double di; 234 double wxi; 235 double dpi; 236 double xk; 237 double _w; 238 this.rss_set = false; 239 sumy = smartAdd(yi, sumy); 240 sumsqy = smartAdd(sumsqy, yi * yi); 241 for (int i = 0; i < x.length; i++) { 242 if (w == 0.0) { 243 return; 244 } 245 xi = x[i]; 246 247 if (xi == 0.0) { 248 nextr += nvars - i - 1; 249 continue; 250 } 251 di = d[i]; 252 wxi = w * xi; 253 _w = w; 254 if (di != 0.0) { 255 dpi = smartAdd(di, wxi * xi); 256 final double tmp = wxi * xi / di; 257 if (FastMath.abs(tmp) > Precision.EPSILON) { 258 w = (di * w) / dpi; 259 } 260 } else { 261 dpi = wxi * xi; 262 w = 0.0; 263 } 264 d[i] = dpi; 265 for (int k = i + 1; k < nvars; k++) { 266 xk = x[k]; 267 x[k] = smartAdd(xk, -xi * r[nextr]); 268 if (di != 0.0) { 269 r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi; 270 } else { 271 r[nextr] = xk / xi; 272 } 273 ++nextr; 274 } 275 xk = y; 276 y = smartAdd(xk, -xi * rhs[i]); 277 if (di != 0.0) { 278 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi; 279 } else { 280 rhs[i] = xk / xi; 281 } 282 } 283 sserr = smartAdd(sserr, w * y * y); 284 } 285 286 /** 287 * Adds to number a and b such that the contamination due to 288 * numerical smallness of one addend does not corrupt the sum. 289 * @param a - an addend 290 * @param b - an addend 291 * @return the sum of the a and b 292 */ 293 private double smartAdd(double a, double b) { 294 final double _a = FastMath.abs(a); 295 final double _b = FastMath.abs(b); 296 if (_a > _b) { 297 final double eps = _a * Precision.EPSILON; 298 if (_b > eps) { 299 return a + b; 300 } 301 return a; 302 } else { 303 final double eps = _b * Precision.EPSILON; 304 if (_a > eps) { 305 return a + b; 306 } 307 return b; 308 } 309 } 310 311 /** 312 * As the name suggests, clear wipes the internals and reorders everything in the 313 * canonical order. 314 */ 315 public void clear() { 316 Arrays.fill(this.d, 0.0); 317 Arrays.fill(this.rhs, 0.0); 318 Arrays.fill(this.r, 0.0); 319 Arrays.fill(this.tol, 0.0); 320 Arrays.fill(this.rss, 0.0); 321 Arrays.fill(this.work_tolset, 0.0); 322 Arrays.fill(this.work_sing, 0.0); 323 Arrays.fill(this.x_sing, 0.0); 324 Arrays.fill(this.lindep, false); 325 for (int i = 0; i < nvars; i++) { 326 this.vorder[i] = i; 327 } 328 this.nobs = 0; 329 this.sserr = 0.0; 330 this.sumy = 0.0; 331 this.sumsqy = 0.0; 332 this.rss_set = false; 333 this.tol_set = false; 334 } 335 336 /** 337 * This sets up tolerances for singularity testing. 338 */ 339 private void tolset() { 340 int pos; 341 double total; 342 final double eps = this.epsilon; 343 for (int i = 0; i < nvars; i++) { 344 this.work_tolset[i] = FastMath.sqrt(d[i]); 345 } 346 tol[0] = eps * this.work_tolset[0]; 347 for (int col = 1; col < nvars; col++) { 348 pos = col - 1; 349 total = work_tolset[col]; 350 for (int row = 0; row < col; row++) { 351 total += FastMath.abs(r[pos]) * work_tolset[row]; 352 pos += nvars - row - 2; 353 } 354 tol[col] = eps * total; 355 } 356 tol_set = true; 357 } 358 359 /** 360 * The regcf method conducts the linear regression and extracts the 361 * parameter vector. Notice that the algorithm can do subset regression 362 * with no alteration. 
363 * 364 * @param nreq how many of the regressors to include (either in canonical 365 * order, or in the current reordered state) 366 * @return an array with the estimated slope coefficients 367 * @throws ModelSpecificationException if {@code nreq} is less than 1 368 * or greater than the number of independent variables 369 */ 370 private double[] regcf(int nreq) throws ModelSpecificationException { 371 int nextr; 372 if (nreq < 1) { 373 throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS); 374 } 375 if (nreq > this.nvars) { 376 throw new ModelSpecificationException( 377 LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars); 378 } 379 if (!this.tol_set) { 380 tolset(); 381 } 382 final double[] ret = new double[nreq]; 383 boolean rankProblem = false; 384 for (int i = nreq - 1; i > -1; i--) { 385 if (FastMath.sqrt(d[i]) < tol[i]) { 386 ret[i] = 0.0; 387 d[i] = 0.0; 388 rankProblem = true; 389 } else { 390 ret[i] = rhs[i]; 391 nextr = i * (nvars + nvars - i - 1) / 2; 392 for (int j = i + 1; j < nreq; j++) { 393 ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]); 394 ++nextr; 395 } 396 } 397 } 398 if (rankProblem) { 399 for (int i = 0; i < nreq; i++) { 400 if (this.lindep[i]) { 401 ret[i] = Double.NaN; 402 } 403 } 404 } 405 return ret; 406 } 407 408 /** 409 * The method which checks for singularities and then eliminates the offending 410 * columns. 411 */ 412 private void singcheck() { 413 int pos; 414 for (int i = 0; i < nvars; i++) { 415 work_sing[i] = FastMath.sqrt(d[i]); 416 } 417 for (int col = 0; col < nvars; col++) { 418 // Set elements within R to zero if they are less than tol(col) in 419 // absolute value after being scaled by the square root of their row 420 // multiplier 421 final double temp = tol[col]; 422 pos = col - 1; 423 for (int row = 0; row < col - 1; row++) { 424 if (FastMath.abs(r[pos]) * work_sing[row] < temp) { 425 r[pos] = 0.0; 426 } 427 pos += nvars - row - 2; 428 } 429 // If diagonal element is near zero, set it to zero, set appropriate 430 // element of LINDEP, and use INCLUD to augment the projections in 431 // the lower rows of the orthogonalization. 432 lindep[col] = false; 433 if (work_sing[col] < temp) { 434 lindep[col] = true; 435 if (col < nvars - 1) { 436 Arrays.fill(x_sing, 0.0); 437 int _pi = col * (nvars + nvars - col - 1) / 2; 438 for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) { 439 x_sing[_xi] = r[_pi]; 440 r[_pi] = 0.0; 441 } 442 final double y = rhs[col]; 443 final double weight = d[col]; 444 d[col] = 0.0; 445 rhs[col] = 0.0; 446 this.include(x_sing, weight, y); 447 } else { 448 sserr += d[col] * rhs[col] * rhs[col]; 449 } 450 } 451 } 452 } 453 454 /** 455 * Calculates the sum of squared errors for the full regression 456 * and all subsets in the following manner: <pre> 457 * rss[] ={ 458 * ResidualSumOfSquares_allNvars, 459 * ResidualSumOfSquares_FirstNvars-1, 460 * ResidualSumOfSquares_FirstNvars-2, 461 * ..., ResidualSumOfSquares_FirstVariable} </pre> 462 */ 463 private void ss() { 464 double total = sserr; 465 rss[nvars - 1] = sserr; 466 for (int i = nvars - 1; i > 0; i--) { 467 total += d[i] * rhs[i] * rhs[i]; 468 rss[i - 1] = total; 469 } 470 rss_set = true; 471 } 472 473 /** 474 * Calculates the cov matrix assuming only the first nreq variables are 475 * included in the calculation. The returned array contains a symmetric 476 * matrix stored in lower triangular form. The matrix will have 477 * ( nreq + 1 ) * nreq / 2 elements. 
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi & Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1).</p>
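     *
     * <p>For illustration, with nvars = 3 and in = 0 the elements of the
     * returned array are packed as:</p>
     * <pre>
     * output = { corr(x0,x1), corr(x0,x2), corr(x1,x2),
     *            corr(x0,y),  corr(x1,y),  corr(x2,y) }
     * </pre>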
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        // Validate before any array is sized with (nvars - in); a negative in
        // would otherwise cause an out-of-range access of d[in] below.
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
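     *
     * <p>In the regular (non-degenerate) case the swap of adjacent rows m and
     * m+1 is carried out with the scaled planar rotation used in the body
     * below, where X = r[m1] and d1, d2 are the row multipliers:</p>
     * <pre>
     * d1' = d2 + d1 * X^2,  cbar = d2 / d1',  sbar = X * d1 / d1',  d2' = d1 * cbar
     * </pre>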
682 * 683 * @param from initial position 684 * @param to destination 685 */ 686 private void vmove(int from, int to) { 687 double d1; 688 double d2; 689 double X; 690 double d1new; 691 double d2new; 692 double cbar; 693 double sbar; 694 double Y; 695 int first; 696 int inc; 697 int m1; 698 int m2; 699 int mp1; 700 int pos; 701 boolean bSkipTo40 = false; 702 if (from == to) { 703 return; 704 } 705 if (!this.rss_set) { 706 ss(); 707 } 708 int count = 0; 709 if (from < to) { 710 first = from; 711 inc = 1; 712 count = to - from; 713 } else { 714 first = from - 1; 715 inc = -1; 716 count = from - to; 717 } 718 719 int m = first; 720 int idx = 0; 721 while (idx < count) { 722 m1 = m * (nvars + nvars - m - 1) / 2; 723 m2 = m1 + nvars - m - 1; 724 mp1 = m + 1; 725 726 d1 = d[m]; 727 d2 = d[mp1]; 728 // Special cases. 729 if (d1 > this.epsilon || d2 > this.epsilon) { 730 X = r[m1]; 731 if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) { 732 X = 0.0; 733 } 734 if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) { 735 d[m] = d2; 736 d[mp1] = d1; 737 r[m1] = 0.0; 738 for (int col = m + 2; col < nvars; col++) { 739 ++m1; 740 X = r[m1]; 741 r[m1] = r[m2]; 742 r[m2] = X; 743 ++m2; 744 } 745 X = rhs[m]; 746 rhs[m] = rhs[mp1]; 747 rhs[mp1] = X; 748 bSkipTo40 = true; 749 //break; 750 } else if (d2 < this.epsilon) { 751 d[m] = d1 * X * X; 752 r[m1] = 1.0 / X; 753 for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) { 754 r[_i] /= X; 755 } 756 rhs[m] /= X; 757 bSkipTo40 = true; 758 //break; 759 } 760 if (!bSkipTo40) { 761 d1new = d2 + d1 * X * X; 762 cbar = d2 / d1new; 763 sbar = X * d1 / d1new; 764 d2new = d1 * cbar; 765 d[m] = d1new; 766 d[mp1] = d2new; 767 r[m1] = sbar; 768 for (int col = m + 2; col < nvars; col++) { 769 ++m1; 770 Y = r[m1]; 771 r[m1] = cbar * r[m2] + sbar * Y; 772 r[m2] = Y - X * r[m2]; 773 ++m2; 774 } 775 Y = rhs[m]; 776 rhs[m] = cbar * rhs[mp1] + sbar * Y; 777 rhs[mp1] = Y - X * rhs[mp1]; 778 } 779 } 780 if (m > 0) { 781 pos = m; 782 for (int row = 0; row < m; row++) { 783 X = r[pos]; 784 r[pos] = r[pos - 1]; 785 r[pos - 1] = X; 786 pos += nvars - row - 2; 787 } 788 } 789 // Adjust variable order (VORDER), the tolerances (TOL) and 790 // the vector of residual sums of squares (RSS). 791 m1 = vorder[m]; 792 vorder[m] = vorder[mp1]; 793 vorder[mp1] = m1; 794 X = tol[m]; 795 tol[m] = tol[mp1]; 796 tol[mp1] = X; 797 rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1]; 798 799 m += inc; 800 ++idx; 801 } 802 } 803 804 /** 805 * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 806 * 807 * <p> Re-order the variables in an orthogonal reduction produced by 808 * AS75.1 so that the N variables in LIST start at position POS1, 809 * though will not necessarily be in the same order as in LIST. 810 * Any variables in VORDER before position POS1 are not moved. 811 * Auxiliary routine called: VMOVE. 
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix also known as the leverage matrix.
     *
     * @param row_data the observation for which the leverage is computed
     * @return the corresponding diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling {@code regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return MathArrays.copyOf(vorder);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception ModelSpecificationException if the number of observations is
     * less than the number of variables
     */
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
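     *
     * <p>For example, {@code regress(2)} estimates the model restricted to the
     * first two columns in the current variable order; when an intercept is
     * present it occupies column 0, so this fits a constant plus one regressor.</p>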
910 * 911 * @param numberOfRegressors many of the regressors to include (either in canonical 912 * order, or in the current reordered state) 913 * @return RegressionResults the structure holding all regression results 914 * @exception ModelSpecificationException - thrown if number of observations is 915 * less than the number of variables or number of regressors requested 916 * is greater than the regressors in the model 917 */ 918 public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException { 919 if (this.nobs <= numberOfRegressors) { 920 throw new ModelSpecificationException( 921 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, 922 this.nobs, numberOfRegressors); 923 } 924 if( numberOfRegressors > this.nvars ){ 925 throw new ModelSpecificationException( 926 LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars); 927 } 928 929 tolset(); 930 singcheck(); 931 932 double[] beta = this.regcf(numberOfRegressors); 933 934 ss(); 935 936 double[] cov = this.cov(numberOfRegressors); 937 938 int rnk = 0; 939 for (int i = 0; i < this.lindep.length; i++) { 940 if (!this.lindep[i]) { 941 ++rnk; 942 } 943 } 944 945 boolean needsReorder = false; 946 for (int i = 0; i < numberOfRegressors; i++) { 947 if (this.vorder[i] != i) { 948 needsReorder = true; 949 break; 950 } 951 } 952 if (!needsReorder) { 953 return new RegressionResults( 954 beta, new double[][]{cov}, true, this.nobs, rnk, 955 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); 956 } else { 957 double[] betaNew = new double[beta.length]; 958 double[] covNew = new double[cov.length]; 959 960 int[] newIndices = new int[beta.length]; 961 for (int i = 0; i < nvars; i++) { 962 for (int j = 0; j < numberOfRegressors; j++) { 963 if (this.vorder[j] == i) { 964 betaNew[i] = beta[ j]; 965 newIndices[i] = j; 966 } 967 } 968 } 969 970 int idx1 = 0; 971 int idx2; 972 int _i; 973 int _j; 974 for (int i = 0; i < beta.length; i++) { 975 _i = newIndices[i]; 976 for (int j = 0; j <= i; j++, idx1++) { 977 _j = newIndices[j]; 978 if (_i > _j) { 979 idx2 = _i * (_i + 1) / 2 + _j; 980 } else { 981 idx2 = _j * (_j + 1) / 2 + _i; 982 } 983 covNew[idx1] = cov[idx2]; 984 } 985 } 986 return new RegressionResults( 987 betaNew, new double[][]{covNew}, true, this.nobs, rnk, 988 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); 989 } 990 } 991 992 /** 993 * Conducts a regression on the data in the model, using regressors in array 994 * Calling this method will change the internal order of the regressors 995 * and care is required in interpreting the hatmatrix. 
996 * 997 * @param variablesToInclude array of variables to include in regression 998 * @return RegressionResults the structure holding all regression results 999 * @exception ModelSpecificationException - thrown if number of observations is 1000 * less than the number of variables, the number of regressors requested 1001 * is greater than the regressors in the model or a regressor index in 1002 * regressor array does not exist 1003 */ 1004 public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException { 1005 if (variablesToInclude.length > this.nvars) { 1006 throw new ModelSpecificationException( 1007 LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars); 1008 } 1009 if (this.nobs <= this.nvars) { 1010 throw new ModelSpecificationException( 1011 LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, 1012 this.nobs, this.nvars); 1013 } 1014 Arrays.sort(variablesToInclude); 1015 int iExclude = 0; 1016 for (int i = 0; i < variablesToInclude.length; i++) { 1017 if (i >= this.nvars) { 1018 throw new ModelSpecificationException( 1019 LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars); 1020 } 1021 if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) { 1022 variablesToInclude[i] = -1; 1023 ++iExclude; 1024 } 1025 } 1026 int[] series; 1027 if (iExclude > 0) { 1028 int j = 0; 1029 series = new int[variablesToInclude.length - iExclude]; 1030 for (int i = 0; i < variablesToInclude.length; i++) { 1031 if (variablesToInclude[i] > -1) { 1032 series[j] = variablesToInclude[i]; 1033 ++j; 1034 } 1035 } 1036 } else { 1037 series = variablesToInclude; 1038 } 1039 1040 reorderRegressors(series, 0); 1041 tolset(); 1042 singcheck(); 1043 1044 double[] beta = this.regcf(series.length); 1045 1046 ss(); 1047 1048 double[] cov = this.cov(series.length); 1049 1050 int rnk = 0; 1051 for (int i = 0; i < this.lindep.length; i++) { 1052 if (!this.lindep[i]) { 1053 ++rnk; 1054 } 1055 } 1056 1057 boolean needsReorder = false; 1058 for (int i = 0; i < this.nvars; i++) { 1059 if (this.vorder[i] != series[i]) { 1060 needsReorder = true; 1061 break; 1062 } 1063 } 1064 if (!needsReorder) { 1065 return new RegressionResults( 1066 beta, new double[][]{cov}, true, this.nobs, rnk, 1067 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); 1068 } else { 1069 double[] betaNew = new double[beta.length]; 1070 int[] newIndices = new int[beta.length]; 1071 for (int i = 0; i < series.length; i++) { 1072 for (int j = 0; j < this.vorder.length; j++) { 1073 if (this.vorder[j] == series[i]) { 1074 betaNew[i] = beta[ j]; 1075 newIndices[i] = j; 1076 } 1077 } 1078 } 1079 double[] covNew = new double[cov.length]; 1080 int idx1 = 0; 1081 int idx2; 1082 int _i; 1083 int _j; 1084 for (int i = 0; i < beta.length; i++) { 1085 _i = newIndices[i]; 1086 for (int j = 0; j <= i; j++, idx1++) { 1087 _j = newIndices[j]; 1088 if (_i > _j) { 1089 idx2 = _i * (_i + 1) / 2 + _j; 1090 } else { 1091 idx2 = _j * (_j + 1) / 2 + _i; 1092 } 1093 covNew[idx1] = cov[idx2]; 1094 } 1095 } 1096 return new RegressionResults( 1097 betaNew, new double[][]{covNew}, true, this.nobs, rnk, 1098 this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); 1099 } 1100 } 1101}