/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.legacy.stat.regression;

import java.util.Arrays;

import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
import org.apache.commons.math4.core.jdkmath.JdkMath;
import org.apache.commons.numbers.core.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression. */
    private final int nvars;
    /** diagonals of cross products matrix. */
    private final double[] d;
    /** the elements of R'Y. */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix. */
    private final double[] r;
    /** the tolerance for each of the variables. */
    private final double[] tol;
    /** residual sum of squares for all nested regressions. */
    private final double[] rss;
    /** order of the regressors. */
    private final int[] vorder;
    /** scratch space for tolerance calc. */
    private final double[] workTolset;
    /** number of observations entered. */
    private long nobs;
    /** sum of squared errors of largest regression. */
    private double sserr;
    /** has rss been called? */
    private boolean rssSet;
    /** has the tolerance setting method been called? */
    private boolean tolSet;
    /** flags for variables with linear dependency problems. */
    private final boolean[] lindep;
    /** singular x values. */
    private final double[] xSing;
    /** workspace for singularity method. */
    private final double[] workSing;
    /** summation of Y variable. */
    private double sumy;
    /** summation of squared Y values. */
    private double sumsqy;
    /** boolean flag whether a regression constant is added. */
    private final boolean hasIntercept;
    /** zero tolerance. */
    private final double epsilon;

    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.xSing = new double[this.nvars];
        this.workSing = new double[this.nvars];
        this.workTolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }
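
    // A minimal usage sketch (the data values are illustrative only): two
    // regressors plus an automatically added constant, fitted after four
    // observations. regress() requires more observations than variables.
    //
    //   MillerUpdatingRegression mr = new MillerUpdatingRegression(2, true);
    //   mr.addObservation(new double[]{1.0, 3.0}, 2.5);
    //   mr.addObservation(new double[]{2.0, 5.0}, 4.1);
    //   mr.addObservation(new double[]{3.0, 4.0}, 5.2);
    //   mr.addObservation(new double[]{4.0, 6.0}, 6.9);
    //   RegressionResults fit = mr.regress();
    //   double[] beta = fit.getParameterEstimates();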

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @exception ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
                (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(Arrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }
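
    // Intercept-handling sketch: with hasIntercept == true the call
    //
    //   mr.addObservation(new double[]{x1, x2}, y);
    //
    // internally updates the decomposition with the augmented row {1.0, x1, x2},
    // so callers never supply the constant column themselves.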

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if (x == null || y == null || x.length != y.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                    (x == null) ? 0 : x.length,
                    (y == null) ? 0 : y.length);
        }
        if (x.length == 0) { // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }
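
    // Batch-loading sketch (values illustrative): x must have at least
    // x[0].length + 1 rows, i.e. more observations than predictors.
    //
    //   double[][] x = {{1.0, 3.0}, {2.0, 5.0}, {3.0, 4.0}, {4.0, 6.0}};
    //   double[] y = {2.5, 4.1, 5.2, 6.9};
    //   mr.addObservations(x, y);  // equivalent to four addObservation calls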

    /**
     * The include method is where the QR decomposition occurs. It forms all
     * intermediate data which will be used for all derivative measures.
     * Note that in the original implementation described in the Miller paper
     * the x vector is overwritten; here the include method is passed a copy of
     * the original data vector, so there is no contamination of the data.
     * Additionally, this method differs slightly from Gentleman's method in
     * that it assumes dense design matrices; the original Gentleman algorithm
     * retains some advantage for sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double wPrev;
        this.rssSet = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            wPrev = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (JdkMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }
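
    // A sketch of the scale-free Givens update applied above for one nonzero
    // regressor value x_i, current row multiplier d_i and observation weight w
    // (after Gentleman, as used in Miller's AS 274):
    //
    //   d_i'  = d_i + w * x_i^2
    //   cbar  = d_i / d_i'          sbar = w * x_i / d_i'
    //   w'    = cbar * w
    //   r_ik' = cbar * r_ik + sbar * x_k
    //   x_k'  = x_k - x_i * r_ik     (for each trailing column k > i)
    //
    // The same rotation updates rhs[i] and the running y in place of r_ik and x_k.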

    /**
     * Adds two numbers a and b such that the contamination due to
     * numerical smallness of one addend does not corrupt the sum.
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double aa = JdkMath.abs(a);
        final double ba = JdkMath.abs(b);
        if (aa > ba) {
            final double eps = aa * Precision.EPSILON;
            if (ba > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = ba * Precision.EPSILON;
            if (aa > eps) {
                return a + b;
            }
            return b;
        }
    }
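
    // Behavior sketch: smartAdd(1.0, 1.0e-20) returns 1.0 unchanged, because
    // 1.0e-20 falls below 1.0 * Precision.EPSILON; the dominant addend is kept
    // rather than letting a negligible term perturb the accumulation.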

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.workTolset, 0.0);
        Arrays.fill(this.workSing, 0.0);
        Arrays.fill(this.xSing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rssSet = false;
        this.tolSet = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.workTolset[i] = JdkMath.sqrt(d[i]);
        }
        tol[0] = eps * this.workTolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = workTolset[col];
            for (int row = 0; row < col; row++) {
                total += JdkMath.abs(r[pos]) * workTolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tolSet = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tolSet) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (JdkMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            workSing[i] = JdkMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier.
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (JdkMath.abs(r[pos]) * workSing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If the diagonal element is near zero, set it to zero, set the
            // appropriate element of LINDEP, and use INCLUD to augment the
            // projections in the lower rows of the orthogonalization.
            lindep[col] = false;
            if (workSing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(xSing, 0.0);
                    int pi = col * (nvars + nvars - col - 1) / 2;
                    for (int xi = col + 1; xi < nvars; xi++, pi++) {
                        xSing[xi] = r[pi];
                        r[pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(xSing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all nested subsets in the following manner: <pre>
     * rss[nvars - 1] = ResidualSumOfSquares_allNvars,
     * rss[nvars - 2] = ResidualSumOfSquares_FirstNvars-1,
     * ...,
     * rss[0] = ResidualSumOfSquares_FirstVariable </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rssSet = true;
    }
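
    // Layout sketch: for nvars == 3, after ss() runs,
    //   rss[2] = SSE with all three regressors,
    //   rss[1] = SSE with the first two (in the current vorder),
    //   rss[0] = SSE with only the first regressor.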

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }
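
    // Packed lower-triangular indexing sketch for nreq == 3: the six entries are
    //   covmat[0] = cov(0,0)
    //   covmat[1] = cov(1,0),  covmat[2] = cov(1,1)
    //   covmat[3] = cov(2,0),  covmat[4] = cov(2,1),  covmat[5] = cov(2,2)
    // i.e. cov(i,j) with i >= j is stored at index i * (i + 1) / 2 + j.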

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is: <pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rmsOff = -in;
        final int wrkOff = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        if (in < -1 || in >= nvars) {
            return null;
        }
        final int nvm = nvars - 1;
        final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = basePos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx);
            } else {
                rms[col + rmsOff] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / JdkMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = basePos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrkOff] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy;
        }

        return output;
    }
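
    // Sketch: for a model whose constant occupies column 0,
    //
    //   double[] corr = mr.getPartialCorrelations(1);
    //
    // returns the simple correlations among the remaining regressors in packed
    // lower-triangular order, followed by their correlations with Y in the
    // final nvars - 1 entries.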

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double x;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        if (from == to) {
            return;
        }
        if (!this.rssSet) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases. The flag stands in for the "go to 40" of the
            // original Fortran and must be reset on every pass through the loop.
            boolean bSkipTo40 = false;
            if (d1 > this.epsilon || d2 > this.epsilon) {
                x = r[m1];
                if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) {
                    x = 0.0;
                }
                if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        x = r[m1];
                        r[m1] = r[m2];
                        r[m2] = x;
                        ++m2;
                    }
                    x = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = x;
                    bSkipTo40 = true;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * x * x;
                    r[m1] = 1.0 / x;
                    for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) {
                        r[i] /= x;
                    }
                    rhs[m] /= x;
                    bSkipTo40 = true;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * x * x;
                    cbar = d2 / d1new;
                    sbar = x * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * y;
                        r[m2] = y - x * r[m2];
                        ++m2;
                    }
                    y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * y;
                    rhs[mp1] = y - x * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    x = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = x;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            x = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = x;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though they will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 on error, 0 if everything is ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }
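
    // Sketch: with vorder == {0, 1, 2, 3}, calling vmove(3, 1) walks variable 3
    // up through two adjacent swaps, leaving vorder == {0, 3, 1, 2}, with d, r,
    // rhs, tol and rss kept consistent at every step.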

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
     * @param rowData the regressor values for one observation
     * @return the diagonal element of the hat matrix for this observation
     */
    public double getDiagonalOfHatMatrix(double[] rowData) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (rowData.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[rowData.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(rowData, 0, xrow, 1, rowData.length);
        } else {
            xrow = rowData;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (JdkMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling a regress method with an array of indices
     * will trigger a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return Arrays.copyOf(vorder, vorder.length);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }
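
    // Sketch of the three regress flavors (values illustrative): regress() uses
    // all nvars columns, regress(k) (below) fits a nested model on the first k
    // columns in the current ordering, and regress(int[]) reorders internally.
    //
    //   RegressionResults full   = mr.regress();
    //   RegressionResults nested = mr.regress(2);
    //   RegressionResults subset = mr.regress(new int[]{0, 2});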

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, or the number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in
     * the given array. Calling this method will change the internal order of
     * the regressors, so care is required in interpreting the hat matrix.
     *
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model, or a regressor index in the
     * regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int ii;
            int jj;
            for (int i = 0; i < beta.length; i++) {
                ii = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    jj = newIndices[j];
                    if (ii > jj) {
                        idx2 = ii * (ii + 1) / 2 + jj;
                    } else {
                        idx2 = jj * (jj + 1) / 2 + ii;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}