001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math3.optim.nonlinear.vector.jacobian; 018 019 import java.util.Arrays; 020 import org.apache.commons.math3.exception.ConvergenceException; 021 import org.apache.commons.math3.exception.util.LocalizedFormats; 022 import org.apache.commons.math3.optim.PointVectorValuePair; 023 import org.apache.commons.math3.optim.ConvergenceChecker; 024 import org.apache.commons.math3.linear.RealMatrix; 025 import org.apache.commons.math3.util.Precision; 026 import org.apache.commons.math3.util.FastMath; 027 028 029 /** 030 * This class solves a least-squares problem using the Levenberg-Marquardt algorithm. 031 * 032 * <p>This implementation <em>should</em> work even for over-determined systems 033 * (i.e. systems having more point than equations). Over-determined systems 034 * are solved by ignoring the point which have the smallest impact according 035 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 036 * are changed to implement this.</p> 037 * 038 * <p>The resolution engine is a simple translation of the MINPACK <a 039 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 040 * changes. The changes include the over-determined resolution, the use of 041 * inherited convergence checker and the Q.R. decomposition which has been 042 * rewritten following the algorithm described in the 043 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 044 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 045 * <p>The authors of the original fortran version are: 046 * <ul> 047 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 048 * <li>Burton S. Garbow</li> 049 * <li>Kenneth E. Hillstrom</li> 050 * <li>Jorge J. More</li> 051 * </ul> 052 * The redistribution policy for MINPACK is available <a 053 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 054 * is reproduced below.</p> 055 * 056 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 057 * <tr><td> 058 * Minpack Copyright Notice (1999) University of Chicago. 059 * All rights reserved 060 * </td></tr> 061 * <tr><td> 062 * Redistribution and use in source and binary forms, with or without 063 * modification, are permitted provided that the following conditions 064 * are met: 065 * <ol> 066 * <li>Redistributions of source code must retain the above copyright 067 * notice, this list of conditions and the following disclaimer.</li> 068 * <li>Redistributions in binary form must reproduce the above 069 * copyright notice, this list of conditions and the following 070 * disclaimer in the documentation and/or other materials provided 071 * with the distribution.</li> 072 * <li>The end-user documentation included with the redistribution, if any, 073 * must include the following acknowledgment: 074 * <code>This product includes software developed by the University of 075 * Chicago, as Operator of Argonne National Laboratory.</code> 076 * Alternately, this acknowledgment may appear in the software itself, 077 * if and wherever such third-party acknowledgments normally appear.</li> 078 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 079 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 080 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 081 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 082 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 083 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 084 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 085 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 086 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 087 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 088 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 089 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 090 * BE CORRECTED.</strong></li> 091 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 092 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 093 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 094 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 095 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 096 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 097 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 098 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 099 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 100 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 101 * <ol></td></tr> 102 * </table> 103 * 104 * @version $Id: LevenbergMarquardtOptimizer.java 1416643 2012-12-03 19:37:14Z tn $ 105 * @since 2.0 106 */ 107 public class LevenbergMarquardtOptimizer 108 extends AbstractLeastSquaresOptimizer { 109 /** Number of solved point. */ 110 private int solvedCols; 111 /** Diagonal elements of the R matrix in the Q.R. decomposition. */ 112 private double[] diagR; 113 /** Norms of the columns of the jacobian matrix. */ 114 private double[] jacNorm; 115 /** Coefficients of the Householder transforms vectors. */ 116 private double[] beta; 117 /** Columns permutation array. */ 118 private int[] permutation; 119 /** Rank of the jacobian matrix. */ 120 private int rank; 121 /** Levenberg-Marquardt parameter. */ 122 private double lmPar; 123 /** Parameters evolution direction associated with lmPar. */ 124 private double[] lmDir; 125 /** Positive input variable used in determining the initial step bound. */ 126 private final double initialStepBoundFactor; 127 /** Desired relative error in the sum of squares. */ 128 private final double costRelativeTolerance; 129 /** Desired relative error in the approximate solution parameters. */ 130 private final double parRelativeTolerance; 131 /** Desired max cosine on the orthogonality between the function vector 132 * and the columns of the jacobian. */ 133 private final double orthoTolerance; 134 /** Threshold for QR ranking. */ 135 private final double qrRankingThreshold; 136 /** Weighted residuals. */ 137 private double[] weightedResidual; 138 /** Weighted Jacobian. */ 139 private double[][] weightedJacobian; 140 141 /** 142 * Build an optimizer for least squares problems with default values 143 * for all the tuning parameters (see the {@link 144 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 145 * other contructor}. 146 * The default values for the algorithm settings are: 147 * <ul> 148 * <li>Initial step bound factor: 100</li> 149 * <li>Cost relative tolerance: 1e-10</li> 150 * <li>Parameters relative tolerance: 1e-10</li> 151 * <li>Orthogonality tolerance: 1e-10</li> 152 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 153 * </ul> 154 */ 155 public LevenbergMarquardtOptimizer() { 156 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 157 } 158 159 /** 160 * Constructor that allows the specification of a custom convergence 161 * checker. 162 * Note that all the usual convergence checks will be <em>disabled</em>. 163 * The default values for the algorithm settings are: 164 * <ul> 165 * <li>Initial step bound factor: 100</li> 166 * <li>Cost relative tolerance: 1e-10</li> 167 * <li>Parameters relative tolerance: 1e-10</li> 168 * <li>Orthogonality tolerance: 1e-10</li> 169 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 170 * </ul> 171 * 172 * @param checker Convergence checker. 173 */ 174 public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 175 this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 176 } 177 178 /** 179 * Constructor that allows the specification of a custom convergence 180 * checker, in addition to the standard ones. 181 * 182 * @param initialStepBoundFactor Positive input variable used in 183 * determining the initial step bound. This bound is set to the 184 * product of initialStepBoundFactor and the euclidean norm of 185 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 186 * itself. In most cases factor should lie in the interval 187 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 188 * @param checker Convergence checker. 189 * @param costRelativeTolerance Desired relative error in the sum of 190 * squares. 191 * @param parRelativeTolerance Desired relative error in the approximate 192 * solution parameters. 193 * @param orthoTolerance Desired max cosine on the orthogonality between 194 * the function vector and the columns of the Jacobian. 195 * @param threshold Desired threshold for QR ranking. If the squared norm 196 * of a column vector is smaller or equal to this threshold during QR 197 * decomposition, it is considered to be a zero vector and hence the rank 198 * of the matrix is reduced. 199 */ 200 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 201 ConvergenceChecker<PointVectorValuePair> checker, 202 double costRelativeTolerance, 203 double parRelativeTolerance, 204 double orthoTolerance, 205 double threshold) { 206 super(checker); 207 this.initialStepBoundFactor = initialStepBoundFactor; 208 this.costRelativeTolerance = costRelativeTolerance; 209 this.parRelativeTolerance = parRelativeTolerance; 210 this.orthoTolerance = orthoTolerance; 211 this.qrRankingThreshold = threshold; 212 } 213 214 /** 215 * Build an optimizer for least squares problems with default values 216 * for some of the tuning parameters (see the {@link 217 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 218 * other contructor}. 219 * The default values for the algorithm settings are: 220 * <ul> 221 * <li>Initial step bound factor}: 100</li> 222 * <li>QR ranking threshold}: {@link Precision#SAFE_MIN}</li> 223 * </ul> 224 * 225 * @param costRelativeTolerance Desired relative error in the sum of 226 * squares. 227 * @param parRelativeTolerance Desired relative error in the approximate 228 * solution parameters. 229 * @param orthoTolerance Desired max cosine on the orthogonality between 230 * the function vector and the columns of the Jacobian. 231 */ 232 public LevenbergMarquardtOptimizer(double costRelativeTolerance, 233 double parRelativeTolerance, 234 double orthoTolerance) { 235 this(100, 236 costRelativeTolerance, parRelativeTolerance, orthoTolerance, 237 Precision.SAFE_MIN); 238 } 239 240 /** 241 * The arguments control the behaviour of the default convergence checking 242 * procedure. 243 * Additional criteria can defined through the setting of a {@link 244 * ConvergenceChecker}. 245 * 246 * @param initialStepBoundFactor Positive input variable used in 247 * determining the initial step bound. This bound is set to the 248 * product of initialStepBoundFactor and the euclidean norm of 249 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 250 * itself. In most cases factor should lie in the interval 251 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 252 * @param costRelativeTolerance Desired relative error in the sum of 253 * squares. 254 * @param parRelativeTolerance Desired relative error in the approximate 255 * solution parameters. 256 * @param orthoTolerance Desired max cosine on the orthogonality between 257 * the function vector and the columns of the Jacobian. 258 * @param threshold Desired threshold for QR ranking. If the squared norm 259 * of a column vector is smaller or equal to this threshold during QR 260 * decomposition, it is considered to be a zero vector and hence the rank 261 * of the matrix is reduced. 262 */ 263 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 264 double costRelativeTolerance, 265 double parRelativeTolerance, 266 double orthoTolerance, 267 double threshold) { 268 super(null); // No custom convergence criterion. 269 this.initialStepBoundFactor = initialStepBoundFactor; 270 this.costRelativeTolerance = costRelativeTolerance; 271 this.parRelativeTolerance = parRelativeTolerance; 272 this.orthoTolerance = orthoTolerance; 273 this.qrRankingThreshold = threshold; 274 } 275 276 /** {@inheritDoc} */ 277 @Override 278 protected PointVectorValuePair doOptimize() { 279 final int nR = getTarget().length; // Number of observed data. 280 final double[] currentPoint = getStartPoint(); 281 final int nC = currentPoint.length; // Number of parameters. 282 283 // arrays shared with the other private methods 284 solvedCols = FastMath.min(nR, nC); 285 diagR = new double[nC]; 286 jacNorm = new double[nC]; 287 beta = new double[nC]; 288 permutation = new int[nC]; 289 lmDir = new double[nC]; 290 291 // local point 292 double delta = 0; 293 double xNorm = 0; 294 double[] diag = new double[nC]; 295 double[] oldX = new double[nC]; 296 double[] oldRes = new double[nR]; 297 double[] oldObj = new double[nR]; 298 double[] qtf = new double[nR]; 299 double[] work1 = new double[nC]; 300 double[] work2 = new double[nC]; 301 double[] work3 = new double[nC]; 302 303 final RealMatrix weightMatrixSqrt = getWeightSquareRoot(); 304 305 // Evaluate the function at the starting point and calculate its norm. 306 double[] currentObjective = computeObjectiveValue(currentPoint); 307 double[] currentResiduals = computeResiduals(currentObjective); 308 PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective); 309 double currentCost = computeCost(currentResiduals); 310 311 // Outer loop. 312 lmPar = 0; 313 boolean firstIteration = true; 314 int iter = 0; 315 final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker(); 316 while (true) { 317 ++iter; 318 final PointVectorValuePair previous = current; 319 320 // QR decomposition of the jacobian matrix 321 qrDecomposition(computeWeightedJacobian(currentPoint)); 322 323 weightedResidual = weightMatrixSqrt.operate(currentResiduals); 324 for (int i = 0; i < nR; i++) { 325 qtf[i] = weightedResidual[i]; 326 } 327 328 // compute Qt.res 329 qTy(qtf); 330 331 // now we don't need Q anymore, 332 // so let jacobian contain the R matrix with its diagonal elements 333 for (int k = 0; k < solvedCols; ++k) { 334 int pk = permutation[k]; 335 weightedJacobian[k][pk] = diagR[pk]; 336 } 337 338 if (firstIteration) { 339 // scale the point according to the norms of the columns 340 // of the initial jacobian 341 xNorm = 0; 342 for (int k = 0; k < nC; ++k) { 343 double dk = jacNorm[k]; 344 if (dk == 0) { 345 dk = 1.0; 346 } 347 double xk = dk * currentPoint[k]; 348 xNorm += xk * xk; 349 diag[k] = dk; 350 } 351 xNorm = FastMath.sqrt(xNorm); 352 353 // initialize the step bound delta 354 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 355 } 356 357 // check orthogonality between function vector and jacobian columns 358 double maxCosine = 0; 359 if (currentCost != 0) { 360 for (int j = 0; j < solvedCols; ++j) { 361 int pj = permutation[j]; 362 double s = jacNorm[pj]; 363 if (s != 0) { 364 double sum = 0; 365 for (int i = 0; i <= j; ++i) { 366 sum += weightedJacobian[i][pj] * qtf[i]; 367 } 368 maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); 369 } 370 } 371 } 372 if (maxCosine <= orthoTolerance) { 373 // Convergence has been reached. 374 setCost(currentCost); 375 return current; 376 } 377 378 // rescale if necessary 379 for (int j = 0; j < nC; ++j) { 380 diag[j] = FastMath.max(diag[j], jacNorm[j]); 381 } 382 383 // Inner loop. 384 for (double ratio = 0; ratio < 1.0e-4;) { 385 386 // save the state 387 for (int j = 0; j < solvedCols; ++j) { 388 int pj = permutation[j]; 389 oldX[pj] = currentPoint[pj]; 390 } 391 final double previousCost = currentCost; 392 double[] tmpVec = weightedResidual; 393 weightedResidual = oldRes; 394 oldRes = tmpVec; 395 tmpVec = currentObjective; 396 currentObjective = oldObj; 397 oldObj = tmpVec; 398 399 // determine the Levenberg-Marquardt parameter 400 determineLMParameter(qtf, delta, diag, work1, work2, work3); 401 402 // compute the new point and the norm of the evolution direction 403 double lmNorm = 0; 404 for (int j = 0; j < solvedCols; ++j) { 405 int pj = permutation[j]; 406 lmDir[pj] = -lmDir[pj]; 407 currentPoint[pj] = oldX[pj] + lmDir[pj]; 408 double s = diag[pj] * lmDir[pj]; 409 lmNorm += s * s; 410 } 411 lmNorm = FastMath.sqrt(lmNorm); 412 // on the first iteration, adjust the initial step bound. 413 if (firstIteration) { 414 delta = FastMath.min(delta, lmNorm); 415 } 416 417 // Evaluate the function at x + p and calculate its norm. 418 currentObjective = computeObjectiveValue(currentPoint); 419 currentResiduals = computeResiduals(currentObjective); 420 current = new PointVectorValuePair(currentPoint, currentObjective); 421 currentCost = computeCost(currentResiduals); 422 423 // compute the scaled actual reduction 424 double actRed = -1.0; 425 if (0.1 * currentCost < previousCost) { 426 double r = currentCost / previousCost; 427 actRed = 1.0 - r * r; 428 } 429 430 // compute the scaled predicted reduction 431 // and the scaled directional derivative 432 for (int j = 0; j < solvedCols; ++j) { 433 int pj = permutation[j]; 434 double dirJ = lmDir[pj]; 435 work1[j] = 0; 436 for (int i = 0; i <= j; ++i) { 437 work1[i] += weightedJacobian[i][pj] * dirJ; 438 } 439 } 440 double coeff1 = 0; 441 for (int j = 0; j < solvedCols; ++j) { 442 coeff1 += work1[j] * work1[j]; 443 } 444 double pc2 = previousCost * previousCost; 445 coeff1 = coeff1 / pc2; 446 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 447 double preRed = coeff1 + 2 * coeff2; 448 double dirDer = -(coeff1 + coeff2); 449 450 // ratio of the actual to the predicted reduction 451 ratio = (preRed == 0) ? 0 : (actRed / preRed); 452 453 // update the step bound 454 if (ratio <= 0.25) { 455 double tmp = 456 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 457 if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { 458 tmp = 0.1; 459 } 460 delta = tmp * FastMath.min(delta, 10.0 * lmNorm); 461 lmPar /= tmp; 462 } else if ((lmPar == 0) || (ratio >= 0.75)) { 463 delta = 2 * lmNorm; 464 lmPar *= 0.5; 465 } 466 467 // test for successful iteration. 468 if (ratio >= 1.0e-4) { 469 // successful iteration, update the norm 470 firstIteration = false; 471 xNorm = 0; 472 for (int k = 0; k < nC; ++k) { 473 double xK = diag[k] * currentPoint[k]; 474 xNorm += xK * xK; 475 } 476 xNorm = FastMath.sqrt(xNorm); 477 478 // tests for convergence. 479 if (checker != null) { 480 // we use the vectorial convergence checker 481 if (checker.converged(iter, previous, current)) { 482 setCost(currentCost); 483 return current; 484 } 485 } 486 } else { 487 // failed iteration, reset the previous values 488 currentCost = previousCost; 489 for (int j = 0; j < solvedCols; ++j) { 490 int pj = permutation[j]; 491 currentPoint[pj] = oldX[pj]; 492 } 493 tmpVec = weightedResidual; 494 weightedResidual = oldRes; 495 oldRes = tmpVec; 496 tmpVec = currentObjective; 497 currentObjective = oldObj; 498 oldObj = tmpVec; 499 // Reset "current" to previous values. 500 current = new PointVectorValuePair(currentPoint, currentObjective); 501 } 502 503 // Default convergence criteria. 504 if ((FastMath.abs(actRed) <= costRelativeTolerance && 505 preRed <= costRelativeTolerance && 506 ratio <= 2.0) || 507 delta <= parRelativeTolerance * xNorm) { 508 setCost(currentCost); 509 return current; 510 } 511 512 // tests for termination and stringent tolerances 513 // (2.2204e-16 is the machine epsilon for IEEE754) 514 if ((FastMath.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) { 515 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 516 costRelativeTolerance); 517 } else if (delta <= 2.2204e-16 * xNorm) { 518 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 519 parRelativeTolerance); 520 } else if (maxCosine <= 2.2204e-16) { 521 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 522 orthoTolerance); 523 } 524 } 525 } 526 } 527 528 /** 529 * Determine the Levenberg-Marquardt parameter. 530 * <p>This implementation is a translation in Java of the MINPACK 531 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 532 * routine.</p> 533 * <p>This method sets the lmPar and lmDir attributes.</p> 534 * <p>The authors of the original fortran function are:</p> 535 * <ul> 536 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 537 * <li>Burton S. Garbow</li> 538 * <li>Kenneth E. Hillstrom</li> 539 * <li>Jorge J. More</li> 540 * </ul> 541 * <p>Luc Maisonobe did the Java translation.</p> 542 * 543 * @param qy array containing qTy 544 * @param delta upper bound on the euclidean norm of diagR * lmDir 545 * @param diag diagonal matrix 546 * @param work1 work array 547 * @param work2 work array 548 * @param work3 work array 549 */ 550 private void determineLMParameter(double[] qy, double delta, double[] diag, 551 double[] work1, double[] work2, double[] work3) { 552 final int nC = weightedJacobian[0].length; 553 554 // compute and store in x the gauss-newton direction, if the 555 // jacobian is rank-deficient, obtain a least squares solution 556 for (int j = 0; j < rank; ++j) { 557 lmDir[permutation[j]] = qy[j]; 558 } 559 for (int j = rank; j < nC; ++j) { 560 lmDir[permutation[j]] = 0; 561 } 562 for (int k = rank - 1; k >= 0; --k) { 563 int pk = permutation[k]; 564 double ypk = lmDir[pk] / diagR[pk]; 565 for (int i = 0; i < k; ++i) { 566 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 567 } 568 lmDir[pk] = ypk; 569 } 570 571 // evaluate the function at the origin, and test 572 // for acceptance of the Gauss-Newton direction 573 double dxNorm = 0; 574 for (int j = 0; j < solvedCols; ++j) { 575 int pj = permutation[j]; 576 double s = diag[pj] * lmDir[pj]; 577 work1[pj] = s; 578 dxNorm += s * s; 579 } 580 dxNorm = FastMath.sqrt(dxNorm); 581 double fp = dxNorm - delta; 582 if (fp <= 0.1 * delta) { 583 lmPar = 0; 584 return; 585 } 586 587 // if the jacobian is not rank deficient, the Newton step provides 588 // a lower bound, parl, for the zero of the function, 589 // otherwise set this bound to zero 590 double sum2; 591 double parl = 0; 592 if (rank == solvedCols) { 593 for (int j = 0; j < solvedCols; ++j) { 594 int pj = permutation[j]; 595 work1[pj] *= diag[pj] / dxNorm; 596 } 597 sum2 = 0; 598 for (int j = 0; j < solvedCols; ++j) { 599 int pj = permutation[j]; 600 double sum = 0; 601 for (int i = 0; i < j; ++i) { 602 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 603 } 604 double s = (work1[pj] - sum) / diagR[pj]; 605 work1[pj] = s; 606 sum2 += s * s; 607 } 608 parl = fp / (delta * sum2); 609 } 610 611 // calculate an upper bound, paru, for the zero of the function 612 sum2 = 0; 613 for (int j = 0; j < solvedCols; ++j) { 614 int pj = permutation[j]; 615 double sum = 0; 616 for (int i = 0; i <= j; ++i) { 617 sum += weightedJacobian[i][pj] * qy[i]; 618 } 619 sum /= diag[pj]; 620 sum2 += sum * sum; 621 } 622 double gNorm = FastMath.sqrt(sum2); 623 double paru = gNorm / delta; 624 if (paru == 0) { 625 // 2.2251e-308 is the smallest positive real for IEE754 626 paru = 2.2251e-308 / FastMath.min(delta, 0.1); 627 } 628 629 // if the input par lies outside of the interval (parl,paru), 630 // set par to the closer endpoint 631 lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); 632 if (lmPar == 0) { 633 lmPar = gNorm / dxNorm; 634 } 635 636 for (int countdown = 10; countdown >= 0; --countdown) { 637 638 // evaluate the function at the current value of lmPar 639 if (lmPar == 0) { 640 lmPar = FastMath.max(2.2251e-308, 0.001 * paru); 641 } 642 double sPar = FastMath.sqrt(lmPar); 643 for (int j = 0; j < solvedCols; ++j) { 644 int pj = permutation[j]; 645 work1[pj] = sPar * diag[pj]; 646 } 647 determineLMDirection(qy, work1, work2, work3); 648 649 dxNorm = 0; 650 for (int j = 0; j < solvedCols; ++j) { 651 int pj = permutation[j]; 652 double s = diag[pj] * lmDir[pj]; 653 work3[pj] = s; 654 dxNorm += s * s; 655 } 656 dxNorm = FastMath.sqrt(dxNorm); 657 double previousFP = fp; 658 fp = dxNorm - delta; 659 660 // if the function is small enough, accept the current value 661 // of lmPar, also test for the exceptional cases where parl is zero 662 if ((FastMath.abs(fp) <= 0.1 * delta) || 663 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { 664 return; 665 } 666 667 // compute the Newton correction 668 for (int j = 0; j < solvedCols; ++j) { 669 int pj = permutation[j]; 670 work1[pj] = work3[pj] * diag[pj] / dxNorm; 671 } 672 for (int j = 0; j < solvedCols; ++j) { 673 int pj = permutation[j]; 674 work1[pj] /= work2[j]; 675 double tmp = work1[pj]; 676 for (int i = j + 1; i < solvedCols; ++i) { 677 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 678 } 679 } 680 sum2 = 0; 681 for (int j = 0; j < solvedCols; ++j) { 682 double s = work1[permutation[j]]; 683 sum2 += s * s; 684 } 685 double correction = fp / (delta * sum2); 686 687 // depending on the sign of the function, update parl or paru. 688 if (fp > 0) { 689 parl = FastMath.max(parl, lmPar); 690 } else if (fp < 0) { 691 paru = FastMath.min(paru, lmPar); 692 } 693 694 // compute an improved estimate for lmPar 695 lmPar = FastMath.max(parl, lmPar + correction); 696 697 } 698 } 699 700 /** 701 * Solve a*x = b and d*x = 0 in the least squares sense. 702 * <p>This implementation is a translation in Java of the MINPACK 703 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 704 * routine.</p> 705 * <p>This method sets the lmDir and lmDiag attributes.</p> 706 * <p>The authors of the original fortran function are:</p> 707 * <ul> 708 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 709 * <li>Burton S. Garbow</li> 710 * <li>Kenneth E. Hillstrom</li> 711 * <li>Jorge J. More</li> 712 * </ul> 713 * <p>Luc Maisonobe did the Java translation.</p> 714 * 715 * @param qy array containing qTy 716 * @param diag diagonal matrix 717 * @param lmDiag diagonal elements associated with lmDir 718 * @param work work array 719 */ 720 private void determineLMDirection(double[] qy, double[] diag, 721 double[] lmDiag, double[] work) { 722 723 // copy R and Qty to preserve input and initialize s 724 // in particular, save the diagonal elements of R in lmDir 725 for (int j = 0; j < solvedCols; ++j) { 726 int pj = permutation[j]; 727 for (int i = j + 1; i < solvedCols; ++i) { 728 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 729 } 730 lmDir[j] = diagR[pj]; 731 work[j] = qy[j]; 732 } 733 734 // eliminate the diagonal matrix d using a Givens rotation 735 for (int j = 0; j < solvedCols; ++j) { 736 737 // prepare the row of d to be eliminated, locating the 738 // diagonal element using p from the Q.R. factorization 739 int pj = permutation[j]; 740 double dpj = diag[pj]; 741 if (dpj != 0) { 742 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 743 } 744 lmDiag[j] = dpj; 745 746 // the transformations to eliminate the row of d 747 // modify only a single element of Qty 748 // beyond the first n, which is initially zero. 749 double qtbpj = 0; 750 for (int k = j; k < solvedCols; ++k) { 751 int pk = permutation[k]; 752 753 // determine a Givens rotation which eliminates the 754 // appropriate element in the current row of d 755 if (lmDiag[k] != 0) { 756 757 final double sin; 758 final double cos; 759 double rkk = weightedJacobian[k][pk]; 760 if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { 761 final double cotan = rkk / lmDiag[k]; 762 sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); 763 cos = sin * cotan; 764 } else { 765 final double tan = lmDiag[k] / rkk; 766 cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); 767 sin = cos * tan; 768 } 769 770 // compute the modified diagonal element of R and 771 // the modified element of (Qty,0) 772 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 773 final double temp = cos * work[k] + sin * qtbpj; 774 qtbpj = -sin * work[k] + cos * qtbpj; 775 work[k] = temp; 776 777 // accumulate the tranformation in the row of s 778 for (int i = k + 1; i < solvedCols; ++i) { 779 double rik = weightedJacobian[i][pk]; 780 final double temp2 = cos * rik + sin * lmDiag[i]; 781 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 782 weightedJacobian[i][pk] = temp2; 783 } 784 } 785 } 786 787 // store the diagonal element of s and restore 788 // the corresponding diagonal element of R 789 lmDiag[j] = weightedJacobian[j][permutation[j]]; 790 weightedJacobian[j][permutation[j]] = lmDir[j]; 791 } 792 793 // solve the triangular system for z, if the system is 794 // singular, then obtain a least squares solution 795 int nSing = solvedCols; 796 for (int j = 0; j < solvedCols; ++j) { 797 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 798 nSing = j; 799 } 800 if (nSing < solvedCols) { 801 work[j] = 0; 802 } 803 } 804 if (nSing > 0) { 805 for (int j = nSing - 1; j >= 0; --j) { 806 int pj = permutation[j]; 807 double sum = 0; 808 for (int i = j + 1; i < nSing; ++i) { 809 sum += weightedJacobian[i][pj] * work[i]; 810 } 811 work[j] = (work[j] - sum) / lmDiag[j]; 812 } 813 } 814 815 // permute the components of z back to components of lmDir 816 for (int j = 0; j < lmDir.length; ++j) { 817 lmDir[permutation[j]] = work[j]; 818 } 819 } 820 821 /** 822 * Decompose a matrix A as A.P = Q.R using Householder transforms. 823 * <p>As suggested in the P. Lascaux and R. Theodor book 824 * <i>Analyse numérique matricielle appliquée à 825 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 826 * the Householder transforms with u<sub>k</sub> unit vectors such that: 827 * <pre> 828 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 829 * </pre> 830 * we use <sub>k</sub> non-unit vectors such that: 831 * <pre> 832 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 833 * </pre> 834 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 835 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 836 * them from the v<sub>k</sub> vectors would be costly.</p> 837 * <p>This decomposition handles rank deficient cases since the tranformations 838 * are performed in non-increasing columns norms order thanks to columns 839 * pivoting. The diagonal elements of the R matrix are therefore also in 840 * non-increasing absolute values order.</p> 841 * 842 * @param jacobian Weighted Jacobian matrix at the current point. 843 * @exception ConvergenceException if the decomposition cannot be performed 844 */ 845 private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException { 846 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 847 // hence the multiplication by -1. 848 weightedJacobian = jacobian.scalarMultiply(-1).getData(); 849 850 final int nR = weightedJacobian.length; 851 final int nC = weightedJacobian[0].length; 852 853 // initializations 854 for (int k = 0; k < nC; ++k) { 855 permutation[k] = k; 856 double norm2 = 0; 857 for (int i = 0; i < nR; ++i) { 858 double akk = weightedJacobian[i][k]; 859 norm2 += akk * akk; 860 } 861 jacNorm[k] = FastMath.sqrt(norm2); 862 } 863 864 // transform the matrix column after column 865 for (int k = 0; k < nC; ++k) { 866 867 // select the column with the greatest norm on active components 868 int nextColumn = -1; 869 double ak2 = Double.NEGATIVE_INFINITY; 870 for (int i = k; i < nC; ++i) { 871 double norm2 = 0; 872 for (int j = k; j < nR; ++j) { 873 double aki = weightedJacobian[j][permutation[i]]; 874 norm2 += aki * aki; 875 } 876 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 877 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 878 nR, nC); 879 } 880 if (norm2 > ak2) { 881 nextColumn = i; 882 ak2 = norm2; 883 } 884 } 885 if (ak2 <= qrRankingThreshold) { 886 rank = k; 887 return; 888 } 889 int pk = permutation[nextColumn]; 890 permutation[nextColumn] = permutation[k]; 891 permutation[k] = pk; 892 893 // choose alpha such that Hk.u = alpha ek 894 double akk = weightedJacobian[k][pk]; 895 double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); 896 double betak = 1.0 / (ak2 - akk * alpha); 897 beta[pk] = betak; 898 899 // transform the current column 900 diagR[pk] = alpha; 901 weightedJacobian[k][pk] -= alpha; 902 903 // transform the remaining columns 904 for (int dk = nC - 1 - k; dk > 0; --dk) { 905 double gamma = 0; 906 for (int j = k; j < nR; ++j) { 907 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 908 } 909 gamma *= betak; 910 for (int j = k; j < nR; ++j) { 911 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 912 } 913 } 914 } 915 rank = solvedCols; 916 } 917 918 /** 919 * Compute the product Qt.y for some Q.R. decomposition. 920 * 921 * @param y vector to multiply (will be overwritten with the result) 922 */ 923 private void qTy(double[] y) { 924 final int nR = weightedJacobian.length; 925 final int nC = weightedJacobian[0].length; 926 927 for (int k = 0; k < nC; ++k) { 928 int pk = permutation[k]; 929 double gamma = 0; 930 for (int i = k; i < nR; ++i) { 931 gamma += weightedJacobian[i][pk] * y[i]; 932 } 933 gamma *= beta[pk]; 934 for (int i = k; i < nR; ++i) { 935 y[i] -= gamma * weightedJacobian[i][pk]; 936 } 937 } 938 } 939 }