001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math3.optimization.general; 018 019import java.util.Arrays; 020 021import org.apache.commons.math3.exception.ConvergenceException; 022import org.apache.commons.math3.exception.util.LocalizedFormats; 023import org.apache.commons.math3.optimization.PointVectorValuePair; 024import org.apache.commons.math3.optimization.ConvergenceChecker; 025import org.apache.commons.math3.linear.RealMatrix; 026import org.apache.commons.math3.util.Precision; 027import org.apache.commons.math3.util.FastMath; 028 029 030/** 031 * This class solves a least squares problem using the Levenberg-Marquardt algorithm. 032 * 033 * <p>This implementation <em>should</em> work even for over-determined systems 034 * (i.e. systems having more point than equations). Over-determined systems 035 * are solved by ignoring the point which have the smallest impact according 036 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 037 * are changed to implement this.</p> 038 * 039 * <p>The resolution engine is a simple translation of the MINPACK <a 040 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 041 * changes. The changes include the over-determined resolution, the use of 042 * inherited convergence checker and the Q.R. decomposition which has been 043 * rewritten following the algorithm described in the 044 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 045 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 046 * <p>The authors of the original fortran version are: 047 * <ul> 048 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 049 * <li>Burton S. Garbow</li> 050 * <li>Kenneth E. Hillstrom</li> 051 * <li>Jorge J. More</li> 052 * </ul> 053 * The redistribution policy for MINPACK is available <a 054 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 055 * is reproduced below.</p> 056 * 057 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 058 * <tr><td> 059 * Minpack Copyright Notice (1999) University of Chicago. 060 * All rights reserved 061 * </td></tr> 062 * <tr><td> 063 * Redistribution and use in source and binary forms, with or without 064 * modification, are permitted provided that the following conditions 065 * are met: 066 * <ol> 067 * <li>Redistributions of source code must retain the above copyright 068 * notice, this list of conditions and the following disclaimer.</li> 069 * <li>Redistributions in binary form must reproduce the above 070 * copyright notice, this list of conditions and the following 071 * disclaimer in the documentation and/or other materials provided 072 * with the distribution.</li> 073 * <li>The end-user documentation included with the redistribution, if any, 074 * must include the following acknowledgment: 075 * <code>This product includes software developed by the University of 076 * Chicago, as Operator of Argonne National Laboratory.</code> 077 * Alternately, this acknowledgment may appear in the software itself, 078 * if and wherever such third-party acknowledgments normally appear.</li> 079 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 080 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 081 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 082 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 083 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 084 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 085 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 086 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 087 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 088 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 089 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 090 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 091 * BE CORRECTED.</strong></li> 092 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 093 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 094 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 095 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 096 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 097 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 098 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 099 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 100 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 101 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 102 * <ol></td></tr> 103 * </table> 104 * @deprecated As of 3.1 (to be removed in 4.0). 105 * @since 2.0 106 * 107 */ 108@Deprecated 109public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer { 110 /** Number of solved point. */ 111 private int solvedCols; 112 /** Diagonal elements of the R matrix in the Q.R. decomposition. */ 113 private double[] diagR; 114 /** Norms of the columns of the jacobian matrix. */ 115 private double[] jacNorm; 116 /** Coefficients of the Householder transforms vectors. */ 117 private double[] beta; 118 /** Columns permutation array. */ 119 private int[] permutation; 120 /** Rank of the jacobian matrix. */ 121 private int rank; 122 /** Levenberg-Marquardt parameter. */ 123 private double lmPar; 124 /** Parameters evolution direction associated with lmPar. */ 125 private double[] lmDir; 126 /** Positive input variable used in determining the initial step bound. */ 127 private final double initialStepBoundFactor; 128 /** Desired relative error in the sum of squares. */ 129 private final double costRelativeTolerance; 130 /** Desired relative error in the approximate solution parameters. */ 131 private final double parRelativeTolerance; 132 /** Desired max cosine on the orthogonality between the function vector 133 * and the columns of the jacobian. */ 134 private final double orthoTolerance; 135 /** Threshold for QR ranking. */ 136 private final double qrRankingThreshold; 137 /** Weighted residuals. */ 138 private double[] weightedResidual; 139 /** Weighted Jacobian. */ 140 private double[][] weightedJacobian; 141 142 /** 143 * Build an optimizer for least squares problems with default values 144 * for all the tuning parameters (see the {@link 145 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 146 * other contructor}. 147 * The default values for the algorithm settings are: 148 * <ul> 149 * <li>Initial step bound factor: 100</li> 150 * <li>Cost relative tolerance: 1e-10</li> 151 * <li>Parameters relative tolerance: 1e-10</li> 152 * <li>Orthogonality tolerance: 1e-10</li> 153 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 154 * </ul> 155 */ 156 public LevenbergMarquardtOptimizer() { 157 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 158 } 159 160 /** 161 * Constructor that allows the specification of a custom convergence 162 * checker. 163 * Note that all the usual convergence checks will be <em>disabled</em>. 164 * The default values for the algorithm settings are: 165 * <ul> 166 * <li>Initial step bound factor: 100</li> 167 * <li>Cost relative tolerance: 1e-10</li> 168 * <li>Parameters relative tolerance: 1e-10</li> 169 * <li>Orthogonality tolerance: 1e-10</li> 170 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 171 * </ul> 172 * 173 * @param checker Convergence checker. 174 */ 175 public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 176 this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 177 } 178 179 /** 180 * Constructor that allows the specification of a custom convergence 181 * checker, in addition to the standard ones. 182 * 183 * @param initialStepBoundFactor Positive input variable used in 184 * determining the initial step bound. This bound is set to the 185 * product of initialStepBoundFactor and the euclidean norm of 186 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 187 * itself. In most cases factor should lie in the interval 188 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 189 * @param checker Convergence checker. 190 * @param costRelativeTolerance Desired relative error in the sum of 191 * squares. 192 * @param parRelativeTolerance Desired relative error in the approximate 193 * solution parameters. 194 * @param orthoTolerance Desired max cosine on the orthogonality between 195 * the function vector and the columns of the Jacobian. 196 * @param threshold Desired threshold for QR ranking. If the squared norm 197 * of a column vector is smaller or equal to this threshold during QR 198 * decomposition, it is considered to be a zero vector and hence the rank 199 * of the matrix is reduced. 200 */ 201 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 202 ConvergenceChecker<PointVectorValuePair> checker, 203 double costRelativeTolerance, 204 double parRelativeTolerance, 205 double orthoTolerance, 206 double threshold) { 207 super(checker); 208 this.initialStepBoundFactor = initialStepBoundFactor; 209 this.costRelativeTolerance = costRelativeTolerance; 210 this.parRelativeTolerance = parRelativeTolerance; 211 this.orthoTolerance = orthoTolerance; 212 this.qrRankingThreshold = threshold; 213 } 214 215 /** 216 * Build an optimizer for least squares problems with default values 217 * for some of the tuning parameters (see the {@link 218 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 219 * other contructor}. 220 * The default values for the algorithm settings are: 221 * <ul> 222 * <li>Initial step bound factor}: 100</li> 223 * <li>QR ranking threshold}: {@link Precision#SAFE_MIN}</li> 224 * </ul> 225 * 226 * @param costRelativeTolerance Desired relative error in the sum of 227 * squares. 228 * @param parRelativeTolerance Desired relative error in the approximate 229 * solution parameters. 230 * @param orthoTolerance Desired max cosine on the orthogonality between 231 * the function vector and the columns of the Jacobian. 232 */ 233 public LevenbergMarquardtOptimizer(double costRelativeTolerance, 234 double parRelativeTolerance, 235 double orthoTolerance) { 236 this(100, 237 costRelativeTolerance, parRelativeTolerance, orthoTolerance, 238 Precision.SAFE_MIN); 239 } 240 241 /** 242 * The arguments control the behaviour of the default convergence checking 243 * procedure. 244 * Additional criteria can defined through the setting of a {@link 245 * ConvergenceChecker}. 246 * 247 * @param initialStepBoundFactor Positive input variable used in 248 * determining the initial step bound. This bound is set to the 249 * product of initialStepBoundFactor and the euclidean norm of 250 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 251 * itself. In most cases factor should lie in the interval 252 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 253 * @param costRelativeTolerance Desired relative error in the sum of 254 * squares. 255 * @param parRelativeTolerance Desired relative error in the approximate 256 * solution parameters. 257 * @param orthoTolerance Desired max cosine on the orthogonality between 258 * the function vector and the columns of the Jacobian. 259 * @param threshold Desired threshold for QR ranking. If the squared norm 260 * of a column vector is smaller or equal to this threshold during QR 261 * decomposition, it is considered to be a zero vector and hence the rank 262 * of the matrix is reduced. 263 */ 264 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 265 double costRelativeTolerance, 266 double parRelativeTolerance, 267 double orthoTolerance, 268 double threshold) { 269 super(null); // No custom convergence criterion. 270 this.initialStepBoundFactor = initialStepBoundFactor; 271 this.costRelativeTolerance = costRelativeTolerance; 272 this.parRelativeTolerance = parRelativeTolerance; 273 this.orthoTolerance = orthoTolerance; 274 this.qrRankingThreshold = threshold; 275 } 276 277 /** {@inheritDoc} */ 278 @Override 279 protected PointVectorValuePair doOptimize() { 280 final int nR = getTarget().length; // Number of observed data. 281 final double[] currentPoint = getStartPoint(); 282 final int nC = currentPoint.length; // Number of parameters. 283 284 // arrays shared with the other private methods 285 solvedCols = FastMath.min(nR, nC); 286 diagR = new double[nC]; 287 jacNorm = new double[nC]; 288 beta = new double[nC]; 289 permutation = new int[nC]; 290 lmDir = new double[nC]; 291 292 // local point 293 double delta = 0; 294 double xNorm = 0; 295 double[] diag = new double[nC]; 296 double[] oldX = new double[nC]; 297 double[] oldRes = new double[nR]; 298 double[] oldObj = new double[nR]; 299 double[] qtf = new double[nR]; 300 double[] work1 = new double[nC]; 301 double[] work2 = new double[nC]; 302 double[] work3 = new double[nC]; 303 304 final RealMatrix weightMatrixSqrt = getWeightSquareRoot(); 305 306 // Evaluate the function at the starting point and calculate its norm. 307 double[] currentObjective = computeObjectiveValue(currentPoint); 308 double[] currentResiduals = computeResiduals(currentObjective); 309 PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective); 310 double currentCost = computeCost(currentResiduals); 311 312 // Outer loop. 313 lmPar = 0; 314 boolean firstIteration = true; 315 int iter = 0; 316 final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker(); 317 while (true) { 318 ++iter; 319 final PointVectorValuePair previous = current; 320 321 // QR decomposition of the jacobian matrix 322 qrDecomposition(computeWeightedJacobian(currentPoint)); 323 324 weightedResidual = weightMatrixSqrt.operate(currentResiduals); 325 for (int i = 0; i < nR; i++) { 326 qtf[i] = weightedResidual[i]; 327 } 328 329 // compute Qt.res 330 qTy(qtf); 331 332 // now we don't need Q anymore, 333 // so let jacobian contain the R matrix with its diagonal elements 334 for (int k = 0; k < solvedCols; ++k) { 335 int pk = permutation[k]; 336 weightedJacobian[k][pk] = diagR[pk]; 337 } 338 339 if (firstIteration) { 340 // scale the point according to the norms of the columns 341 // of the initial jacobian 342 xNorm = 0; 343 for (int k = 0; k < nC; ++k) { 344 double dk = jacNorm[k]; 345 if (dk == 0) { 346 dk = 1.0; 347 } 348 double xk = dk * currentPoint[k]; 349 xNorm += xk * xk; 350 diag[k] = dk; 351 } 352 xNorm = FastMath.sqrt(xNorm); 353 354 // initialize the step bound delta 355 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 356 } 357 358 // check orthogonality between function vector and jacobian columns 359 double maxCosine = 0; 360 if (currentCost != 0) { 361 for (int j = 0; j < solvedCols; ++j) { 362 int pj = permutation[j]; 363 double s = jacNorm[pj]; 364 if (s != 0) { 365 double sum = 0; 366 for (int i = 0; i <= j; ++i) { 367 sum += weightedJacobian[i][pj] * qtf[i]; 368 } 369 maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); 370 } 371 } 372 } 373 if (maxCosine <= orthoTolerance) { 374 // Convergence has been reached. 375 setCost(currentCost); 376 // Update (deprecated) "point" field. 377 point = current.getPoint(); 378 return current; 379 } 380 381 // rescale if necessary 382 for (int j = 0; j < nC; ++j) { 383 diag[j] = FastMath.max(diag[j], jacNorm[j]); 384 } 385 386 // Inner loop. 387 for (double ratio = 0; ratio < 1.0e-4;) { 388 389 // save the state 390 for (int j = 0; j < solvedCols; ++j) { 391 int pj = permutation[j]; 392 oldX[pj] = currentPoint[pj]; 393 } 394 final double previousCost = currentCost; 395 double[] tmpVec = weightedResidual; 396 weightedResidual = oldRes; 397 oldRes = tmpVec; 398 tmpVec = currentObjective; 399 currentObjective = oldObj; 400 oldObj = tmpVec; 401 402 // determine the Levenberg-Marquardt parameter 403 determineLMParameter(qtf, delta, diag, work1, work2, work3); 404 405 // compute the new point and the norm of the evolution direction 406 double lmNorm = 0; 407 for (int j = 0; j < solvedCols; ++j) { 408 int pj = permutation[j]; 409 lmDir[pj] = -lmDir[pj]; 410 currentPoint[pj] = oldX[pj] + lmDir[pj]; 411 double s = diag[pj] * lmDir[pj]; 412 lmNorm += s * s; 413 } 414 lmNorm = FastMath.sqrt(lmNorm); 415 // on the first iteration, adjust the initial step bound. 416 if (firstIteration) { 417 delta = FastMath.min(delta, lmNorm); 418 } 419 420 // Evaluate the function at x + p and calculate its norm. 421 currentObjective = computeObjectiveValue(currentPoint); 422 currentResiduals = computeResiduals(currentObjective); 423 current = new PointVectorValuePair(currentPoint, currentObjective); 424 currentCost = computeCost(currentResiduals); 425 426 // compute the scaled actual reduction 427 double actRed = -1.0; 428 if (0.1 * currentCost < previousCost) { 429 double r = currentCost / previousCost; 430 actRed = 1.0 - r * r; 431 } 432 433 // compute the scaled predicted reduction 434 // and the scaled directional derivative 435 for (int j = 0; j < solvedCols; ++j) { 436 int pj = permutation[j]; 437 double dirJ = lmDir[pj]; 438 work1[j] = 0; 439 for (int i = 0; i <= j; ++i) { 440 work1[i] += weightedJacobian[i][pj] * dirJ; 441 } 442 } 443 double coeff1 = 0; 444 for (int j = 0; j < solvedCols; ++j) { 445 coeff1 += work1[j] * work1[j]; 446 } 447 double pc2 = previousCost * previousCost; 448 coeff1 /= pc2; 449 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 450 double preRed = coeff1 + 2 * coeff2; 451 double dirDer = -(coeff1 + coeff2); 452 453 // ratio of the actual to the predicted reduction 454 ratio = (preRed == 0) ? 0 : (actRed / preRed); 455 456 // update the step bound 457 if (ratio <= 0.25) { 458 double tmp = 459 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 460 if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { 461 tmp = 0.1; 462 } 463 delta = tmp * FastMath.min(delta, 10.0 * lmNorm); 464 lmPar /= tmp; 465 } else if ((lmPar == 0) || (ratio >= 0.75)) { 466 delta = 2 * lmNorm; 467 lmPar *= 0.5; 468 } 469 470 // test for successful iteration. 471 if (ratio >= 1.0e-4) { 472 // successful iteration, update the norm 473 firstIteration = false; 474 xNorm = 0; 475 for (int k = 0; k < nC; ++k) { 476 double xK = diag[k] * currentPoint[k]; 477 xNorm += xK * xK; 478 } 479 xNorm = FastMath.sqrt(xNorm); 480 481 // tests for convergence. 482 if (checker != null && checker.converged(iter, previous, current)) { 483 setCost(currentCost); 484 // Update (deprecated) "point" field. 485 point = current.getPoint(); 486 return current; 487 } 488 } else { 489 // failed iteration, reset the previous values 490 currentCost = previousCost; 491 for (int j = 0; j < solvedCols; ++j) { 492 int pj = permutation[j]; 493 currentPoint[pj] = oldX[pj]; 494 } 495 tmpVec = weightedResidual; 496 weightedResidual = oldRes; 497 oldRes = tmpVec; 498 tmpVec = currentObjective; 499 currentObjective = oldObj; 500 oldObj = tmpVec; 501 // Reset "current" to previous values. 502 current = new PointVectorValuePair(currentPoint, currentObjective); 503 } 504 505 // Default convergence criteria. 506 if ((FastMath.abs(actRed) <= costRelativeTolerance && 507 preRed <= costRelativeTolerance && 508 ratio <= 2.0) || 509 delta <= parRelativeTolerance * xNorm) { 510 setCost(currentCost); 511 // Update (deprecated) "point" field. 512 point = current.getPoint(); 513 return current; 514 } 515 516 // tests for termination and stringent tolerances 517 // (2.2204e-16 is the machine epsilon for IEEE754) 518 if ((FastMath.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) { 519 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 520 costRelativeTolerance); 521 } else if (delta <= 2.2204e-16 * xNorm) { 522 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 523 parRelativeTolerance); 524 } else if (maxCosine <= 2.2204e-16) { 525 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 526 orthoTolerance); 527 } 528 } 529 } 530 } 531 532 /** 533 * Determine the Levenberg-Marquardt parameter. 534 * <p>This implementation is a translation in Java of the MINPACK 535 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 536 * routine.</p> 537 * <p>This method sets the lmPar and lmDir attributes.</p> 538 * <p>The authors of the original fortran function are:</p> 539 * <ul> 540 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 541 * <li>Burton S. Garbow</li> 542 * <li>Kenneth E. Hillstrom</li> 543 * <li>Jorge J. More</li> 544 * </ul> 545 * <p>Luc Maisonobe did the Java translation.</p> 546 * 547 * @param qy array containing qTy 548 * @param delta upper bound on the euclidean norm of diagR * lmDir 549 * @param diag diagonal matrix 550 * @param work1 work array 551 * @param work2 work array 552 * @param work3 work array 553 */ 554 private void determineLMParameter(double[] qy, double delta, double[] diag, 555 double[] work1, double[] work2, double[] work3) { 556 final int nC = weightedJacobian[0].length; 557 558 // compute and store in x the gauss-newton direction, if the 559 // jacobian is rank-deficient, obtain a least squares solution 560 for (int j = 0; j < rank; ++j) { 561 lmDir[permutation[j]] = qy[j]; 562 } 563 for (int j = rank; j < nC; ++j) { 564 lmDir[permutation[j]] = 0; 565 } 566 for (int k = rank - 1; k >= 0; --k) { 567 int pk = permutation[k]; 568 double ypk = lmDir[pk] / diagR[pk]; 569 for (int i = 0; i < k; ++i) { 570 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 571 } 572 lmDir[pk] = ypk; 573 } 574 575 // evaluate the function at the origin, and test 576 // for acceptance of the Gauss-Newton direction 577 double dxNorm = 0; 578 for (int j = 0; j < solvedCols; ++j) { 579 int pj = permutation[j]; 580 double s = diag[pj] * lmDir[pj]; 581 work1[pj] = s; 582 dxNorm += s * s; 583 } 584 dxNorm = FastMath.sqrt(dxNorm); 585 double fp = dxNorm - delta; 586 if (fp <= 0.1 * delta) { 587 lmPar = 0; 588 return; 589 } 590 591 // if the jacobian is not rank deficient, the Newton step provides 592 // a lower bound, parl, for the zero of the function, 593 // otherwise set this bound to zero 594 double sum2; 595 double parl = 0; 596 if (rank == solvedCols) { 597 for (int j = 0; j < solvedCols; ++j) { 598 int pj = permutation[j]; 599 work1[pj] *= diag[pj] / dxNorm; 600 } 601 sum2 = 0; 602 for (int j = 0; j < solvedCols; ++j) { 603 int pj = permutation[j]; 604 double sum = 0; 605 for (int i = 0; i < j; ++i) { 606 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 607 } 608 double s = (work1[pj] - sum) / diagR[pj]; 609 work1[pj] = s; 610 sum2 += s * s; 611 } 612 parl = fp / (delta * sum2); 613 } 614 615 // calculate an upper bound, paru, for the zero of the function 616 sum2 = 0; 617 for (int j = 0; j < solvedCols; ++j) { 618 int pj = permutation[j]; 619 double sum = 0; 620 for (int i = 0; i <= j; ++i) { 621 sum += weightedJacobian[i][pj] * qy[i]; 622 } 623 sum /= diag[pj]; 624 sum2 += sum * sum; 625 } 626 double gNorm = FastMath.sqrt(sum2); 627 double paru = gNorm / delta; 628 if (paru == 0) { 629 // 2.2251e-308 is the smallest positive real for IEE754 630 paru = 2.2251e-308 / FastMath.min(delta, 0.1); 631 } 632 633 // if the input par lies outside of the interval (parl,paru), 634 // set par to the closer endpoint 635 lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); 636 if (lmPar == 0) { 637 lmPar = gNorm / dxNorm; 638 } 639 640 for (int countdown = 10; countdown >= 0; --countdown) { 641 642 // evaluate the function at the current value of lmPar 643 if (lmPar == 0) { 644 lmPar = FastMath.max(2.2251e-308, 0.001 * paru); 645 } 646 double sPar = FastMath.sqrt(lmPar); 647 for (int j = 0; j < solvedCols; ++j) { 648 int pj = permutation[j]; 649 work1[pj] = sPar * diag[pj]; 650 } 651 determineLMDirection(qy, work1, work2, work3); 652 653 dxNorm = 0; 654 for (int j = 0; j < solvedCols; ++j) { 655 int pj = permutation[j]; 656 double s = diag[pj] * lmDir[pj]; 657 work3[pj] = s; 658 dxNorm += s * s; 659 } 660 dxNorm = FastMath.sqrt(dxNorm); 661 double previousFP = fp; 662 fp = dxNorm - delta; 663 664 // if the function is small enough, accept the current value 665 // of lmPar, also test for the exceptional cases where parl is zero 666 if ((FastMath.abs(fp) <= 0.1 * delta) || 667 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { 668 return; 669 } 670 671 // compute the Newton correction 672 for (int j = 0; j < solvedCols; ++j) { 673 int pj = permutation[j]; 674 work1[pj] = work3[pj] * diag[pj] / dxNorm; 675 } 676 for (int j = 0; j < solvedCols; ++j) { 677 int pj = permutation[j]; 678 work1[pj] /= work2[j]; 679 double tmp = work1[pj]; 680 for (int i = j + 1; i < solvedCols; ++i) { 681 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 682 } 683 } 684 sum2 = 0; 685 for (int j = 0; j < solvedCols; ++j) { 686 double s = work1[permutation[j]]; 687 sum2 += s * s; 688 } 689 double correction = fp / (delta * sum2); 690 691 // depending on the sign of the function, update parl or paru. 692 if (fp > 0) { 693 parl = FastMath.max(parl, lmPar); 694 } else if (fp < 0) { 695 paru = FastMath.min(paru, lmPar); 696 } 697 698 // compute an improved estimate for lmPar 699 lmPar = FastMath.max(parl, lmPar + correction); 700 701 } 702 } 703 704 /** 705 * Solve a*x = b and d*x = 0 in the least squares sense. 706 * <p>This implementation is a translation in Java of the MINPACK 707 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 708 * routine.</p> 709 * <p>This method sets the lmDir and lmDiag attributes.</p> 710 * <p>The authors of the original fortran function are:</p> 711 * <ul> 712 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 713 * <li>Burton S. Garbow</li> 714 * <li>Kenneth E. Hillstrom</li> 715 * <li>Jorge J. More</li> 716 * </ul> 717 * <p>Luc Maisonobe did the Java translation.</p> 718 * 719 * @param qy array containing qTy 720 * @param diag diagonal matrix 721 * @param lmDiag diagonal elements associated with lmDir 722 * @param work work array 723 */ 724 private void determineLMDirection(double[] qy, double[] diag, 725 double[] lmDiag, double[] work) { 726 727 // copy R and Qty to preserve input and initialize s 728 // in particular, save the diagonal elements of R in lmDir 729 for (int j = 0; j < solvedCols; ++j) { 730 int pj = permutation[j]; 731 for (int i = j + 1; i < solvedCols; ++i) { 732 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 733 } 734 lmDir[j] = diagR[pj]; 735 work[j] = qy[j]; 736 } 737 738 // eliminate the diagonal matrix d using a Givens rotation 739 for (int j = 0; j < solvedCols; ++j) { 740 741 // prepare the row of d to be eliminated, locating the 742 // diagonal element using p from the Q.R. factorization 743 int pj = permutation[j]; 744 double dpj = diag[pj]; 745 if (dpj != 0) { 746 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 747 } 748 lmDiag[j] = dpj; 749 750 // the transformations to eliminate the row of d 751 // modify only a single element of Qty 752 // beyond the first n, which is initially zero. 753 double qtbpj = 0; 754 for (int k = j; k < solvedCols; ++k) { 755 int pk = permutation[k]; 756 757 // determine a Givens rotation which eliminates the 758 // appropriate element in the current row of d 759 if (lmDiag[k] != 0) { 760 761 final double sin; 762 final double cos; 763 double rkk = weightedJacobian[k][pk]; 764 if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { 765 final double cotan = rkk / lmDiag[k]; 766 sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); 767 cos = sin * cotan; 768 } else { 769 final double tan = lmDiag[k] / rkk; 770 cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); 771 sin = cos * tan; 772 } 773 774 // compute the modified diagonal element of R and 775 // the modified element of (Qty,0) 776 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 777 final double temp = cos * work[k] + sin * qtbpj; 778 qtbpj = -sin * work[k] + cos * qtbpj; 779 work[k] = temp; 780 781 // accumulate the tranformation in the row of s 782 for (int i = k + 1; i < solvedCols; ++i) { 783 double rik = weightedJacobian[i][pk]; 784 final double temp2 = cos * rik + sin * lmDiag[i]; 785 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 786 weightedJacobian[i][pk] = temp2; 787 } 788 } 789 } 790 791 // store the diagonal element of s and restore 792 // the corresponding diagonal element of R 793 lmDiag[j] = weightedJacobian[j][permutation[j]]; 794 weightedJacobian[j][permutation[j]] = lmDir[j]; 795 } 796 797 // solve the triangular system for z, if the system is 798 // singular, then obtain a least squares solution 799 int nSing = solvedCols; 800 for (int j = 0; j < solvedCols; ++j) { 801 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 802 nSing = j; 803 } 804 if (nSing < solvedCols) { 805 work[j] = 0; 806 } 807 } 808 if (nSing > 0) { 809 for (int j = nSing - 1; j >= 0; --j) { 810 int pj = permutation[j]; 811 double sum = 0; 812 for (int i = j + 1; i < nSing; ++i) { 813 sum += weightedJacobian[i][pj] * work[i]; 814 } 815 work[j] = (work[j] - sum) / lmDiag[j]; 816 } 817 } 818 819 // permute the components of z back to components of lmDir 820 for (int j = 0; j < lmDir.length; ++j) { 821 lmDir[permutation[j]] = work[j]; 822 } 823 } 824 825 /** 826 * Decompose a matrix A as A.P = Q.R using Householder transforms. 827 * <p>As suggested in the P. Lascaux and R. Theodor book 828 * <i>Analyse numérique matricielle appliquée à 829 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 830 * the Householder transforms with u<sub>k</sub> unit vectors such that: 831 * <pre> 832 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 833 * </pre> 834 * we use <sub>k</sub> non-unit vectors such that: 835 * <pre> 836 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 837 * </pre> 838 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 839 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 840 * them from the v<sub>k</sub> vectors would be costly.</p> 841 * <p>This decomposition handles rank deficient cases since the tranformations 842 * are performed in non-increasing columns norms order thanks to columns 843 * pivoting. The diagonal elements of the R matrix are therefore also in 844 * non-increasing absolute values order.</p> 845 * 846 * @param jacobian Weighted Jacobian matrix at the current point. 847 * @exception ConvergenceException if the decomposition cannot be performed 848 */ 849 private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException { 850 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 851 // hence the multiplication by -1. 852 weightedJacobian = jacobian.scalarMultiply(-1).getData(); 853 854 final int nR = weightedJacobian.length; 855 final int nC = weightedJacobian[0].length; 856 857 // initializations 858 for (int k = 0; k < nC; ++k) { 859 permutation[k] = k; 860 double norm2 = 0; 861 for (int i = 0; i < nR; ++i) { 862 double akk = weightedJacobian[i][k]; 863 norm2 += akk * akk; 864 } 865 jacNorm[k] = FastMath.sqrt(norm2); 866 } 867 868 // transform the matrix column after column 869 for (int k = 0; k < nC; ++k) { 870 871 // select the column with the greatest norm on active components 872 int nextColumn = -1; 873 double ak2 = Double.NEGATIVE_INFINITY; 874 for (int i = k; i < nC; ++i) { 875 double norm2 = 0; 876 for (int j = k; j < nR; ++j) { 877 double aki = weightedJacobian[j][permutation[i]]; 878 norm2 += aki * aki; 879 } 880 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 881 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 882 nR, nC); 883 } 884 if (norm2 > ak2) { 885 nextColumn = i; 886 ak2 = norm2; 887 } 888 } 889 if (ak2 <= qrRankingThreshold) { 890 rank = k; 891 return; 892 } 893 int pk = permutation[nextColumn]; 894 permutation[nextColumn] = permutation[k]; 895 permutation[k] = pk; 896 897 // choose alpha such that Hk.u = alpha ek 898 double akk = weightedJacobian[k][pk]; 899 double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); 900 double betak = 1.0 / (ak2 - akk * alpha); 901 beta[pk] = betak; 902 903 // transform the current column 904 diagR[pk] = alpha; 905 weightedJacobian[k][pk] -= alpha; 906 907 // transform the remaining columns 908 for (int dk = nC - 1 - k; dk > 0; --dk) { 909 double gamma = 0; 910 for (int j = k; j < nR; ++j) { 911 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 912 } 913 gamma *= betak; 914 for (int j = k; j < nR; ++j) { 915 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 916 } 917 } 918 } 919 rank = solvedCols; 920 } 921 922 /** 923 * Compute the product Qt.y for some Q.R. decomposition. 924 * 925 * @param y vector to multiply (will be overwritten with the result) 926 */ 927 private void qTy(double[] y) { 928 final int nR = weightedJacobian.length; 929 final int nC = weightedJacobian[0].length; 930 931 for (int k = 0; k < nC; ++k) { 932 int pk = permutation[k]; 933 double gamma = 0; 934 for (int i = k; i < nR; ++i) { 935 gamma += weightedJacobian[i][pk] * y[i]; 936 } 937 gamma *= beta[pk]; 938 for (int i = k; i < nR; ++i) { 939 y[i] -= gamma * weightedJacobian[i][pk]; 940 } 941 } 942 } 943}