001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.math4.legacy.fitting.leastsquares; 018 019import java.util.Arrays; 020 021import org.apache.commons.math4.legacy.exception.ConvergenceException; 022import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 023import org.apache.commons.math4.legacy.fitting.leastsquares.LeastSquaresProblem.Evaluation; 024import org.apache.commons.math4.legacy.linear.ArrayRealVector; 025import org.apache.commons.math4.legacy.linear.RealMatrix; 026import org.apache.commons.math4.legacy.optim.ConvergenceChecker; 027import org.apache.commons.math4.core.jdkmath.JdkMath; 028import org.apache.commons.math4.legacy.core.IntegerSequence; 029import org.apache.commons.numbers.core.Precision; 030 031 032/** 033 * This class solves a least-squares problem using the Levenberg-Marquardt 034 * algorithm. 035 * 036 * <p>This implementation <em>should</em> work even for over-determined systems 037 * (i.e. systems having more point than equations). Over-determined systems 038 * are solved by ignoring the point which have the smallest impact according 039 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 040 * are changed to implement this.</p> 041 * 042 * <p>The resolution engine is a simple translation of the MINPACK <a 043 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 044 * changes. The changes include the over-determined resolution, the use of 045 * inherited convergence checker and the Q.R. decomposition which has been 046 * rewritten following the algorithm described in the 047 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 048 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 049 * <p>The authors of the original fortran version are: 050 * <ul> 051 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 052 * <li>Burton S. Garbow</li> 053 * <li>Kenneth E. Hillstrom</li> 054 * <li>Jorge J. More</li> 055 * </ul> 056 * The redistribution policy for MINPACK is available <a 057 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 058 * is reproduced below. 059 * 060 * <table style="text-align: center; background-color: #E0E0E0" border=""> 061 * <caption>MINPACK redistribution policy</caption> 062 * <tr><td> 063 * Minpack Copyright Notice (1999) University of Chicago. 064 * All rights reserved 065 * </td></tr> 066 * <tr><td> 067 * Redistribution and use in source and binary forms, with or without 068 * modification, are permitted provided that the following conditions 069 * are met: 070 * <ol> 071 * <li>Redistributions of source code must retain the above copyright 072 * notice, this list of conditions and the following disclaimer.</li> 073 * <li>Redistributions in binary form must reproduce the above 074 * copyright notice, this list of conditions and the following 075 * disclaimer in the documentation and/or other materials provided 076 * with the distribution.</li> 077 * <li>The end-user documentation included with the redistribution, if any, 078 * must include the following acknowledgment: 079 * <code>This product includes software developed by the University of 080 * Chicago, as Operator of Argonne National Laboratory.</code> 081 * Alternately, this acknowledgment may appear in the software itself, 082 * if and wherever such third-party acknowledgments normally appear.</li> 083 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 084 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 085 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 086 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 087 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 088 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 089 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 090 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 091 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 092 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 093 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 094 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 095 * BE CORRECTED.</strong></li> 096 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 097 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 098 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 099 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 100 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 101 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 102 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 103 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 104 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 105 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 106 * </ol></td></tr> 107 * </table> 108 * 109 * @since 3.3 110 */ 111public class LevenbergMarquardtOptimizer implements LeastSquaresOptimizer { 112 113 /** Twice the "epsilon machine". */ 114 private static final double TWO_EPS = 2 * Precision.EPSILON; 115 116 /* configuration parameters */ 117 /** Positive input variable used in determining the initial step bound. */ 118 private final double initialStepBoundFactor; 119 /** Desired relative error in the sum of squares. */ 120 private final double costRelativeTolerance; 121 /** Desired relative error in the approximate solution parameters. */ 122 private final double parRelativeTolerance; 123 /** Desired max cosine on the orthogonality between the function vector 124 * and the columns of the jacobian. */ 125 private final double orthoTolerance; 126 /** Threshold for QR ranking. */ 127 private final double qrRankingThreshold; 128 129 /** Default constructor. 130 * <p> 131 * The default values for the algorithm settings are: 132 * <ul> 133 * <li>Initial step bound factor: 100</li> 134 * <li>Cost relative tolerance: 1e-10</li> 135 * <li>Parameters relative tolerance: 1e-10</li> 136 * <li>Orthogonality tolerance: 1e-10</li> 137 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 138 * </ul> 139 **/ 140 public LevenbergMarquardtOptimizer() { 141 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 142 } 143 144 /** 145 * Construct an instance with all parameters specified. 146 * 147 * @param initialStepBoundFactor initial step bound factor 148 * @param costRelativeTolerance cost relative tolerance 149 * @param parRelativeTolerance parameters relative tolerance 150 * @param orthoTolerance orthogonality tolerance 151 * @param qrRankingThreshold threshold in the QR decomposition. Columns with a 2 152 * norm less than this threshold are considered to be 153 * all 0s. 154 */ 155 public LevenbergMarquardtOptimizer( 156 final double initialStepBoundFactor, 157 final double costRelativeTolerance, 158 final double parRelativeTolerance, 159 final double orthoTolerance, 160 final double qrRankingThreshold) { 161 this.initialStepBoundFactor = initialStepBoundFactor; 162 this.costRelativeTolerance = costRelativeTolerance; 163 this.parRelativeTolerance = parRelativeTolerance; 164 this.orthoTolerance = orthoTolerance; 165 this.qrRankingThreshold = qrRankingThreshold; 166 } 167 168 /** 169 * @param newInitialStepBoundFactor Positive input variable used in 170 * determining the initial step bound. This bound is set to the 171 * product of initialStepBoundFactor and the euclidean norm of 172 * {@code diag * x} if non-zero, or else to {@code newInitialStepBoundFactor} 173 * itself. In most cases factor should lie in the interval 174 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 175 * of the matrix is reduced. 176 * @return a new instance. 177 */ 178 public LevenbergMarquardtOptimizer withInitialStepBoundFactor(double newInitialStepBoundFactor) { 179 return new LevenbergMarquardtOptimizer( 180 newInitialStepBoundFactor, 181 costRelativeTolerance, 182 parRelativeTolerance, 183 orthoTolerance, 184 qrRankingThreshold); 185 } 186 187 /** 188 * @param newCostRelativeTolerance Desired relative error in the sum of squares. 189 * @return a new instance. 190 */ 191 public LevenbergMarquardtOptimizer withCostRelativeTolerance(double newCostRelativeTolerance) { 192 return new LevenbergMarquardtOptimizer( 193 initialStepBoundFactor, 194 newCostRelativeTolerance, 195 parRelativeTolerance, 196 orthoTolerance, 197 qrRankingThreshold); 198 } 199 200 /** 201 * @param newParRelativeTolerance Desired relative error in the approximate solution 202 * parameters. 203 * @return a new instance. 204 */ 205 public LevenbergMarquardtOptimizer withParameterRelativeTolerance(double newParRelativeTolerance) { 206 return new LevenbergMarquardtOptimizer( 207 initialStepBoundFactor, 208 costRelativeTolerance, 209 newParRelativeTolerance, 210 orthoTolerance, 211 qrRankingThreshold); 212 } 213 214 /** 215 * Modifies the given parameter. 216 * 217 * @param newOrthoTolerance Desired max cosine on the orthogonality between 218 * the function vector and the columns of the Jacobian. 219 * @return a new instance. 220 */ 221 public LevenbergMarquardtOptimizer withOrthoTolerance(double newOrthoTolerance) { 222 return new LevenbergMarquardtOptimizer( 223 initialStepBoundFactor, 224 costRelativeTolerance, 225 parRelativeTolerance, 226 newOrthoTolerance, 227 qrRankingThreshold); 228 } 229 230 /** 231 * @param newQRRankingThreshold Desired threshold for QR ranking. 232 * If the squared norm of a column vector is smaller or equal to this 233 * threshold during QR decomposition, it is considered to be a zero vector 234 * and hence the rank of the matrix is reduced. 235 * @return a new instance. 236 */ 237 public LevenbergMarquardtOptimizer withRankingThreshold(double newQRRankingThreshold) { 238 return new LevenbergMarquardtOptimizer( 239 initialStepBoundFactor, 240 costRelativeTolerance, 241 parRelativeTolerance, 242 orthoTolerance, 243 newQRRankingThreshold); 244 } 245 246 /** 247 * Gets the value of a tuning parameter. 248 * @see #withInitialStepBoundFactor(double) 249 * 250 * @return the parameter's value. 251 */ 252 public double getInitialStepBoundFactor() { 253 return initialStepBoundFactor; 254 } 255 256 /** 257 * Gets the value of a tuning parameter. 258 * @see #withCostRelativeTolerance(double) 259 * 260 * @return the parameter's value. 261 */ 262 public double getCostRelativeTolerance() { 263 return costRelativeTolerance; 264 } 265 266 /** 267 * Gets the value of a tuning parameter. 268 * @see #withParameterRelativeTolerance(double) 269 * 270 * @return the parameter's value. 271 */ 272 public double getParameterRelativeTolerance() { 273 return parRelativeTolerance; 274 } 275 276 /** 277 * Gets the value of a tuning parameter. 278 * @see #withOrthoTolerance(double) 279 * 280 * @return the parameter's value. 281 */ 282 public double getOrthoTolerance() { 283 return orthoTolerance; 284 } 285 286 /** 287 * Gets the value of a tuning parameter. 288 * @see #withRankingThreshold(double) 289 * 290 * @return the parameter's value. 291 */ 292 public double getRankingThreshold() { 293 return qrRankingThreshold; 294 } 295 296 /** {@inheritDoc} */ 297 @Override 298 public Optimum optimize(final LeastSquaresProblem problem) { 299 // Pull in relevant data from the problem as locals. 300 final int nR = problem.getObservationSize(); // Number of observed data. 301 final int nC = problem.getParameterSize(); // Number of parameters. 302 // Counters. 303 final IntegerSequence.Incrementor iterationCounter = problem.getIterationCounter(); 304 final IntegerSequence.Incrementor evaluationCounter = problem.getEvaluationCounter(); 305 // Convergence criterion. 306 final ConvergenceChecker<Evaluation> checker = problem.getConvergenceChecker(); 307 308 // arrays shared with the other private methods 309 final int solvedCols = JdkMath.min(nR, nC); 310 /* Parameters evolution direction associated with lmPar. */ 311 double[] lmDir = new double[nC]; 312 /* Levenberg-Marquardt parameter. */ 313 double lmPar = 0; 314 315 // local point 316 double delta = 0; 317 double xNorm = 0; 318 double[] diag = new double[nC]; 319 double[] oldX = new double[nC]; 320 double[] oldRes = new double[nR]; 321 double[] qtf = new double[nR]; 322 double[] work1 = new double[nC]; 323 double[] work2 = new double[nC]; 324 double[] work3 = new double[nC]; 325 326 327 // Evaluate the function at the starting point and calculate its norm. 328 evaluationCounter.increment(); 329 //value will be reassigned in the loop 330 Evaluation current = problem.evaluate(problem.getStart()); 331 double[] currentResiduals = current.getResiduals().toArray(); 332 double currentCost = current.getCost(); 333 double[] currentPoint = current.getPoint().toArray(); 334 335 // Outer loop. 336 boolean firstIteration = true; 337 while (true) { 338 iterationCounter.increment(); 339 340 final Evaluation previous = current; 341 342 // QR decomposition of the jacobian matrix 343 final InternalData internalData 344 = qrDecomposition(current.getJacobian(), solvedCols); 345 final double[][] weightedJacobian = internalData.weightedJacobian; 346 final int[] permutation = internalData.permutation; 347 final double[] diagR = internalData.diagR; 348 final double[] jacNorm = internalData.jacNorm; 349 350 //residuals already have weights applied 351 double[] weightedResidual = currentResiduals; 352 System.arraycopy(weightedResidual, 0, qtf, 0, nR); 353 354 // compute Qt.res 355 qTy(qtf, internalData); 356 357 // now we don't need Q anymore, 358 // so let jacobian contain the R matrix with its diagonal elements 359 for (int k = 0; k < solvedCols; ++k) { 360 int pk = permutation[k]; 361 weightedJacobian[k][pk] = diagR[pk]; 362 } 363 364 if (firstIteration) { 365 // scale the point according to the norms of the columns 366 // of the initial jacobian 367 xNorm = 0; 368 for (int k = 0; k < nC; ++k) { 369 double dk = jacNorm[k]; 370 if (dk == 0) { 371 dk = 1.0; 372 } 373 double xk = dk * currentPoint[k]; 374 xNorm += xk * xk; 375 diag[k] = dk; 376 } 377 xNorm = JdkMath.sqrt(xNorm); 378 379 // initialize the step bound delta 380 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 381 } 382 383 // check orthogonality between function vector and jacobian columns 384 double maxCosine = 0; 385 if (currentCost != 0) { 386 for (int j = 0; j < solvedCols; ++j) { 387 int pj = permutation[j]; 388 double s = jacNorm[pj]; 389 if (s != 0) { 390 double sum = 0; 391 for (int i = 0; i <= j; ++i) { 392 sum += weightedJacobian[i][pj] * qtf[i]; 393 } 394 maxCosine = JdkMath.max(maxCosine, JdkMath.abs(sum) / (s * currentCost)); 395 } 396 } 397 } 398 if (maxCosine <= orthoTolerance) { 399 // Convergence has been reached. 400 return new OptimumImpl( 401 current, 402 evaluationCounter.getCount(), 403 iterationCounter.getCount()); 404 } 405 406 // rescale if necessary 407 for (int j = 0; j < nC; ++j) { 408 diag[j] = JdkMath.max(diag[j], jacNorm[j]); 409 } 410 411 // Inner loop. 412 for (double ratio = 0; ratio < 1.0e-4;) { 413 414 // save the state 415 for (int j = 0; j < solvedCols; ++j) { 416 int pj = permutation[j]; 417 oldX[pj] = currentPoint[pj]; 418 } 419 final double previousCost = currentCost; 420 double[] tmpVec = weightedResidual; 421 weightedResidual = oldRes; 422 oldRes = tmpVec; 423 424 // determine the Levenberg-Marquardt parameter 425 lmPar = determineLMParameter(qtf, delta, diag, 426 internalData, solvedCols, 427 work1, work2, work3, lmDir, lmPar); 428 429 // compute the new point and the norm of the evolution direction 430 double lmNorm = 0; 431 for (int j = 0; j < solvedCols; ++j) { 432 int pj = permutation[j]; 433 lmDir[pj] = -lmDir[pj]; 434 currentPoint[pj] = oldX[pj] + lmDir[pj]; 435 double s = diag[pj] * lmDir[pj]; 436 lmNorm += s * s; 437 } 438 lmNorm = JdkMath.sqrt(lmNorm); 439 // on the first iteration, adjust the initial step bound. 440 if (firstIteration) { 441 delta = JdkMath.min(delta, lmNorm); 442 } 443 444 // Evaluate the function at x + p and calculate its norm. 445 evaluationCounter.increment(); 446 current = problem.evaluate(new ArrayRealVector(currentPoint)); 447 currentResiduals = current.getResiduals().toArray(); 448 currentCost = current.getCost(); 449 currentPoint = current.getPoint().toArray(); 450 451 // compute the scaled actual reduction 452 double actRed = -1.0; 453 if (0.1 * currentCost < previousCost) { 454 double r = currentCost / previousCost; 455 actRed = 1.0 - r * r; 456 } 457 458 // compute the scaled predicted reduction 459 // and the scaled directional derivative 460 for (int j = 0; j < solvedCols; ++j) { 461 int pj = permutation[j]; 462 double dirJ = lmDir[pj]; 463 work1[j] = 0; 464 for (int i = 0; i <= j; ++i) { 465 work1[i] += weightedJacobian[i][pj] * dirJ; 466 } 467 } 468 double coeff1 = 0; 469 for (int j = 0; j < solvedCols; ++j) { 470 coeff1 += work1[j] * work1[j]; 471 } 472 double pc2 = previousCost * previousCost; 473 coeff1 /= pc2; 474 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 475 double preRed = coeff1 + 2 * coeff2; 476 double dirDer = -(coeff1 + coeff2); 477 478 // ratio of the actual to the predicted reduction 479 ratio = (preRed == 0) ? 0 : (actRed / preRed); 480 481 // update the step bound 482 if (ratio <= 0.25) { 483 double tmp = 484 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 485 if (0.1 * currentCost >= previousCost || tmp < 0.1) { 486 tmp = 0.1; 487 } 488 delta = tmp * JdkMath.min(delta, 10.0 * lmNorm); 489 lmPar /= tmp; 490 } else if (lmPar == 0 || ratio >= 0.75) { 491 delta = 2 * lmNorm; 492 lmPar *= 0.5; 493 } 494 495 // test for successful iteration. 496 if (ratio >= 1.0e-4) { 497 // successful iteration, update the norm 498 firstIteration = false; 499 xNorm = 0; 500 for (int k = 0; k < nC; ++k) { 501 double xK = diag[k] * currentPoint[k]; 502 xNorm += xK * xK; 503 } 504 xNorm = JdkMath.sqrt(xNorm); 505 506 // tests for convergence. 507 if (checker != null && checker.converged(iterationCounter.getCount(), previous, current)) { 508 return new OptimumImpl(current, evaluationCounter.getCount(), iterationCounter.getCount()); 509 } 510 } else { 511 // failed iteration, reset the previous values 512 currentCost = previousCost; 513 for (int j = 0; j < solvedCols; ++j) { 514 int pj = permutation[j]; 515 currentPoint[pj] = oldX[pj]; 516 } 517 tmpVec = weightedResidual; 518 weightedResidual = oldRes; 519 oldRes = tmpVec; 520 // Reset "current" to previous values. 521 current = previous; 522 } 523 524 // Default convergence criteria. 525 if ((JdkMath.abs(actRed) <= costRelativeTolerance && 526 preRed <= costRelativeTolerance && 527 ratio <= 2.0) || 528 delta <= parRelativeTolerance * xNorm) { 529 return new OptimumImpl(current, evaluationCounter.getCount(), iterationCounter.getCount()); 530 } 531 532 // tests for termination and stringent tolerances 533 if (JdkMath.abs(actRed) <= TWO_EPS && 534 preRed <= TWO_EPS && 535 ratio <= 2.0) { 536 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 537 costRelativeTolerance); 538 } else if (delta <= TWO_EPS * xNorm) { 539 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 540 parRelativeTolerance); 541 } else if (maxCosine <= TWO_EPS) { 542 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 543 orthoTolerance); 544 } 545 } 546 } 547 } 548 549 /** 550 * Holds internal data. 551 * This structure was created so that all optimizer fields can be "final". 552 * Code should be further refactored in order to not pass around arguments 553 * that will modified in-place (cf. "work" arrays). 554 */ 555 private static final class InternalData { 556 /** Weighted Jacobian. */ 557 private final double[][] weightedJacobian; 558 /** Columns permutation array. */ 559 private final int[] permutation; 560 /** Rank of the Jacobian matrix. */ 561 private final int rank; 562 /** Diagonal elements of the R matrix in the QR decomposition. */ 563 private final double[] diagR; 564 /** Norms of the columns of the jacobian matrix. */ 565 private final double[] jacNorm; 566 /** Coefficients of the Householder transforms vectors. */ 567 private final double[] beta; 568 569 /** 570 * @param weightedJacobian Weighted Jacobian. 571 * @param permutation Columns permutation array. 572 * @param rank Rank of the Jacobian matrix. 573 * @param diagR Diagonal elements of the R matrix in the QR decomposition. 574 * @param jacNorm Norms of the columns of the jacobian matrix. 575 * @param beta Coefficients of the Householder transforms vectors. 576 */ 577 InternalData(double[][] weightedJacobian, 578 int[] permutation, 579 int rank, 580 double[] diagR, 581 double[] jacNorm, 582 double[] beta) { 583 this.weightedJacobian = weightedJacobian; 584 this.permutation = permutation; 585 this.rank = rank; 586 this.diagR = diagR; 587 this.jacNorm = jacNorm; 588 this.beta = beta; 589 } 590 } 591 592 /** 593 * Determines the Levenberg-Marquardt parameter. 594 * 595 * <p>This implementation is a translation in Java of the MINPACK 596 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 597 * routine.</p> 598 * <p>This method sets the lmPar and lmDir attributes.</p> 599 * <p>The authors of the original fortran function are:</p> 600 * <ul> 601 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 602 * <li>Burton S. Garbow</li> 603 * <li>Kenneth E. Hillstrom</li> 604 * <li>Jorge J. More</li> 605 * </ul> 606 * <p>Luc Maisonobe did the Java translation.</p> 607 * 608 * @param qy Array containing qTy. 609 * @param delta Upper bound on the euclidean norm of diagR * lmDir. 610 * @param diag Diagonal matrix. 611 * @param internalData Data (modified in-place in this method). 612 * @param solvedCols Number of solved point. 613 * @param work1 work array 614 * @param work2 work array 615 * @param work3 work array 616 * @param lmDir the "returned" LM direction will be stored in this array. 617 * @param lmPar the value of the LM parameter from the previous iteration. 618 * @return the new LM parameter 619 */ 620 private double determineLMParameter(double[] qy, double delta, double[] diag, 621 InternalData internalData, int solvedCols, 622 double[] work1, double[] work2, double[] work3, 623 double[] lmDir, double lmPar) { 624 final double[][] weightedJacobian = internalData.weightedJacobian; 625 final int[] permutation = internalData.permutation; 626 final int rank = internalData.rank; 627 final double[] diagR = internalData.diagR; 628 629 final int nC = weightedJacobian[0].length; 630 631 // compute and store in x the gauss-newton direction, if the 632 // jacobian is rank-deficient, obtain a least squares solution 633 for (int j = 0; j < rank; ++j) { 634 lmDir[permutation[j]] = qy[j]; 635 } 636 for (int j = rank; j < nC; ++j) { 637 lmDir[permutation[j]] = 0; 638 } 639 for (int k = rank - 1; k >= 0; --k) { 640 int pk = permutation[k]; 641 double ypk = lmDir[pk] / diagR[pk]; 642 for (int i = 0; i < k; ++i) { 643 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 644 } 645 lmDir[pk] = ypk; 646 } 647 648 // evaluate the function at the origin, and test 649 // for acceptance of the Gauss-Newton direction 650 double dxNorm = 0; 651 for (int j = 0; j < solvedCols; ++j) { 652 int pj = permutation[j]; 653 double s = diag[pj] * lmDir[pj]; 654 work1[pj] = s; 655 dxNorm += s * s; 656 } 657 dxNorm = JdkMath.sqrt(dxNorm); 658 double fp = dxNorm - delta; 659 if (fp <= 0.1 * delta) { 660 lmPar = 0; 661 return lmPar; 662 } 663 664 // if the jacobian is not rank deficient, the Newton step provides 665 // a lower bound, parl, for the zero of the function, 666 // otherwise set this bound to zero 667 double sum2; 668 double parl = 0; 669 if (rank == solvedCols) { 670 for (int j = 0; j < solvedCols; ++j) { 671 int pj = permutation[j]; 672 work1[pj] *= diag[pj] / dxNorm; 673 } 674 sum2 = 0; 675 for (int j = 0; j < solvedCols; ++j) { 676 int pj = permutation[j]; 677 double sum = 0; 678 for (int i = 0; i < j; ++i) { 679 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 680 } 681 double s = (work1[pj] - sum) / diagR[pj]; 682 work1[pj] = s; 683 sum2 += s * s; 684 } 685 parl = fp / (delta * sum2); 686 } 687 688 // calculate an upper bound, paru, for the zero of the function 689 sum2 = 0; 690 for (int j = 0; j < solvedCols; ++j) { 691 int pj = permutation[j]; 692 double sum = 0; 693 for (int i = 0; i <= j; ++i) { 694 sum += weightedJacobian[i][pj] * qy[i]; 695 } 696 sum /= diag[pj]; 697 sum2 += sum * sum; 698 } 699 double gNorm = JdkMath.sqrt(sum2); 700 double paru = gNorm / delta; 701 if (paru == 0) { 702 paru = Precision.SAFE_MIN / JdkMath.min(delta, 0.1); 703 } 704 705 // if the input par lies outside of the interval (parl,paru), 706 // set par to the closer endpoint 707 lmPar = JdkMath.min(paru, JdkMath.max(lmPar, parl)); 708 if (lmPar == 0) { 709 lmPar = gNorm / dxNorm; 710 } 711 712 for (int countdown = 10; countdown >= 0; --countdown) { 713 714 // evaluate the function at the current value of lmPar 715 if (lmPar == 0) { 716 lmPar = JdkMath.max(Precision.SAFE_MIN, 0.001 * paru); 717 } 718 double sPar = JdkMath.sqrt(lmPar); 719 for (int j = 0; j < solvedCols; ++j) { 720 int pj = permutation[j]; 721 work1[pj] = sPar * diag[pj]; 722 } 723 determineLMDirection(qy, work1, work2, internalData, solvedCols, work3, lmDir); 724 725 dxNorm = 0; 726 for (int j = 0; j < solvedCols; ++j) { 727 int pj = permutation[j]; 728 double s = diag[pj] * lmDir[pj]; 729 work3[pj] = s; 730 dxNorm += s * s; 731 } 732 dxNorm = JdkMath.sqrt(dxNorm); 733 double previousFP = fp; 734 fp = dxNorm - delta; 735 736 // if the function is small enough, accept the current value 737 // of lmPar, also test for the exceptional cases where parl is zero 738 if (JdkMath.abs(fp) <= 0.1 * delta || 739 (parl == 0 && 740 fp <= previousFP && 741 previousFP < 0)) { 742 return lmPar; 743 } 744 745 // compute the Newton correction 746 for (int j = 0; j < solvedCols; ++j) { 747 int pj = permutation[j]; 748 work1[pj] = work3[pj] * diag[pj] / dxNorm; 749 } 750 for (int j = 0; j < solvedCols; ++j) { 751 int pj = permutation[j]; 752 work1[pj] /= work2[j]; 753 double tmp = work1[pj]; 754 for (int i = j + 1; i < solvedCols; ++i) { 755 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 756 } 757 } 758 sum2 = 0; 759 for (int j = 0; j < solvedCols; ++j) { 760 double s = work1[permutation[j]]; 761 sum2 += s * s; 762 } 763 double correction = fp / (delta * sum2); 764 765 // depending on the sign of the function, update parl or paru. 766 if (fp > 0) { 767 parl = JdkMath.max(parl, lmPar); 768 } else if (fp < 0) { 769 paru = JdkMath.min(paru, lmPar); 770 } 771 772 // compute an improved estimate for lmPar 773 lmPar = JdkMath.max(parl, lmPar + correction); 774 } 775 776 return lmPar; 777 } 778 779 /** 780 * Solve a*x = b and d*x = 0 in the least squares sense. 781 * <p>This implementation is a translation in Java of the MINPACK 782 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 783 * routine.</p> 784 * <p>This method sets the lmDir and lmDiag attributes.</p> 785 * <p>The authors of the original fortran function are:</p> 786 * <ul> 787 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 788 * <li>Burton S. Garbow</li> 789 * <li>Kenneth E. Hillstrom</li> 790 * <li>Jorge J. More</li> 791 * </ul> 792 * <p>Luc Maisonobe did the Java translation.</p> 793 * 794 * @param qy array containing qTy 795 * @param diag diagonal matrix 796 * @param lmDiag diagonal elements associated with lmDir 797 * @param internalData Data (modified in-place in this method). 798 * @param solvedCols Number of sloved point. 799 * @param work work array 800 * @param lmDir the "returned" LM direction is stored in this array 801 */ 802 private void determineLMDirection(double[] qy, double[] diag, 803 double[] lmDiag, 804 InternalData internalData, 805 int solvedCols, 806 double[] work, 807 double[] lmDir) { 808 final int[] permutation = internalData.permutation; 809 final double[][] weightedJacobian = internalData.weightedJacobian; 810 final double[] diagR = internalData.diagR; 811 812 // copy R and Qty to preserve input and initialize s 813 // in particular, save the diagonal elements of R in lmDir 814 for (int j = 0; j < solvedCols; ++j) { 815 int pj = permutation[j]; 816 for (int i = j + 1; i < solvedCols; ++i) { 817 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 818 } 819 lmDir[j] = diagR[pj]; 820 work[j] = qy[j]; 821 } 822 823 // eliminate the diagonal matrix d using a Givens rotation 824 for (int j = 0; j < solvedCols; ++j) { 825 826 // prepare the row of d to be eliminated, locating the 827 // diagonal element using p from the Q.R. factorization 828 int pj = permutation[j]; 829 double dpj = diag[pj]; 830 if (dpj != 0) { 831 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 832 } 833 lmDiag[j] = dpj; 834 835 // the transformations to eliminate the row of d 836 // modify only a single element of Qty 837 // beyond the first n, which is initially zero. 838 double qtbpj = 0; 839 for (int k = j; k < solvedCols; ++k) { 840 int pk = permutation[k]; 841 842 // determine a Givens rotation which eliminates the 843 // appropriate element in the current row of d 844 if (lmDiag[k] != 0) { 845 846 final double sin; 847 final double cos; 848 double rkk = weightedJacobian[k][pk]; 849 if (JdkMath.abs(rkk) < JdkMath.abs(lmDiag[k])) { 850 final double cotan = rkk / lmDiag[k]; 851 sin = 1.0 / JdkMath.sqrt(1.0 + cotan * cotan); 852 cos = sin * cotan; 853 } else { 854 final double tan = lmDiag[k] / rkk; 855 cos = 1.0 / JdkMath.sqrt(1.0 + tan * tan); 856 sin = cos * tan; 857 } 858 859 // compute the modified diagonal element of R and 860 // the modified element of (Qty,0) 861 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 862 final double temp = cos * work[k] + sin * qtbpj; 863 qtbpj = -sin * work[k] + cos * qtbpj; 864 work[k] = temp; 865 866 // accumulate the tranformation in the row of s 867 for (int i = k + 1; i < solvedCols; ++i) { 868 double rik = weightedJacobian[i][pk]; 869 final double temp2 = cos * rik + sin * lmDiag[i]; 870 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 871 weightedJacobian[i][pk] = temp2; 872 } 873 } 874 } 875 876 // store the diagonal element of s and restore 877 // the corresponding diagonal element of R 878 lmDiag[j] = weightedJacobian[j][permutation[j]]; 879 weightedJacobian[j][permutation[j]] = lmDir[j]; 880 } 881 882 // solve the triangular system for z, if the system is 883 // singular, then obtain a least squares solution 884 int nSing = solvedCols; 885 for (int j = 0; j < solvedCols; ++j) { 886 if (lmDiag[j] == 0 && nSing == solvedCols) { 887 nSing = j; 888 } 889 if (nSing < solvedCols) { 890 work[j] = 0; 891 } 892 } 893 if (nSing > 0) { 894 for (int j = nSing - 1; j >= 0; --j) { 895 int pj = permutation[j]; 896 double sum = 0; 897 for (int i = j + 1; i < nSing; ++i) { 898 sum += weightedJacobian[i][pj] * work[i]; 899 } 900 work[j] = (work[j] - sum) / lmDiag[j]; 901 } 902 } 903 904 // permute the components of z back to components of lmDir 905 for (int j = 0; j < lmDir.length; ++j) { 906 lmDir[permutation[j]] = work[j]; 907 } 908 } 909 910 /** 911 * Decompose a matrix A as A.P = Q.R using Householder transforms. 912 * <p>As suggested in the P. Lascaux and R. Theodor book 913 * <i>Analyse numérique matricielle appliquée à 914 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 915 * the Householder transforms with u<sub>k</sub> unit vectors such that: 916 * <pre> 917 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 918 * </pre> 919 * we use <sub>k</sub> non-unit vectors such that: 920 * <pre> 921 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 922 * </pre> 923 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 924 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 925 * them from the v<sub>k</sub> vectors would be costly.</p> 926 * <p>This decomposition handles rank deficient cases since the tranformations 927 * are performed in non-increasing columns norms order thanks to columns 928 * pivoting. The diagonal elements of the R matrix are therefore also in 929 * non-increasing absolute values order.</p> 930 * 931 * @param jacobian Weighted Jacobian matrix at the current point. 932 * @param solvedCols Number of solved point. 933 * @return data used in other methods of this class. 934 * @throws ConvergenceException if the decomposition cannot be performed. 935 */ 936 private InternalData qrDecomposition(RealMatrix jacobian, 937 int solvedCols) throws ConvergenceException { 938 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 939 // hence the multiplication by -1. 940 final double[][] weightedJacobian = jacobian.scalarMultiply(-1).getData(); 941 942 final int nR = weightedJacobian.length; 943 final int nC = weightedJacobian[0].length; 944 945 final int[] permutation = new int[nC]; 946 final double[] diagR = new double[nC]; 947 final double[] jacNorm = new double[nC]; 948 final double[] beta = new double[nC]; 949 950 // initializations 951 for (int k = 0; k < nC; ++k) { 952 permutation[k] = k; 953 double norm2 = 0; 954 for (int i = 0; i < nR; ++i) { 955 double akk = weightedJacobian[i][k]; 956 norm2 += akk * akk; 957 } 958 jacNorm[k] = JdkMath.sqrt(norm2); 959 } 960 961 // transform the matrix column after column 962 for (int k = 0; k < nC; ++k) { 963 964 // select the column with the greatest norm on active components 965 int nextColumn = -1; 966 double ak2 = Double.NEGATIVE_INFINITY; 967 for (int i = k; i < nC; ++i) { 968 double norm2 = 0; 969 for (int j = k; j < nR; ++j) { 970 double aki = weightedJacobian[j][permutation[i]]; 971 norm2 += aki * aki; 972 } 973 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 974 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 975 nR, nC); 976 } 977 if (norm2 > ak2) { 978 nextColumn = i; 979 ak2 = norm2; 980 } 981 } 982 if (ak2 <= qrRankingThreshold) { 983 return new InternalData(weightedJacobian, permutation, k, diagR, jacNorm, beta); 984 } 985 int pk = permutation[nextColumn]; 986 permutation[nextColumn] = permutation[k]; 987 permutation[k] = pk; 988 989 // choose alpha such that Hk.u = alpha ek 990 double akk = weightedJacobian[k][pk]; 991 double alpha = (akk > 0) ? -JdkMath.sqrt(ak2) : JdkMath.sqrt(ak2); 992 double betak = 1.0 / (ak2 - akk * alpha); 993 beta[pk] = betak; 994 995 // transform the current column 996 diagR[pk] = alpha; 997 weightedJacobian[k][pk] -= alpha; 998 999 // transform the remaining columns 1000 for (int dk = nC - 1 - k; dk > 0; --dk) { 1001 double gamma = 0; 1002 for (int j = k; j < nR; ++j) { 1003 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 1004 } 1005 gamma *= betak; 1006 for (int j = k; j < nR; ++j) { 1007 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 1008 } 1009 } 1010 } 1011 1012 return new InternalData(weightedJacobian, permutation, solvedCols, diagR, jacNorm, beta); 1013 } 1014 1015 /** 1016 * Compute the product Qt.y for some Q.R. decomposition. 1017 * 1018 * @param y vector to multiply (will be overwritten with the result) 1019 * @param internalData Data. 1020 */ 1021 private void qTy(double[] y, 1022 InternalData internalData) { 1023 final double[][] weightedJacobian = internalData.weightedJacobian; 1024 final int[] permutation = internalData.permutation; 1025 final double[] beta = internalData.beta; 1026 1027 final int nR = weightedJacobian.length; 1028 final int nC = weightedJacobian[0].length; 1029 1030 for (int k = 0; k < nC; ++k) { 1031 int pk = permutation[k]; 1032 double gamma = 0; 1033 for (int i = k; i < nR; ++i) { 1034 gamma += weightedJacobian[i][pk] * y[i]; 1035 } 1036 gamma *= beta[pk]; 1037 for (int i = k; i < nR; ++i) { 1038 y[i] -= gamma * weightedJacobian[i][pk]; 1039 } 1040 } 1041 } 1042}