001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.math3.optimization.general; 019 020import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction; 021import org.apache.commons.math3.analysis.FunctionUtils; 022import org.apache.commons.math3.analysis.differentiation.DerivativeStructure; 023import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction; 024import org.apache.commons.math3.exception.DimensionMismatchException; 025import org.apache.commons.math3.exception.NumberIsTooSmallException; 026import org.apache.commons.math3.exception.util.LocalizedFormats; 027import org.apache.commons.math3.linear.ArrayRealVector; 028import org.apache.commons.math3.linear.RealMatrix; 029import org.apache.commons.math3.linear.DiagonalMatrix; 030import org.apache.commons.math3.linear.DecompositionSolver; 031import org.apache.commons.math3.linear.MatrixUtils; 032import org.apache.commons.math3.linear.QRDecomposition; 033import org.apache.commons.math3.linear.EigenDecomposition; 034import org.apache.commons.math3.optimization.OptimizationData; 035import org.apache.commons.math3.optimization.InitialGuess; 036import org.apache.commons.math3.optimization.Target; 037import org.apache.commons.math3.optimization.Weight; 038import org.apache.commons.math3.optimization.ConvergenceChecker; 039import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer; 040import org.apache.commons.math3.optimization.PointVectorValuePair; 041import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer; 042import org.apache.commons.math3.util.FastMath; 043 044/** 045 * Base class for implementing least squares optimizers. 046 * It handles the boilerplate methods associated to thresholds settings, 047 * Jacobian and error estimation. 048 * <br/> 049 * This class constructs the Jacobian matrix of the function argument in method 050 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int, 051 * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[]) 052 * optimize} and assumes that the rows of that matrix iterate on the model 053 * functions while the columns iterate on the parameters; thus, the numbers 054 * of rows is equal to the dimension of the 055 * {@link org.apache.commons.math3.optimization.Target Target} while 056 * the number of columns is equal to the dimension of the 057 * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}. 058 * 059 * @deprecated As of 3.1 (to be removed in 4.0). 060 * @since 1.2 061 */ 062@Deprecated 063public abstract class AbstractLeastSquaresOptimizer 064 extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction> 065 implements DifferentiableMultivariateVectorOptimizer { 066 /** 067 * Singularity threshold (cf. {@link #getCovariances(double)}). 068 * @deprecated As of 3.1. 069 */ 070 @Deprecated 071 private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14; 072 /** 073 * Jacobian matrix of the weighted residuals. 074 * This matrix is in canonical form just after the calls to 075 * {@link #updateJacobian()}, but may be modified by the solver 076 * in the derived class (the {@link LevenbergMarquardtOptimizer 077 * Levenberg-Marquardt optimizer} does this). 078 * @deprecated As of 3.1. To be removed in 4.0. Please use 079 * {@link #computeWeightedJacobian(double[])} instead. 080 */ 081 @Deprecated 082 protected double[][] weightedResidualJacobian; 083 /** Number of columns of the jacobian matrix. 084 * @deprecated As of 3.1. 085 */ 086 @Deprecated 087 protected int cols; 088 /** Number of rows of the jacobian matrix. 089 * @deprecated As of 3.1. 090 */ 091 @Deprecated 092 protected int rows; 093 /** Current point. 094 * @deprecated As of 3.1. 095 */ 096 @Deprecated 097 protected double[] point; 098 /** Current objective function value. 099 * @deprecated As of 3.1. 100 */ 101 @Deprecated 102 protected double[] objective; 103 /** Weighted residuals 104 * @deprecated As of 3.1. 105 */ 106 @Deprecated 107 protected double[] weightedResiduals; 108 /** Cost value (square root of the sum of the residuals). 109 * @deprecated As of 3.1. Field to become "private" in 4.0. 110 * Please use {@link #setCost(double)}. 111 */ 112 @Deprecated 113 protected double cost; 114 /** Objective function derivatives. */ 115 private MultivariateDifferentiableVectorFunction jF; 116 /** Number of evaluations of the Jacobian. */ 117 private int jacobianEvaluations; 118 /** Square-root of the weight matrix. */ 119 private RealMatrix weightMatrixSqrt; 120 121 /** 122 * Simple constructor with default settings. 123 * The convergence check is set to a {@link 124 * org.apache.commons.math3.optimization.SimpleVectorValueChecker}. 125 * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()} 126 */ 127 @Deprecated 128 protected AbstractLeastSquaresOptimizer() {} 129 130 /** 131 * @param checker Convergence checker. 132 */ 133 protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 134 super(checker); 135 } 136 137 /** 138 * @return the number of evaluations of the Jacobian function. 139 */ 140 public int getJacobianEvaluations() { 141 return jacobianEvaluations; 142 } 143 144 /** 145 * Update the jacobian matrix. 146 * 147 * @throws DimensionMismatchException if the Jacobian dimension does not 148 * match problem dimension. 149 * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])} 150 * instead. 151 */ 152 @Deprecated 153 protected void updateJacobian() { 154 final RealMatrix weightedJacobian = computeWeightedJacobian(point); 155 weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData(); 156 } 157 158 /** 159 * Computes the Jacobian matrix. 160 * 161 * @param params Model parameters at which to compute the Jacobian. 162 * @return the weighted Jacobian: W<sup>1/2</sup> J. 163 * @throws DimensionMismatchException if the Jacobian dimension does not 164 * match problem dimension. 165 * @since 3.1 166 */ 167 protected RealMatrix computeWeightedJacobian(double[] params) { 168 ++jacobianEvaluations; 169 170 final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length]; 171 final int nC = params.length; 172 for (int i = 0; i < nC; ++i) { 173 dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]); 174 } 175 final DerivativeStructure[] dsValue = jF.value(dsPoint); 176 final int nR = getTarget().length; 177 if (dsValue.length != nR) { 178 throw new DimensionMismatchException(dsValue.length, nR); 179 } 180 final double[][] jacobianData = new double[nR][nC]; 181 for (int i = 0; i < nR; ++i) { 182 int[] orders = new int[nC]; 183 for (int j = 0; j < nC; ++j) { 184 orders[j] = 1; 185 jacobianData[i][j] = dsValue[i].getPartialDerivative(orders); 186 orders[j] = 0; 187 } 188 } 189 190 return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData)); 191 } 192 193 /** 194 * Update the residuals array and cost function value. 195 * @throws DimensionMismatchException if the dimension does not match the 196 * problem dimension. 197 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException 198 * if the maximal number of evaluations is exceeded. 199 * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])}, 200 * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])} 201 * and {@link #setCost(double)} instead. 202 */ 203 @Deprecated 204 protected void updateResidualsAndCost() { 205 objective = computeObjectiveValue(point); 206 final double[] res = computeResiduals(objective); 207 208 // Compute cost. 209 cost = computeCost(res); 210 211 // Compute weighted residuals. 212 final ArrayRealVector residuals = new ArrayRealVector(res); 213 weightedResiduals = weightMatrixSqrt.operate(residuals).toArray(); 214 } 215 216 /** 217 * Computes the cost. 218 * 219 * @param residuals Residuals. 220 * @return the cost. 221 * @see #computeResiduals(double[]) 222 * @since 3.1 223 */ 224 protected double computeCost(double[] residuals) { 225 final ArrayRealVector r = new ArrayRealVector(residuals); 226 return FastMath.sqrt(r.dotProduct(getWeight().operate(r))); 227 } 228 229 /** 230 * Get the Root Mean Square value. 231 * Get the Root Mean Square value, i.e. the root of the arithmetic 232 * mean of the square of all weighted residuals. This is related to the 233 * criterion that is minimized by the optimizer as follows: if 234 * <em>c</em> if the criterion, and <em>n</em> is the number of 235 * measurements, then the RMS is <em>sqrt (c/n)</em>. 236 * 237 * @return RMS value 238 */ 239 public double getRMS() { 240 return FastMath.sqrt(getChiSquare() / rows); 241 } 242 243 /** 244 * Get a Chi-Square-like value assuming the N residuals follow N 245 * distinct normal distributions centered on 0 and whose variances are 246 * the reciprocal of the weights. 247 * @return chi-square value 248 */ 249 public double getChiSquare() { 250 return cost * cost; 251 } 252 253 /** 254 * Gets the square-root of the weight matrix. 255 * 256 * @return the square-root of the weight matrix. 257 * @since 3.1 258 */ 259 public RealMatrix getWeightSquareRoot() { 260 return weightMatrixSqrt.copy(); 261 } 262 263 /** 264 * Sets the cost. 265 * 266 * @param cost Cost value. 267 * @since 3.1 268 */ 269 protected void setCost(double cost) { 270 this.cost = cost; 271 } 272 273 /** 274 * Get the covariance matrix of the optimized parameters. 275 * 276 * @return the covariance matrix. 277 * @throws org.apache.commons.math3.linear.SingularMatrixException 278 * if the covariance matrix cannot be computed (singular problem). 279 * @see #getCovariances(double) 280 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)} 281 * instead. 282 */ 283 @Deprecated 284 public double[][] getCovariances() { 285 return getCovariances(DEFAULT_SINGULARITY_THRESHOLD); 286 } 287 288 /** 289 * Get the covariance matrix of the optimized parameters. 290 * <br/> 291 * Note that this operation involves the inversion of the 292 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the 293 * Jacobian matrix. 294 * The {@code threshold} parameter is a way for the caller to specify 295 * that the result of this computation should be considered meaningless, 296 * and thus trigger an exception. 297 * 298 * @param threshold Singularity threshold. 299 * @return the covariance matrix. 300 * @throws org.apache.commons.math3.linear.SingularMatrixException 301 * if the covariance matrix cannot be computed (singular problem). 302 * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)} 303 * instead. 304 */ 305 @Deprecated 306 public double[][] getCovariances(double threshold) { 307 return computeCovariances(point, threshold); 308 } 309 310 /** 311 * Get the covariance matrix of the optimized parameters. 312 * <br/> 313 * Note that this operation involves the inversion of the 314 * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the 315 * Jacobian matrix. 316 * The {@code threshold} parameter is a way for the caller to specify 317 * that the result of this computation should be considered meaningless, 318 * and thus trigger an exception. 319 * 320 * @param params Model parameters. 321 * @param threshold Singularity threshold. 322 * @return the covariance matrix. 323 * @throws org.apache.commons.math3.linear.SingularMatrixException 324 * if the covariance matrix cannot be computed (singular problem). 325 * @since 3.1 326 */ 327 public double[][] computeCovariances(double[] params, 328 double threshold) { 329 // Set up the Jacobian. 330 final RealMatrix j = computeWeightedJacobian(params); 331 332 // Compute transpose(J)J. 333 final RealMatrix jTj = j.transpose().multiply(j); 334 335 // Compute the covariances matrix. 336 final DecompositionSolver solver 337 = new QRDecomposition(jTj, threshold).getSolver(); 338 return solver.getInverse().getData(); 339 } 340 341 /** 342 * <p> 343 * Returns an estimate of the standard deviation of each parameter. The 344 * returned values are the so-called (asymptotic) standard errors on the 345 * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])}, 346 * where {@code a[i]} is the optimized value of the {@code i}-th parameter, 347 * {@code S} is the minimized value of the sum of squares objective function 348 * (as returned by {@link #getChiSquare()}), {@code n} is the number of 349 * observations, {@code m} is the number of parameters and {@code C} is the 350 * covariance matrix. 351 * </p> 352 * <p> 353 * See also 354 * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>, 355 * or 356 * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>, 357 * equations (34) and (35) for a particular case. 358 * </p> 359 * 360 * @return an estimate of the standard deviation of the optimized parameters 361 * @throws org.apache.commons.math3.linear.SingularMatrixException 362 * if the covariance matrix cannot be computed. 363 * @throws NumberIsTooSmallException if the number of degrees of freedom is not 364 * positive, i.e. the number of measurements is less or equal to the number of 365 * parameters. 366 * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used 367 * instead. It should be emphasized that {@code guessParametersErrors} and 368 * {@code computeSigma} are <em>not</em> strictly equivalent. 369 */ 370 @Deprecated 371 public double[] guessParametersErrors() { 372 if (rows <= cols) { 373 throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM, 374 rows, cols, false); 375 } 376 double[] errors = new double[cols]; 377 final double c = FastMath.sqrt(getChiSquare() / (rows - cols)); 378 double[][] covar = computeCovariances(point, 1e-14); 379 for (int i = 0; i < errors.length; ++i) { 380 errors[i] = FastMath.sqrt(covar[i][i]) * c; 381 } 382 return errors; 383 } 384 385 /** 386 * Computes an estimate of the standard deviation of the parameters. The 387 * returned values are the square root of the diagonal coefficients of the 388 * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]} 389 * is the optimized value of the {@code i}-th parameter, and {@code C} is 390 * the covariance matrix. 391 * 392 * @param params Model parameters. 393 * @param covarianceSingularityThreshold Singularity threshold (see 394 * {@link #computeCovariances(double[],double) computeCovariances}). 395 * @return an estimate of the standard deviation of the optimized parameters 396 * @throws org.apache.commons.math3.linear.SingularMatrixException 397 * if the covariance matrix cannot be computed. 398 * @since 3.1 399 */ 400 public double[] computeSigma(double[] params, 401 double covarianceSingularityThreshold) { 402 final int nC = params.length; 403 final double[] sig = new double[nC]; 404 final double[][] cov = computeCovariances(params, covarianceSingularityThreshold); 405 for (int i = 0; i < nC; ++i) { 406 sig[i] = FastMath.sqrt(cov[i][i]); 407 } 408 return sig; 409 } 410 411 /** {@inheritDoc} 412 * @deprecated As of 3.1. Please use 413 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int, 414 * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[]) 415 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)} 416 * instead. 417 */ 418 @Override 419 @Deprecated 420 public PointVectorValuePair optimize(int maxEval, 421 final DifferentiableMultivariateVectorFunction f, 422 final double[] target, final double[] weights, 423 final double[] startPoint) { 424 return optimizeInternal(maxEval, 425 FunctionUtils.toMultivariateDifferentiableVectorFunction(f), 426 new Target(target), 427 new Weight(weights), 428 new InitialGuess(startPoint)); 429 } 430 431 /** 432 * Optimize an objective function. 433 * Optimization is considered to be a weighted least-squares minimization. 434 * The cost function to be minimized is 435 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code> 436 * 437 * @param f Objective function. 438 * @param target Target value for the objective functions at optimum. 439 * @param weights Weights for the least squares cost computation. 440 * @param startPoint Start point for optimization. 441 * @return the point/value pair giving the optimal value for objective 442 * function. 443 * @param maxEval Maximum number of function evaluations. 444 * @throws org.apache.commons.math3.exception.DimensionMismatchException 445 * if the start point dimension is wrong. 446 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException 447 * if the maximal number of evaluations is exceeded. 448 * @throws org.apache.commons.math3.exception.NullArgumentException if 449 * any argument is {@code null}. 450 * @deprecated As of 3.1. Please use 451 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int, 452 * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[]) 453 * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)} 454 * instead. 455 */ 456 @Deprecated 457 public PointVectorValuePair optimize(final int maxEval, 458 final MultivariateDifferentiableVectorFunction f, 459 final double[] target, final double[] weights, 460 final double[] startPoint) { 461 return optimizeInternal(maxEval, f, 462 new Target(target), 463 new Weight(weights), 464 new InitialGuess(startPoint)); 465 } 466 467 /** 468 * Optimize an objective function. 469 * Optimization is considered to be a weighted least-squares minimization. 470 * The cost function to be minimized is 471 * <code>∑weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code> 472 * 473 * @param maxEval Allowed number of evaluations of the objective function. 474 * @param f Objective function. 475 * @param optData Optimization data. The following data will be looked for: 476 * <ul> 477 * <li>{@link Target}</li> 478 * <li>{@link Weight}</li> 479 * <li>{@link InitialGuess}</li> 480 * </ul> 481 * @return the point/value pair giving the optimal value of the objective 482 * function. 483 * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if 484 * the maximal number of evaluations is exceeded. 485 * @throws DimensionMismatchException if the target, and weight arguments 486 * have inconsistent dimensions. 487 * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int, 488 * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[]) 489 * @since 3.1 490 * @deprecated As of 3.1. Override is necessary only until this class's generic 491 * argument is changed to {@code MultivariateDifferentiableVectorFunction}. 492 */ 493 @Deprecated 494 protected PointVectorValuePair optimizeInternal(final int maxEval, 495 final MultivariateDifferentiableVectorFunction f, 496 OptimizationData... optData) { 497 // XXX Conversion will be removed when the generic argument of the 498 // base class becomes "MultivariateDifferentiableVectorFunction". 499 return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData); 500 } 501 502 /** {@inheritDoc} */ 503 @Override 504 protected void setUp() { 505 super.setUp(); 506 507 // Reset counter. 508 jacobianEvaluations = 0; 509 510 // Square-root of the weight matrix. 511 weightMatrixSqrt = squareRoot(getWeight()); 512 513 // Store least squares problem characteristics. 514 // XXX The conversion won't be necessary when the generic argument of 515 // the base class becomes "MultivariateDifferentiableVectorFunction". 516 // XXX "jF" is not strictly necessary anymore but is currently more 517 // efficient than converting the value returned from "getObjectiveFunction()" 518 // every time it is used. 519 jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction()); 520 521 // Arrays shared with "private" and "protected" methods. 522 point = getStartPoint(); 523 rows = getTarget().length; 524 cols = point.length; 525 } 526 527 /** 528 * Computes the residuals. 529 * The residual is the difference between the observed (target) 530 * values and the model (objective function) value. 531 * There is one residual for each element of the vector-valued 532 * function. 533 * 534 * @param objectiveValue Value of the the objective function. This is 535 * the value returned from a call to 536 * {@link #computeObjectiveValue(double[]) computeObjectiveValue} 537 * (whose array argument contains the model parameters). 538 * @return the residuals. 539 * @throws DimensionMismatchException if {@code params} has a wrong 540 * length. 541 * @since 3.1 542 */ 543 protected double[] computeResiduals(double[] objectiveValue) { 544 final double[] target = getTarget(); 545 if (objectiveValue.length != target.length) { 546 throw new DimensionMismatchException(target.length, 547 objectiveValue.length); 548 } 549 550 final double[] residuals = new double[target.length]; 551 for (int i = 0; i < target.length; i++) { 552 residuals[i] = target[i] - objectiveValue[i]; 553 } 554 555 return residuals; 556 } 557 558 /** 559 * Computes the square-root of the weight matrix. 560 * 561 * @param m Symmetric, positive-definite (weight) matrix. 562 * @return the square-root of the weight matrix. 563 */ 564 private RealMatrix squareRoot(RealMatrix m) { 565 if (m instanceof DiagonalMatrix) { 566 final int dim = m.getRowDimension(); 567 final RealMatrix sqrtM = new DiagonalMatrix(dim); 568 for (int i = 0; i < dim; i++) { 569 sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i))); 570 } 571 return sqrtM; 572 } else { 573 final EigenDecomposition dec = new EigenDecomposition(m); 574 return dec.getSquareRoot(); 575 } 576 } 577}