View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.optimization.general;
19  
20  import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
21  import org.apache.commons.math3.analysis.FunctionUtils;
22  import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
23  import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
24  import org.apache.commons.math3.exception.DimensionMismatchException;
25  import org.apache.commons.math3.exception.NumberIsTooSmallException;
26  import org.apache.commons.math3.exception.util.LocalizedFormats;
27  import org.apache.commons.math3.linear.ArrayRealVector;
28  import org.apache.commons.math3.linear.RealMatrix;
29  import org.apache.commons.math3.linear.DiagonalMatrix;
30  import org.apache.commons.math3.linear.DecompositionSolver;
31  import org.apache.commons.math3.linear.MatrixUtils;
32  import org.apache.commons.math3.linear.QRDecomposition;
33  import org.apache.commons.math3.linear.EigenDecomposition;
34  import org.apache.commons.math3.optimization.OptimizationData;
35  import org.apache.commons.math3.optimization.InitialGuess;
36  import org.apache.commons.math3.optimization.Target;
37  import org.apache.commons.math3.optimization.Weight;
38  import org.apache.commons.math3.optimization.ConvergenceChecker;
39  import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
40  import org.apache.commons.math3.optimization.PointVectorValuePair;
41  import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
42  import org.apache.commons.math3.util.FastMath;
43  
44  /**
45   * Base class for implementing least squares optimizers.
46   * It handles the boilerplate methods associated to thresholds settings,
47   * Jacobian and error estimation.
48   * <br/>
49   * This class constructs the Jacobian matrix of the function argument in method
50   * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
51   * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
52   * optimize} and assumes that the rows of that matrix iterate on the model
53   * functions while the columns iterate on the parameters; thus, the numbers
54   * of rows is equal to the dimension of the
55   * {@link org.apache.commons.math3.optimization.Target Target} while
56   * the number of columns is equal to the dimension of the
57   * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
58   *
59   * @deprecated As of 3.1 (to be removed in 4.0).
60   * @since 1.2
61   */
@Deprecated
public abstract class AbstractLeastSquaresOptimizer
    extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
    implements DifferentiableMultivariateVectorOptimizer {
    /**
     * Singularity threshold (cf. {@link #getCovariances(double)}).
     * @deprecated As of 3.1.
     */
    @Deprecated
    private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
    /**
     * Jacobian matrix of the weighted residuals.
     * This matrix is in canonical form just after the calls to
     * {@link #updateJacobian()}, but may be modified by the solver
     * in the derived class (the {@link LevenbergMarquardtOptimizer
     * Levenberg-Marquardt optimizer} does this).
     * @deprecated As of 3.1. To be removed in 4.0. Please use
     * {@link #computeWeightedJacobian(double[])} instead.
     */
    @Deprecated
    protected double[][] weightedResidualJacobian;
    /** Number of columns of the jacobian matrix.
     * @deprecated As of 3.1.
     */
    @Deprecated
    protected int cols;
    /** Number of rows of the jacobian matrix.
     * @deprecated As of 3.1.
     */
    @Deprecated
    protected int rows;
    /** Current point.
     * @deprecated As of 3.1.
     */
    @Deprecated
    protected double[] point;
    /** Current objective function value.
     * @deprecated As of 3.1.
     */
    @Deprecated
    protected double[] objective;
    /** Weighted residuals.
     * @deprecated As of 3.1.
     */
    @Deprecated
    protected double[] weightedResiduals;
    /** Cost value (square root of the weighted sum of squared residuals,
     * cf. {@link #computeCost(double[])}).
     * @deprecated As of 3.1. Field to become "private" in 4.0.
     * Please use {@link #setCost(double)}.
     */
    @Deprecated
    protected double cost;
    /** Objective function derivatives. */
    private MultivariateDifferentiableVectorFunction jF;
    /** Number of evaluations of the Jacobian. */
    private int jacobianEvaluations;
    /** Square-root of the weight matrix. */
    private RealMatrix weightMatrixSqrt;
120 
    /**
     * Simple constructor with default settings.
     * The convergence check is set to a {@link
     * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
     * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
     */
    @Deprecated
    protected AbstractLeastSquaresOptimizer() {}
129 
    /**
     * Creates an optimizer with the given convergence checker.
     *
     * @param checker Convergence checker.
     */
    protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
        super(checker);
    }
136 
    /**
     * Gets the number of times the Jacobian has been computed
     * (incremented by {@link #computeWeightedJacobian(double[])}).
     *
     * @return the number of evaluations of the Jacobian function.
     */
    public int getJacobianEvaluations() {
        return jacobianEvaluations;
    }
143 
144     /**
145      * Update the jacobian matrix.
146      *
147      * @throws DimensionMismatchException if the Jacobian dimension does not
148      * match problem dimension.
149      * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
150      * instead.
151      */
152     @Deprecated
153     protected void updateJacobian() {
154         final RealMatrix weightedJacobian = computeWeightedJacobian(point);
155         weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
156     }
157 
158     /**
159      * Computes the Jacobian matrix.
160      *
161      * @param params Model parameters at which to compute the Jacobian.
162      * @return the weighted Jacobian: W<sup>1/2</sup> J.
163      * @throws DimensionMismatchException if the Jacobian dimension does not
164      * match problem dimension.
165      * @since 3.1
166      */
167     protected RealMatrix computeWeightedJacobian(double[] params) {
168         ++jacobianEvaluations;
169 
170         final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
171         final int nC = params.length;
172         for (int i = 0; i < nC; ++i) {
173             dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
174         }
175         final DerivativeStructure[] dsValue = jF.value(dsPoint);
176         final int nR = getTarget().length;
177         if (dsValue.length != nR) {
178             throw new DimensionMismatchException(dsValue.length, nR);
179         }
180         final double[][] jacobianData = new double[nR][nC];
181         for (int i = 0; i < nR; ++i) {
182             int[] orders = new int[nC];
183             for (int j = 0; j < nC; ++j) {
184                 orders[j] = 1;
185                 jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
186                 orders[j] = 0;
187             }
188         }
189 
190         return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
191     }
192 
193     /**
194      * Update the residuals array and cost function value.
195      * @throws DimensionMismatchException if the dimension does not match the
196      * problem dimension.
197      * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
198      * if the maximal number of evaluations is exceeded.
199      * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
200      * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
201      * and {@link #setCost(double)} instead.
202      */
203     @Deprecated
204     protected void updateResidualsAndCost() {
205         objective = computeObjectiveValue(point);
206         final double[] res = computeResiduals(objective);
207 
208         // Compute cost.
209         cost = computeCost(res);
210 
211         // Compute weighted residuals.
212         final ArrayRealVector residuals = new ArrayRealVector(res);
213         weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
214     }
215 
216     /**
217      * Computes the cost.
218      *
219      * @param residuals Residuals.
220      * @return the cost.
221      * @see #computeResiduals(double[])
222      * @since 3.1
223      */
224     protected double computeCost(double[] residuals) {
225         final ArrayRealVector r = new ArrayRealVector(residuals);
226         return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
227     }
228 
    /**
     * Get the Root Mean Square value, i.e. the root of the arithmetic
     * mean of the square of all weighted residuals. This is related to the
     * criterion that is minimized by the optimizer as follows: if
     * <em>c</em> is the criterion, and <em>n</em> is the number of
     * measurements, then the RMS is <em>sqrt (c/n)</em>.
     *
     * @return RMS value
     */
    public double getRMS() {
        return FastMath.sqrt(getChiSquare() / rows);
    }
242 
    /**
     * Get a Chi-Square-like value assuming the N residuals follow N
     * distinct normal distributions centered on 0 and whose variances are
     * the reciprocal of the weights.
     * @return chi-square value
     */
    public double getChiSquare() {
        // Cost is sqrt(rT W r) (cf. computeCost), so the chi-square is its square.
        return cost * cost;
    }
252 
    /**
     * Gets the square-root of the weight matrix.
     * A defensive copy is returned so callers cannot modify internal state.
     *
     * @return the square-root of the weight matrix.
     * @since 3.1
     */
    public RealMatrix getWeightSquareRoot() {
        return weightMatrixSqrt.copy();
    }
262 
    /**
     * Sets the cost.
     * Replacement for direct writes to the deprecated {@link #cost} field.
     *
     * @param cost Cost value.
     * @since 3.1
     */
    protected void setCost(double cost) {
        this.cost = cost;
    }
272 
    /**
     * Get the covariance matrix of the optimized parameters,
     * using the default singularity threshold.
     *
     * @return the covariance matrix.
     * @throws org.apache.commons.math3.linear.SingularMatrixException
     * if the covariance matrix cannot be computed (singular problem).
     * @see #getCovariances(double)
     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
     * instead.
     */
    @Deprecated
    public double[][] getCovariances() {
        return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
    }
287 
    /**
     * Get the covariance matrix of the optimized parameters.
     * <br/>
     * Note that this operation involves the inversion of the
     * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
     * Jacobian matrix.
     * The {@code threshold} parameter is a way for the caller to specify
     * that the result of this computation should be considered meaningless,
     * and thus trigger an exception.
     *
     * @param threshold Singularity threshold.
     * @return the covariance matrix.
     * @throws org.apache.commons.math3.linear.SingularMatrixException
     * if the covariance matrix cannot be computed (singular problem).
     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
     * instead.
     */
    @Deprecated
    public double[][] getCovariances(double threshold) {
        // Delegates at the current point stored in the deprecated field.
        return computeCovariances(point, threshold);
    }
309 
310     /**
311      * Get the covariance matrix of the optimized parameters.
312      * <br/>
313      * Note that this operation involves the inversion of the
314      * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
315      * Jacobian matrix.
316      * The {@code threshold} parameter is a way for the caller to specify
317      * that the result of this computation should be considered meaningless,
318      * and thus trigger an exception.
319      *
320      * @param params Model parameters.
321      * @param threshold Singularity threshold.
322      * @return the covariance matrix.
323      * @throws org.apache.commons.math3.linear.SingularMatrixException
324      * if the covariance matrix cannot be computed (singular problem).
325      * @since 3.1
326      */
327     public double[][] computeCovariances(double[] params,
328                                          double threshold) {
329         // Set up the Jacobian.
330         final RealMatrix j = computeWeightedJacobian(params);
331 
332         // Compute transpose(J)J.
333         final RealMatrix jTj = j.transpose().multiply(j);
334 
335         // Compute the covariances matrix.
336         final DecompositionSolver solver
337             = new QRDecomposition(jTj, threshold).getSolver();
338         return solver.getInverse().getData();
339     }
340 
341     /**
342      * <p>
343      * Returns an estimate of the standard deviation of each parameter. The
344      * returned values are the so-called (asymptotic) standard errors on the
345      * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
346      * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
347      * {@code S} is the minimized value of the sum of squares objective function
348      * (as returned by {@link #getChiSquare()}), {@code n} is the number of
349      * observations, {@code m} is the number of parameters and {@code C} is the
350      * covariance matrix.
351      * </p>
352      * <p>
353      * See also
354      * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
355      * or
356      * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
357      * equations (34) and (35) for a particular case.
358      * </p>
359      *
360      * @return an estimate of the standard deviation of the optimized parameters
361      * @throws org.apache.commons.math3.linear.SingularMatrixException
362      * if the covariance matrix cannot be computed.
363      * @throws NumberIsTooSmallException if the number of degrees of freedom is not
364      * positive, i.e. the number of measurements is less or equal to the number of
365      * parameters.
366      * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
367      * instead. It should be emphasized that {@code guessParametersErrors} and
368      * {@code computeSigma} are <em>not</em> strictly equivalent.
369      */
370     @Deprecated
371     public double[] guessParametersErrors() {
372         if (rows <= cols) {
373             throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
374                                                 rows, cols, false);
375         }
376         double[] errors = new double[cols];
377         final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
378         double[][] covar = computeCovariances(point, 1e-14);
379         for (int i = 0; i < errors.length; ++i) {
380             errors[i] = FastMath.sqrt(covar[i][i]) * c;
381         }
382         return errors;
383     }
384 
385     /**
386      * Computes an estimate of the standard deviation of the parameters. The
387      * returned values are the square root of the diagonal coefficients of the
388      * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
389      * is the optimized value of the {@code i}-th parameter, and {@code C} is
390      * the covariance matrix.
391      *
392      * @param params Model parameters.
393      * @param covarianceSingularityThreshold Singularity threshold (see
394      * {@link #computeCovariances(double[],double) computeCovariances}).
395      * @return an estimate of the standard deviation of the optimized parameters
396      * @throws org.apache.commons.math3.linear.SingularMatrixException
397      * if the covariance matrix cannot be computed.
398      * @since 3.1
399      */
400     public double[] computeSigma(double[] params,
401                                  double covarianceSingularityThreshold) {
402         final int nC = params.length;
403         final double[] sig = new double[nC];
404         final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
405         for (int i = 0; i < nC; ++i) {
406             sig[i] = FastMath.sqrt(cov[i][i]);
407         }
408         return sig;
409     }
410 
411     /** {@inheritDoc}
412      * @deprecated As of 3.1. Please use
413      * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
414      * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
415      * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
416      * instead.
417      */
418     @Override
419     @Deprecated
420     public PointVectorValuePair optimize(int maxEval,
421                                          final DifferentiableMultivariateVectorFunction f,
422                                          final double[] target, final double[] weights,
423                                          final double[] startPoint) {
424         return optimizeInternal(maxEval,
425                                 FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
426                                 new Target(target),
427                                 new Weight(weights),
428                                 new InitialGuess(startPoint));
429     }
430 
431     /**
432      * Optimize an objective function.
433      * Optimization is considered to be a weighted least-squares minimization.
434      * The cost function to be minimized is
435      * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
436      *
437      * @param f Objective function.
438      * @param target Target value for the objective functions at optimum.
439      * @param weights Weights for the least squares cost computation.
440      * @param startPoint Start point for optimization.
441      * @return the point/value pair giving the optimal value for objective
442      * function.
443      * @param maxEval Maximum number of function evaluations.
444      * @throws org.apache.commons.math3.exception.DimensionMismatchException
445      * if the start point dimension is wrong.
446      * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
447      * if the maximal number of evaluations is exceeded.
448      * @throws org.apache.commons.math3.exception.NullArgumentException if
449      * any argument is {@code null}.
450      * @deprecated As of 3.1. Please use
451      * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
452      * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
453      * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
454      * instead.
455      */
456     @Deprecated
457     public PointVectorValuePair optimize(final int maxEval,
458                                          final MultivariateDifferentiableVectorFunction f,
459                                          final double[] target, final double[] weights,
460                                          final double[] startPoint) {
461         return optimizeInternal(maxEval, f,
462                                 new Target(target),
463                                 new Weight(weights),
464                                 new InitialGuess(startPoint));
465     }
466 
    /**
     * Optimize an objective function.
     * Optimization is considered to be a weighted least-squares minimization.
     * The cost function to be minimized is
     * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
     *
     * @param maxEval Allowed number of evaluations of the objective function.
     * @param f Objective function.
     * @param optData Optimization data. The following data will be looked for:
     * <ul>
     *  <li>{@link Target}</li>
     *  <li>{@link Weight}</li>
     *  <li>{@link InitialGuess}</li>
     * </ul>
     * @return the point/value pair giving the optimal value of the objective
     * function.
     * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
     * the maximal number of evaluations is exceeded.
     * @throws DimensionMismatchException if the target, and weight arguments
     * have inconsistent dimensions.
     * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,
     * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
     * @since 3.1
     * @deprecated As of 3.1. Override is necessary only until this class's generic
     * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
     */
    @Deprecated
    protected PointVectorValuePair optimizeInternal(final int maxEval,
                                                    final MultivariateDifferentiableVectorFunction f,
                                                    OptimizationData... optData) {
        // XXX Conversion will be removed when the generic argument of the
        // base class becomes "MultivariateDifferentiableVectorFunction".
        return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
    }
501 
    /** {@inheritDoc} */
    @Override
    protected void setUp() {
        super.setUp();

        // Reset counter.
        jacobianEvaluations = 0;

        // Square-root of the weight matrix.
        weightMatrixSqrt = squareRoot(getWeight());

        // Store least squares problem characteristics.
        // XXX The conversion won't be necessary when the generic argument of
        // the base class becomes "MultivariateDifferentiableVectorFunction".
        // XXX "jF" is not strictly necessary anymore but is currently more
        // efficient than converting the value returned from "getObjectiveFunction()"
        // every time it is used.
        jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());

        // Arrays shared with "private" and "protected" methods.
        // These deprecated fields are kept in sync for subclasses that still read them.
        point = getStartPoint();
        rows = getTarget().length;
        cols = point.length;
    }
526 
527     /**
528      * Computes the residuals.
529      * The residual is the difference between the observed (target)
530      * values and the model (objective function) value.
531      * There is one residual for each element of the vector-valued
532      * function.
533      *
534      * @param objectiveValue Value of the the objective function. This is
535      * the value returned from a call to
536      * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
537      * (whose array argument contains the model parameters).
538      * @return the residuals.
539      * @throws DimensionMismatchException if {@code params} has a wrong
540      * length.
541      * @since 3.1
542      */
543     protected double[] computeResiduals(double[] objectiveValue) {
544         final double[] target = getTarget();
545         if (objectiveValue.length != target.length) {
546             throw new DimensionMismatchException(target.length,
547                                                  objectiveValue.length);
548         }
549 
550         final double[] residuals = new double[target.length];
551         for (int i = 0; i < target.length; i++) {
552             residuals[i] = target[i] - objectiveValue[i];
553         }
554 
555         return residuals;
556     }
557 
558     /**
559      * Computes the square-root of the weight matrix.
560      *
561      * @param m Symmetric, positive-definite (weight) matrix.
562      * @return the square-root of the weight matrix.
563      */
564     private RealMatrix squareRoot(RealMatrix m) {
565         if (m instanceof DiagonalMatrix) {
566             final int dim = m.getRowDimension();
567             final RealMatrix sqrtM = new DiagonalMatrix(dim);
568             for (int i = 0; i < dim; i++) {
569                sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i)));
570             }
571             return sqrtM;
572         } else {
573             final EigenDecomposition dec = new EigenDecomposition(m);
574             return dec.getSquareRoot();
575         }
576     }
577 }