View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math3.optimization.general;
19  
20  import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
21  import org.apache.commons.math3.analysis.FunctionUtils;
22  import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
23  import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
24  import org.apache.commons.math3.exception.DimensionMismatchException;
25  import org.apache.commons.math3.exception.NumberIsTooSmallException;
26  import org.apache.commons.math3.exception.util.LocalizedFormats;
27  import org.apache.commons.math3.linear.ArrayRealVector;
28  import org.apache.commons.math3.linear.RealMatrix;
29  import org.apache.commons.math3.linear.DiagonalMatrix;
30  import org.apache.commons.math3.linear.DecompositionSolver;
31  import org.apache.commons.math3.linear.MatrixUtils;
32  import org.apache.commons.math3.linear.QRDecomposition;
33  import org.apache.commons.math3.linear.EigenDecomposition;
34  import org.apache.commons.math3.optimization.OptimizationData;
35  import org.apache.commons.math3.optimization.InitialGuess;
36  import org.apache.commons.math3.optimization.Target;
37  import org.apache.commons.math3.optimization.Weight;
38  import org.apache.commons.math3.optimization.ConvergenceChecker;
39  import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
40  import org.apache.commons.math3.optimization.PointVectorValuePair;
41  import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
42  import org.apache.commons.math3.util.FastMath;
43  
44  /**
45   * Base class for implementing least squares optimizers.
46   * It handles the boilerplate methods associated to thresholds settings,
47   * Jacobian and error estimation.
48   * <br/>
49   * This class constructs the Jacobian matrix of the function argument in method
50   * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
51   * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
52   * optimize} and assumes that the rows of that matrix iterate on the model
53   * functions while the columns iterate on the parameters; thus, the numbers
54   * of rows is equal to the dimension of the
55   * {@link org.apache.commons.math3.optimization.Target Target} while
56   * the number of columns is equal to the dimension of the
57   * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
58   *
59   * @version $Id: AbstractLeastSquaresOptimizer.java 1591835 2014-05-02 09:04:01Z tn $
60   * @deprecated As of 3.1 (to be removed in 4.0).
61   * @since 1.2
62   */
63  @Deprecated
64  public abstract class AbstractLeastSquaresOptimizer
65      extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
66      implements DifferentiableMultivariateVectorOptimizer {
67      /**
68       * Singularity threshold (cf. {@link #getCovariances(double)}).
69       * @deprecated As of 3.1.
70       */
71      @Deprecated
72      private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
73      /**
74       * Jacobian matrix of the weighted residuals.
75       * This matrix is in canonical form just after the calls to
76       * {@link #updateJacobian()}, but may be modified by the solver
77       * in the derived class (the {@link LevenbergMarquardtOptimizer
78       * Levenberg-Marquardt optimizer} does this).
79       * @deprecated As of 3.1. To be removed in 4.0. Please use
80       * {@link #computeWeightedJacobian(double[])} instead.
81       */
82      @Deprecated
83      protected double[][] weightedResidualJacobian;
84      /** Number of columns of the jacobian matrix.
85       * @deprecated As of 3.1.
86       */
87      @Deprecated
88      protected int cols;
89      /** Number of rows of the jacobian matrix.
90       * @deprecated As of 3.1.
91       */
92      @Deprecated
93      protected int rows;
94      /** Current point.
95       * @deprecated As of 3.1.
96       */
97      @Deprecated
98      protected double[] point;
99      /** Current objective function value.
100      * @deprecated As of 3.1.
101      */
102     @Deprecated
103     protected double[] objective;
104     /** Weighted residuals
105      * @deprecated As of 3.1.
106      */
107     @Deprecated
108     protected double[] weightedResiduals;
109     /** Cost value (square root of the sum of the residuals).
110      * @deprecated As of 3.1. Field to become "private" in 4.0.
111      * Please use {@link #setCost(double)}.
112      */
113     @Deprecated
114     protected double cost;
115     /** Objective function derivatives. */
116     private MultivariateDifferentiableVectorFunction jF;
117     /** Number of evaluations of the Jacobian. */
118     private int jacobianEvaluations;
119     /** Square-root of the weight matrix. */
120     private RealMatrix weightMatrixSqrt;
121 
    /**
     * Simple constructor with default settings.
     * The convergence check is set to a {@link
     * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
     * The body is intentionally empty: all state is initialized lazily in
     * {@link #setUp()} before each optimization run.
     * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
     */
    @Deprecated
    protected AbstractLeastSquaresOptimizer() {}
130 
    /**
     * Creates an optimizer that uses the given convergence checker.
     *
     * @param checker Convergence checker.
     */
    protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
        super(checker);
    }
137 
    /**
     * Gets the number of times the Jacobian has been evaluated
     * (the counter is incremented by each call to
     * {@link #computeWeightedJacobian(double[])} and reset in {@link #setUp()}).
     *
     * @return the number of evaluations of the Jacobian function.
     */
    public int getJacobianEvaluations() {
        return jacobianEvaluations;
    }
144 
145     /**
146      * Update the jacobian matrix.
147      *
148      * @throws DimensionMismatchException if the Jacobian dimension does not
149      * match problem dimension.
150      * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
151      * instead.
152      */
153     @Deprecated
154     protected void updateJacobian() {
155         final RealMatrix weightedJacobian = computeWeightedJacobian(point);
156         weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
157     }
158 
159     /**
160      * Computes the Jacobian matrix.
161      *
162      * @param params Model parameters at which to compute the Jacobian.
163      * @return the weighted Jacobian: W<sup>1/2</sup> J.
164      * @throws DimensionMismatchException if the Jacobian dimension does not
165      * match problem dimension.
166      * @since 3.1
167      */
168     protected RealMatrix computeWeightedJacobian(double[] params) {
169         ++jacobianEvaluations;
170 
171         final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
172         final int nC = params.length;
173         for (int i = 0; i < nC; ++i) {
174             dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
175         }
176         final DerivativeStructure[] dsValue = jF.value(dsPoint);
177         final int nR = getTarget().length;
178         if (dsValue.length != nR) {
179             throw new DimensionMismatchException(dsValue.length, nR);
180         }
181         final double[][] jacobianData = new double[nR][nC];
182         for (int i = 0; i < nR; ++i) {
183             int[] orders = new int[nC];
184             for (int j = 0; j < nC; ++j) {
185                 orders[j] = 1;
186                 jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
187                 orders[j] = 0;
188             }
189         }
190 
191         return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
192     }
193 
194     /**
195      * Update the residuals array and cost function value.
196      * @throws DimensionMismatchException if the dimension does not match the
197      * problem dimension.
198      * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
199      * if the maximal number of evaluations is exceeded.
200      * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
201      * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
202      * and {@link #setCost(double)} instead.
203      */
204     @Deprecated
205     protected void updateResidualsAndCost() {
206         objective = computeObjectiveValue(point);
207         final double[] res = computeResiduals(objective);
208 
209         // Compute cost.
210         cost = computeCost(res);
211 
212         // Compute weighted residuals.
213         final ArrayRealVector residuals = new ArrayRealVector(res);
214         weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
215     }
216 
217     /**
218      * Computes the cost.
219      *
220      * @param residuals Residuals.
221      * @return the cost.
222      * @see #computeResiduals(double[])
223      * @since 3.1
224      */
225     protected double computeCost(double[] residuals) {
226         final ArrayRealVector r = new ArrayRealVector(residuals);
227         return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
228     }
229 
230     /**
231      * Get the Root Mean Square value.
232      * Get the Root Mean Square value, i.e. the root of the arithmetic
233      * mean of the square of all weighted residuals. This is related to the
234      * criterion that is minimized by the optimizer as follows: if
235      * <em>c</em> if the criterion, and <em>n</em> is the number of
236      * measurements, then the RMS is <em>sqrt (c/n)</em>.
237      *
238      * @return RMS value
239      */
240     public double getRMS() {
241         return FastMath.sqrt(getChiSquare() / rows);
242     }
243 
    /**
     * Get a Chi-Square-like value assuming the N residuals follow N
     * distinct normal distributions centered on 0 and whose variances are
     * the reciprocal of the weights.
     * Computed as the square of the cost set by the last optimization step.
     * @return chi-square value
     */
    public double getChiSquare() {
        return cost * cost;
    }
253 
    /**
     * Gets the square-root of the weight matrix.
     * A defensive copy is returned, so the caller cannot corrupt the
     * optimizer's internal state.
     *
     * @return the square-root of the weight matrix.
     * @since 3.1
     */
    public RealMatrix getWeightSquareRoot() {
        return weightMatrixSqrt.copy();
    }
263 
    /**
     * Sets the cost (stored in the deprecated protected {@link #cost} field,
     * to become private in 4.0).
     *
     * @param cost Cost value.
     * @since 3.1
     */
    protected void setCost(double cost) {
        this.cost = cost;
    }
273 
    /**
     * Get the covariance matrix of the optimized parameters, using the
     * default singularity threshold ({@value #DEFAULT_SINGULARITY_THRESHOLD}).
     *
     * @return the covariance matrix.
     * @throws org.apache.commons.math3.linear.SingularMatrixException
     * if the covariance matrix cannot be computed (singular problem).
     * @see #getCovariances(double)
     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
     * instead.
     */
    @Deprecated
    public double[][] getCovariances() {
        return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
    }
288 
    /**
     * Get the covariance matrix of the optimized parameters, evaluated at the
     * current {@link #point}.
     * <br/>
     * Note that this operation involves the inversion of the
     * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
     * Jacobian matrix.
     * The {@code threshold} parameter is a way for the caller to specify
     * that the result of this computation should be considered meaningless,
     * and thus trigger an exception.
     *
     * @param threshold Singularity threshold.
     * @return the covariance matrix.
     * @throws org.apache.commons.math3.linear.SingularMatrixException
     * if the covariance matrix cannot be computed (singular problem).
     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
     * instead.
     */
    @Deprecated
    public double[][] getCovariances(double threshold) {
        return computeCovariances(point, threshold);
    }
310 
311     /**
312      * Get the covariance matrix of the optimized parameters.
313      * <br/>
314      * Note that this operation involves the inversion of the
315      * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
316      * Jacobian matrix.
317      * The {@code threshold} parameter is a way for the caller to specify
318      * that the result of this computation should be considered meaningless,
319      * and thus trigger an exception.
320      *
321      * @param params Model parameters.
322      * @param threshold Singularity threshold.
323      * @return the covariance matrix.
324      * @throws org.apache.commons.math3.linear.SingularMatrixException
325      * if the covariance matrix cannot be computed (singular problem).
326      * @since 3.1
327      */
328     public double[][] computeCovariances(double[] params,
329                                          double threshold) {
330         // Set up the Jacobian.
331         final RealMatrix j = computeWeightedJacobian(params);
332 
333         // Compute transpose(J)J.
334         final RealMatrix jTj = j.transpose().multiply(j);
335 
336         // Compute the covariances matrix.
337         final DecompositionSolver solver
338             = new QRDecomposition(jTj, threshold).getSolver();
339         return solver.getInverse().getData();
340     }
341 
342     /**
343      * <p>
344      * Returns an estimate of the standard deviation of each parameter. The
345      * returned values are the so-called (asymptotic) standard errors on the
346      * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
347      * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
348      * {@code S} is the minimized value of the sum of squares objective function
349      * (as returned by {@link #getChiSquare()}), {@code n} is the number of
350      * observations, {@code m} is the number of parameters and {@code C} is the
351      * covariance matrix.
352      * </p>
353      * <p>
354      * See also
355      * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
356      * or
357      * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
358      * equations (34) and (35) for a particular case.
359      * </p>
360      *
361      * @return an estimate of the standard deviation of the optimized parameters
362      * @throws org.apache.commons.math3.linear.SingularMatrixException
363      * if the covariance matrix cannot be computed.
364      * @throws NumberIsTooSmallException if the number of degrees of freedom is not
365      * positive, i.e. the number of measurements is less or equal to the number of
366      * parameters.
367      * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
368      * instead. It should be emphasized that {@code guessParametersErrors} and
369      * {@code computeSigma} are <em>not</em> strictly equivalent.
370      */
371     @Deprecated
372     public double[] guessParametersErrors() {
373         if (rows <= cols) {
374             throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
375                                                 rows, cols, false);
376         }
377         double[] errors = new double[cols];
378         final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
379         double[][] covar = computeCovariances(point, 1e-14);
380         for (int i = 0; i < errors.length; ++i) {
381             errors[i] = FastMath.sqrt(covar[i][i]) * c;
382         }
383         return errors;
384     }
385 
386     /**
387      * Computes an estimate of the standard deviation of the parameters. The
388      * returned values are the square root of the diagonal coefficients of the
389      * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
390      * is the optimized value of the {@code i}-th parameter, and {@code C} is
391      * the covariance matrix.
392      *
393      * @param params Model parameters.
394      * @param covarianceSingularityThreshold Singularity threshold (see
395      * {@link #computeCovariances(double[],double) computeCovariances}).
396      * @return an estimate of the standard deviation of the optimized parameters
397      * @throws org.apache.commons.math3.linear.SingularMatrixException
398      * if the covariance matrix cannot be computed.
399      * @since 3.1
400      */
401     public double[] computeSigma(double[] params,
402                                  double covarianceSingularityThreshold) {
403         final int nC = params.length;
404         final double[] sig = new double[nC];
405         final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
406         for (int i = 0; i < nC; ++i) {
407             sig[i] = FastMath.sqrt(cov[i][i]);
408         }
409         return sig;
410     }
411 
412     /** {@inheritDoc}
413      * @deprecated As of 3.1. Please use
414      * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
415      * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
416      * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
417      * instead.
418      */
419     @Override
420     @Deprecated
421     public PointVectorValuePair optimize(int maxEval,
422                                          final DifferentiableMultivariateVectorFunction f,
423                                          final double[] target, final double[] weights,
424                                          final double[] startPoint) {
425         return optimizeInternal(maxEval,
426                                 FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
427                                 new Target(target),
428                                 new Weight(weights),
429                                 new InitialGuess(startPoint));
430     }
431 
432     /**
433      * Optimize an objective function.
434      * Optimization is considered to be a weighted least-squares minimization.
435      * The cost function to be minimized is
436      * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
437      *
438      * @param f Objective function.
439      * @param target Target value for the objective functions at optimum.
440      * @param weights Weights for the least squares cost computation.
441      * @param startPoint Start point for optimization.
442      * @return the point/value pair giving the optimal value for objective
443      * function.
444      * @param maxEval Maximum number of function evaluations.
445      * @throws org.apache.commons.math3.exception.DimensionMismatchException
446      * if the start point dimension is wrong.
447      * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
448      * if the maximal number of evaluations is exceeded.
449      * @throws org.apache.commons.math3.exception.NullArgumentException if
450      * any argument is {@code null}.
451      * @deprecated As of 3.1. Please use
452      * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
453      * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
454      * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
455      * instead.
456      */
457     @Deprecated
458     public PointVectorValuePair optimize(final int maxEval,
459                                          final MultivariateDifferentiableVectorFunction f,
460                                          final double[] target, final double[] weights,
461                                          final double[] startPoint) {
462         return optimizeInternal(maxEval, f,
463                                 new Target(target),
464                                 new Weight(weights),
465                                 new InitialGuess(startPoint));
466     }
467 
    /**
     * Optimize an objective function.
     * Optimization is considered to be a weighted least-squares minimization.
     * The cost function to be minimized is
     * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
     *
     * @param maxEval Allowed number of evaluations of the objective function.
     * @param f Objective function.
     * @param optData Optimization data. The following data will be looked for:
     * <ul>
     *  <li>{@link Target}</li>
     *  <li>{@link Weight}</li>
     *  <li>{@link InitialGuess}</li>
     * </ul>
     * @return the point/value pair giving the optimal value of the objective
     * function.
     * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
     * the maximal number of evaluations is exceeded.
     * @throws DimensionMismatchException if the target, and weight arguments
     * have inconsistent dimensions.
     * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,
     * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
     * @since 3.1
     * @deprecated As of 3.1. Override is necessary only until this class's generic
     * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
     */
    @Deprecated
    protected PointVectorValuePair optimizeInternal(final int maxEval,
                                                    final MultivariateDifferentiableVectorFunction f,
                                                    OptimizationData... optData) {
        // XXX Conversion will be removed when the generic argument of the
        // base class becomes "MultivariateDifferentiableVectorFunction".
        return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
    }
502 
    /** {@inheritDoc}
     * Initializes per-run state: the Jacobian evaluation counter, the
     * square-root of the weight matrix, the differentiable view of the
     * objective function, and the deprecated protected fields
     * ({@link #point}, {@link #rows}, {@link #cols}) that legacy subclasses
     * still read. Order matters: the base class must be set up first so
     * {@code getWeight()}, {@code getObjectiveFunction()},
     * {@code getStartPoint()} and {@code getTarget()} are valid.
     */
    @Override
    protected void setUp() {
        super.setUp();

        // Reset counter.
        jacobianEvaluations = 0;

        // Square-root of the weight matrix.
        weightMatrixSqrt = squareRoot(getWeight());

        // Store least squares problem characteristics.
        // XXX The conversion won't be necessary when the generic argument of
        // the base class becomes "MultivariateDifferentiableVectorFunction".
        // XXX "jF" is not strictly necessary anymore but is currently more
        // efficient than converting the value returned from "getObjectiveFunction()"
        // every time it is used.
        jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());

        // Arrays shared with "private" and "protected" methods.
        point = getStartPoint();
        rows = getTarget().length;
        cols = point.length;
    }
527 
528     /**
529      * Computes the residuals.
530      * The residual is the difference between the observed (target)
531      * values and the model (objective function) value.
532      * There is one residual for each element of the vector-valued
533      * function.
534      *
535      * @param objectiveValue Value of the the objective function. This is
536      * the value returned from a call to
537      * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
538      * (whose array argument contains the model parameters).
539      * @return the residuals.
540      * @throws DimensionMismatchException if {@code params} has a wrong
541      * length.
542      * @since 3.1
543      */
544     protected double[] computeResiduals(double[] objectiveValue) {
545         final double[] target = getTarget();
546         if (objectiveValue.length != target.length) {
547             throw new DimensionMismatchException(target.length,
548                                                  objectiveValue.length);
549         }
550 
551         final double[] residuals = new double[target.length];
552         for (int i = 0; i < target.length; i++) {
553             residuals[i] = target[i] - objectiveValue[i];
554         }
555 
556         return residuals;
557     }
558 
559     /**
560      * Computes the square-root of the weight matrix.
561      *
562      * @param m Symmetric, positive-definite (weight) matrix.
563      * @return the square-root of the weight matrix.
564      */
565     private RealMatrix squareRoot(RealMatrix m) {
566         if (m instanceof DiagonalMatrix) {
567             final int dim = m.getRowDimension();
568             final RealMatrix sqrtM = new DiagonalMatrix(dim);
569             for (int i = 0; i < dim; i++) {
570                sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i)));
571             }
572             return sqrtM;
573         } else {
574             final EigenDecomposition dec = new EigenDecomposition(m);
575             return dec.getSquareRoot();
576         }
577     }
578 }