001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.optimization.general;
019
020import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
021import org.apache.commons.math3.analysis.FunctionUtils;
022import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
023import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
024import org.apache.commons.math3.exception.DimensionMismatchException;
025import org.apache.commons.math3.exception.NumberIsTooSmallException;
026import org.apache.commons.math3.exception.util.LocalizedFormats;
027import org.apache.commons.math3.linear.ArrayRealVector;
028import org.apache.commons.math3.linear.RealMatrix;
029import org.apache.commons.math3.linear.DiagonalMatrix;
030import org.apache.commons.math3.linear.DecompositionSolver;
031import org.apache.commons.math3.linear.MatrixUtils;
032import org.apache.commons.math3.linear.QRDecomposition;
033import org.apache.commons.math3.linear.EigenDecomposition;
034import org.apache.commons.math3.optimization.OptimizationData;
035import org.apache.commons.math3.optimization.InitialGuess;
036import org.apache.commons.math3.optimization.Target;
037import org.apache.commons.math3.optimization.Weight;
038import org.apache.commons.math3.optimization.ConvergenceChecker;
039import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
040import org.apache.commons.math3.optimization.PointVectorValuePair;
041import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
042import org.apache.commons.math3.util.FastMath;
043
044/**
045 * Base class for implementing least squares optimizers.
046 * It handles the boilerplate methods associated to thresholds settings,
047 * Jacobian and error estimation.
048 * <br/>
049 * This class constructs the Jacobian matrix of the function argument in method
050 * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
051 * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
052 * optimize} and assumes that the rows of that matrix iterate on the model
053 * functions while the columns iterate on the parameters; thus, the numbers
054 * of rows is equal to the dimension of the
055 * {@link org.apache.commons.math3.optimization.Target Target} while
056 * the number of columns is equal to the dimension of the
057 * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
058 *
059 * @deprecated As of 3.1 (to be removed in 4.0).
060 * @since 1.2
061 */
062@Deprecated
063public abstract class AbstractLeastSquaresOptimizer
064    extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
065    implements DifferentiableMultivariateVectorOptimizer {
066    /**
067     * Singularity threshold (cf. {@link #getCovariances(double)}).
068     * @deprecated As of 3.1.
069     */
070    @Deprecated
071    private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
072    /**
073     * Jacobian matrix of the weighted residuals.
074     * This matrix is in canonical form just after the calls to
075     * {@link #updateJacobian()}, but may be modified by the solver
076     * in the derived class (the {@link LevenbergMarquardtOptimizer
077     * Levenberg-Marquardt optimizer} does this).
078     * @deprecated As of 3.1. To be removed in 4.0. Please use
079     * {@link #computeWeightedJacobian(double[])} instead.
080     */
081    @Deprecated
082    protected double[][] weightedResidualJacobian;
083    /** Number of columns of the jacobian matrix.
084     * @deprecated As of 3.1.
085     */
086    @Deprecated
087    protected int cols;
088    /** Number of rows of the jacobian matrix.
089     * @deprecated As of 3.1.
090     */
091    @Deprecated
092    protected int rows;
093    /** Current point.
094     * @deprecated As of 3.1.
095     */
096    @Deprecated
097    protected double[] point;
098    /** Current objective function value.
099     * @deprecated As of 3.1.
100     */
101    @Deprecated
102    protected double[] objective;
103    /** Weighted residuals
104     * @deprecated As of 3.1.
105     */
106    @Deprecated
107    protected double[] weightedResiduals;
108    /** Cost value (square root of the sum of the residuals).
109     * @deprecated As of 3.1. Field to become "private" in 4.0.
110     * Please use {@link #setCost(double)}.
111     */
112    @Deprecated
113    protected double cost;
114    /** Objective function derivatives. */
115    private MultivariateDifferentiableVectorFunction jF;
116    /** Number of evaluations of the Jacobian. */
117    private int jacobianEvaluations;
118    /** Square-root of the weight matrix. */
119    private RealMatrix weightMatrixSqrt;
120
121    /**
122     * Simple constructor with default settings.
123     * The convergence check is set to a {@link
124     * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
125     * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
126     */
127    @Deprecated
128    protected AbstractLeastSquaresOptimizer() {}
129
130    /**
131     * @param checker Convergence checker.
132     */
133    protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
134        super(checker);
135    }
136
137    /**
138     * @return the number of evaluations of the Jacobian function.
139     */
140    public int getJacobianEvaluations() {
141        return jacobianEvaluations;
142    }
143
144    /**
145     * Update the jacobian matrix.
146     *
147     * @throws DimensionMismatchException if the Jacobian dimension does not
148     * match problem dimension.
149     * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
150     * instead.
151     */
152    @Deprecated
153    protected void updateJacobian() {
154        final RealMatrix weightedJacobian = computeWeightedJacobian(point);
155        weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
156    }
157
158    /**
159     * Computes the Jacobian matrix.
160     *
161     * @param params Model parameters at which to compute the Jacobian.
162     * @return the weighted Jacobian: W<sup>1/2</sup> J.
163     * @throws DimensionMismatchException if the Jacobian dimension does not
164     * match problem dimension.
165     * @since 3.1
166     */
167    protected RealMatrix computeWeightedJacobian(double[] params) {
168        ++jacobianEvaluations;
169
170        final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
171        final int nC = params.length;
172        for (int i = 0; i < nC; ++i) {
173            dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
174        }
175        final DerivativeStructure[] dsValue = jF.value(dsPoint);
176        final int nR = getTarget().length;
177        if (dsValue.length != nR) {
178            throw new DimensionMismatchException(dsValue.length, nR);
179        }
180        final double[][] jacobianData = new double[nR][nC];
181        for (int i = 0; i < nR; ++i) {
182            int[] orders = new int[nC];
183            for (int j = 0; j < nC; ++j) {
184                orders[j] = 1;
185                jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
186                orders[j] = 0;
187            }
188        }
189
190        return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
191    }
192
193    /**
194     * Update the residuals array and cost function value.
195     * @throws DimensionMismatchException if the dimension does not match the
196     * problem dimension.
197     * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
198     * if the maximal number of evaluations is exceeded.
199     * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
200     * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
201     * and {@link #setCost(double)} instead.
202     */
203    @Deprecated
204    protected void updateResidualsAndCost() {
205        objective = computeObjectiveValue(point);
206        final double[] res = computeResiduals(objective);
207
208        // Compute cost.
209        cost = computeCost(res);
210
211        // Compute weighted residuals.
212        final ArrayRealVector residuals = new ArrayRealVector(res);
213        weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
214    }
215
216    /**
217     * Computes the cost.
218     *
219     * @param residuals Residuals.
220     * @return the cost.
221     * @see #computeResiduals(double[])
222     * @since 3.1
223     */
224    protected double computeCost(double[] residuals) {
225        final ArrayRealVector r = new ArrayRealVector(residuals);
226        return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
227    }
228
229    /**
230     * Get the Root Mean Square value.
231     * Get the Root Mean Square value, i.e. the root of the arithmetic
232     * mean of the square of all weighted residuals. This is related to the
233     * criterion that is minimized by the optimizer as follows: if
234     * <em>c</em> if the criterion, and <em>n</em> is the number of
235     * measurements, then the RMS is <em>sqrt (c/n)</em>.
236     *
237     * @return RMS value
238     */
239    public double getRMS() {
240        return FastMath.sqrt(getChiSquare() / rows);
241    }
242
243    /**
244     * Get a Chi-Square-like value assuming the N residuals follow N
245     * distinct normal distributions centered on 0 and whose variances are
246     * the reciprocal of the weights.
247     * @return chi-square value
248     */
249    public double getChiSquare() {
250        return cost * cost;
251    }
252
253    /**
254     * Gets the square-root of the weight matrix.
255     *
256     * @return the square-root of the weight matrix.
257     * @since 3.1
258     */
259    public RealMatrix getWeightSquareRoot() {
260        return weightMatrixSqrt.copy();
261    }
262
263    /**
264     * Sets the cost.
265     *
266     * @param cost Cost value.
267     * @since 3.1
268     */
269    protected void setCost(double cost) {
270        this.cost = cost;
271    }
272
273    /**
274     * Get the covariance matrix of the optimized parameters.
275     *
276     * @return the covariance matrix.
277     * @throws org.apache.commons.math3.linear.SingularMatrixException
278     * if the covariance matrix cannot be computed (singular problem).
279     * @see #getCovariances(double)
280     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
281     * instead.
282     */
283    @Deprecated
284    public double[][] getCovariances() {
285        return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
286    }
287
288    /**
289     * Get the covariance matrix of the optimized parameters.
290     * <br/>
291     * Note that this operation involves the inversion of the
292     * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
293     * Jacobian matrix.
294     * The {@code threshold} parameter is a way for the caller to specify
295     * that the result of this computation should be considered meaningless,
296     * and thus trigger an exception.
297     *
298     * @param threshold Singularity threshold.
299     * @return the covariance matrix.
300     * @throws org.apache.commons.math3.linear.SingularMatrixException
301     * if the covariance matrix cannot be computed (singular problem).
302     * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
303     * instead.
304     */
305    @Deprecated
306    public double[][] getCovariances(double threshold) {
307        return computeCovariances(point, threshold);
308    }
309
310    /**
311     * Get the covariance matrix of the optimized parameters.
312     * <br/>
313     * Note that this operation involves the inversion of the
314     * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
315     * Jacobian matrix.
316     * The {@code threshold} parameter is a way for the caller to specify
317     * that the result of this computation should be considered meaningless,
318     * and thus trigger an exception.
319     *
320     * @param params Model parameters.
321     * @param threshold Singularity threshold.
322     * @return the covariance matrix.
323     * @throws org.apache.commons.math3.linear.SingularMatrixException
324     * if the covariance matrix cannot be computed (singular problem).
325     * @since 3.1
326     */
327    public double[][] computeCovariances(double[] params,
328                                         double threshold) {
329        // Set up the Jacobian.
330        final RealMatrix j = computeWeightedJacobian(params);
331
332        // Compute transpose(J)J.
333        final RealMatrix jTj = j.transpose().multiply(j);
334
335        // Compute the covariances matrix.
336        final DecompositionSolver solver
337            = new QRDecomposition(jTj, threshold).getSolver();
338        return solver.getInverse().getData();
339    }
340
341    /**
342     * <p>
343     * Returns an estimate of the standard deviation of each parameter. The
344     * returned values are the so-called (asymptotic) standard errors on the
345     * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
346     * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
347     * {@code S} is the minimized value of the sum of squares objective function
348     * (as returned by {@link #getChiSquare()}), {@code n} is the number of
349     * observations, {@code m} is the number of parameters and {@code C} is the
350     * covariance matrix.
351     * </p>
352     * <p>
353     * See also
354     * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
355     * or
356     * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
357     * equations (34) and (35) for a particular case.
358     * </p>
359     *
360     * @return an estimate of the standard deviation of the optimized parameters
361     * @throws org.apache.commons.math3.linear.SingularMatrixException
362     * if the covariance matrix cannot be computed.
363     * @throws NumberIsTooSmallException if the number of degrees of freedom is not
364     * positive, i.e. the number of measurements is less or equal to the number of
365     * parameters.
366     * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
367     * instead. It should be emphasized that {@code guessParametersErrors} and
368     * {@code computeSigma} are <em>not</em> strictly equivalent.
369     */
370    @Deprecated
371    public double[] guessParametersErrors() {
372        if (rows <= cols) {
373            throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
374                                                rows, cols, false);
375        }
376        double[] errors = new double[cols];
377        final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
378        double[][] covar = computeCovariances(point, 1e-14);
379        for (int i = 0; i < errors.length; ++i) {
380            errors[i] = FastMath.sqrt(covar[i][i]) * c;
381        }
382        return errors;
383    }
384
385    /**
386     * Computes an estimate of the standard deviation of the parameters. The
387     * returned values are the square root of the diagonal coefficients of the
388     * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
389     * is the optimized value of the {@code i}-th parameter, and {@code C} is
390     * the covariance matrix.
391     *
392     * @param params Model parameters.
393     * @param covarianceSingularityThreshold Singularity threshold (see
394     * {@link #computeCovariances(double[],double) computeCovariances}).
395     * @return an estimate of the standard deviation of the optimized parameters
396     * @throws org.apache.commons.math3.linear.SingularMatrixException
397     * if the covariance matrix cannot be computed.
398     * @since 3.1
399     */
400    public double[] computeSigma(double[] params,
401                                 double covarianceSingularityThreshold) {
402        final int nC = params.length;
403        final double[] sig = new double[nC];
404        final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
405        for (int i = 0; i < nC; ++i) {
406            sig[i] = FastMath.sqrt(cov[i][i]);
407        }
408        return sig;
409    }
410
411    /** {@inheritDoc}
412     * @deprecated As of 3.1. Please use
413     * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
414     * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
415     * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
416     * instead.
417     */
418    @Override
419    @Deprecated
420    public PointVectorValuePair optimize(int maxEval,
421                                         final DifferentiableMultivariateVectorFunction f,
422                                         final double[] target, final double[] weights,
423                                         final double[] startPoint) {
424        return optimizeInternal(maxEval,
425                                FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
426                                new Target(target),
427                                new Weight(weights),
428                                new InitialGuess(startPoint));
429    }
430
431    /**
432     * Optimize an objective function.
433     * Optimization is considered to be a weighted least-squares minimization.
434     * The cost function to be minimized is
435     * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
436     *
437     * @param f Objective function.
438     * @param target Target value for the objective functions at optimum.
439     * @param weights Weights for the least squares cost computation.
440     * @param startPoint Start point for optimization.
441     * @return the point/value pair giving the optimal value for objective
442     * function.
443     * @param maxEval Maximum number of function evaluations.
444     * @throws org.apache.commons.math3.exception.DimensionMismatchException
445     * if the start point dimension is wrong.
446     * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
447     * if the maximal number of evaluations is exceeded.
448     * @throws org.apache.commons.math3.exception.NullArgumentException if
449     * any argument is {@code null}.
450     * @deprecated As of 3.1. Please use
451     * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,
452     * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
453     * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
454     * instead.
455     */
456    @Deprecated
457    public PointVectorValuePair optimize(final int maxEval,
458                                         final MultivariateDifferentiableVectorFunction f,
459                                         final double[] target, final double[] weights,
460                                         final double[] startPoint) {
461        return optimizeInternal(maxEval, f,
462                                new Target(target),
463                                new Weight(weights),
464                                new InitialGuess(startPoint));
465    }
466
467    /**
468     * Optimize an objective function.
469     * Optimization is considered to be a weighted least-squares minimization.
470     * The cost function to be minimized is
471     * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
472     *
473     * @param maxEval Allowed number of evaluations of the objective function.
474     * @param f Objective function.
475     * @param optData Optimization data. The following data will be looked for:
476     * <ul>
477     *  <li>{@link Target}</li>
478     *  <li>{@link Weight}</li>
479     *  <li>{@link InitialGuess}</li>
480     * </ul>
481     * @return the point/value pair giving the optimal value of the objective
482     * function.
483     * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
484     * the maximal number of evaluations is exceeded.
485     * @throws DimensionMismatchException if the target, and weight arguments
486     * have inconsistent dimensions.
487     * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,
488     * org.apache.commons.math3.analysis.MultivariateVectorFunction,OptimizationData[])
489     * @since 3.1
490     * @deprecated As of 3.1. Override is necessary only until this class's generic
491     * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
492     */
493    @Deprecated
494    protected PointVectorValuePair optimizeInternal(final int maxEval,
495                                                    final MultivariateDifferentiableVectorFunction f,
496                                                    OptimizationData... optData) {
497        // XXX Conversion will be removed when the generic argument of the
498        // base class becomes "MultivariateDifferentiableVectorFunction".
499        return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
500    }
501
502    /** {@inheritDoc} */
503    @Override
504    protected void setUp() {
505        super.setUp();
506
507        // Reset counter.
508        jacobianEvaluations = 0;
509
510        // Square-root of the weight matrix.
511        weightMatrixSqrt = squareRoot(getWeight());
512
513        // Store least squares problem characteristics.
514        // XXX The conversion won't be necessary when the generic argument of
515        // the base class becomes "MultivariateDifferentiableVectorFunction".
516        // XXX "jF" is not strictly necessary anymore but is currently more
517        // efficient than converting the value returned from "getObjectiveFunction()"
518        // every time it is used.
519        jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());
520
521        // Arrays shared with "private" and "protected" methods.
522        point = getStartPoint();
523        rows = getTarget().length;
524        cols = point.length;
525    }
526
527    /**
528     * Computes the residuals.
529     * The residual is the difference between the observed (target)
530     * values and the model (objective function) value.
531     * There is one residual for each element of the vector-valued
532     * function.
533     *
534     * @param objectiveValue Value of the the objective function. This is
535     * the value returned from a call to
536     * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
537     * (whose array argument contains the model parameters).
538     * @return the residuals.
539     * @throws DimensionMismatchException if {@code params} has a wrong
540     * length.
541     * @since 3.1
542     */
543    protected double[] computeResiduals(double[] objectiveValue) {
544        final double[] target = getTarget();
545        if (objectiveValue.length != target.length) {
546            throw new DimensionMismatchException(target.length,
547                                                 objectiveValue.length);
548        }
549
550        final double[] residuals = new double[target.length];
551        for (int i = 0; i < target.length; i++) {
552            residuals[i] = target[i] - objectiveValue[i];
553        }
554
555        return residuals;
556    }
557
558    /**
559     * Computes the square-root of the weight matrix.
560     *
561     * @param m Symmetric, positive-definite (weight) matrix.
562     * @return the square-root of the weight matrix.
563     */
564    private RealMatrix squareRoot(RealMatrix m) {
565        if (m instanceof DiagonalMatrix) {
566            final int dim = m.getRowDimension();
567            final RealMatrix sqrtM = new DiagonalMatrix(dim);
568            for (int i = 0; i < dim; i++) {
569               sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i)));
570            }
571            return sqrtM;
572        } else {
573            final EigenDecomposition dec = new EigenDecomposition(m);
574            return dec.getSquareRoot();
575        }
576    }
577}