001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math3.optimization.general;
019    
020    import org.apache.commons.math3.analysis.DifferentiableMultivariateVectorFunction;
021    import org.apache.commons.math3.analysis.FunctionUtils;
022    import org.apache.commons.math3.analysis.differentiation.DerivativeStructure;
023    import org.apache.commons.math3.analysis.differentiation.MultivariateDifferentiableVectorFunction;
024    import org.apache.commons.math3.exception.DimensionMismatchException;
025    import org.apache.commons.math3.exception.NumberIsTooSmallException;
026    import org.apache.commons.math3.exception.util.LocalizedFormats;
027    import org.apache.commons.math3.linear.ArrayRealVector;
028    import org.apache.commons.math3.linear.RealMatrix;
029    import org.apache.commons.math3.linear.DiagonalMatrix;
030    import org.apache.commons.math3.linear.DecompositionSolver;
031    import org.apache.commons.math3.linear.MatrixUtils;
032    import org.apache.commons.math3.linear.QRDecomposition;
033    import org.apache.commons.math3.linear.EigenDecomposition;
034    import org.apache.commons.math3.optimization.OptimizationData;
035    import org.apache.commons.math3.optimization.InitialGuess;
036    import org.apache.commons.math3.optimization.Target;
037    import org.apache.commons.math3.optimization.Weight;
038    import org.apache.commons.math3.optimization.ConvergenceChecker;
039    import org.apache.commons.math3.optimization.DifferentiableMultivariateVectorOptimizer;
040    import org.apache.commons.math3.optimization.PointVectorValuePair;
041    import org.apache.commons.math3.optimization.direct.BaseAbstractMultivariateVectorOptimizer;
042    import org.apache.commons.math3.util.FastMath;
043    
044    /**
045     * Base class for implementing least squares optimizers.
046     * It handles the boilerplate methods associated to thresholds settings,
047     * Jacobian and error estimation.
048     * <br/>
049     * This class constructs the Jacobian matrix of the function argument in method
050     * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
051     * optimize} and assumes that the rows of that matrix iterate on the model
052     * functions while the columns iterate on the parameters; thus, the numbers
053     * of rows is equal to the dimension of the
054     * {@link org.apache.commons.math3.optimization.Target Target} while
055     * the number of columns is equal to the dimension of the
056     * {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}.
057     *
058     * @version $Id: AbstractLeastSquaresOptimizer.java 1426759 2012-12-29 13:26:44Z erans $
059     * @deprecated As of 3.1 (to be removed in 4.0).
060     * @since 1.2
061     */
062    @Deprecated
063    public abstract class AbstractLeastSquaresOptimizer
064        extends BaseAbstractMultivariateVectorOptimizer<DifferentiableMultivariateVectorFunction>
065        implements DifferentiableMultivariateVectorOptimizer {
066        /**
067         * Singularity threshold (cf. {@link #getCovariances(double)}).
068         * @deprecated As of 3.1.
069         */
070        @Deprecated
071        private static final double DEFAULT_SINGULARITY_THRESHOLD = 1e-14;
072        /**
073         * Jacobian matrix of the weighted residuals.
074         * This matrix is in canonical form just after the calls to
075         * {@link #updateJacobian()}, but may be modified by the solver
076         * in the derived class (the {@link LevenbergMarquardtOptimizer
077         * Levenberg-Marquardt optimizer} does this).
078         * @deprecated As of 3.1. To be removed in 4.0. Please use
079         * {@link #computeWeightedJacobian(double[])} instead.
080         */
081        @Deprecated
082        protected double[][] weightedResidualJacobian;
083        /** Number of columns of the jacobian matrix.
084         * @deprecated As of 3.1.
085         */
086        @Deprecated
087        protected int cols;
088        /** Number of rows of the jacobian matrix.
089         * @deprecated As of 3.1.
090         */
091        @Deprecated
092        protected int rows;
093        /** Current point.
094         * @deprecated As of 3.1.
095         */
096        @Deprecated
097        protected double[] point;
098        /** Current objective function value.
099         * @deprecated As of 3.1.
100         */
101        @Deprecated
102        protected double[] objective;
103        /** Weighted residuals
104         * @deprecated As of 3.1.
105         */
106        @Deprecated
107        protected double[] weightedResiduals;
108        /** Cost value (square root of the sum of the residuals).
109         * @deprecated As of 3.1. Field to become "private" in 4.0.
110         * Please use {@link #setCost(double)}.
111         */
112        @Deprecated
113        protected double cost;
114        /** Objective function derivatives. */
115        private MultivariateDifferentiableVectorFunction jF;
116        /** Number of evaluations of the Jacobian. */
117        private int jacobianEvaluations;
118        /** Square-root of the weight matrix. */
119        private RealMatrix weightMatrixSqrt;
120    
121        /**
122         * Simple constructor with default settings.
123         * The convergence check is set to a {@link
124         * org.apache.commons.math3.optimization.SimpleVectorValueChecker}.
125         * @deprecated See {@link org.apache.commons.math3.optimization.SimpleValueChecker#SimpleValueChecker()}
126         */
127        @Deprecated
128        protected AbstractLeastSquaresOptimizer() {}
129    
130        /**
131         * @param checker Convergence checker.
132         */
133        protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
134            super(checker);
135        }
136    
137        /**
138         * @return the number of evaluations of the Jacobian function.
139         */
140        public int getJacobianEvaluations() {
141            return jacobianEvaluations;
142        }
143    
144        /**
145         * Update the jacobian matrix.
146         *
147         * @throws DimensionMismatchException if the Jacobian dimension does not
148         * match problem dimension.
149         * @deprecated As of 3.1. Please use {@link #computeWeightedJacobian(double[])}
150         * instead.
151         */
152        @Deprecated
153        protected void updateJacobian() {
154            final RealMatrix weightedJacobian = computeWeightedJacobian(point);
155            weightedResidualJacobian = weightedJacobian.scalarMultiply(-1).getData();
156        }
157    
158        /**
159         * Computes the Jacobian matrix.
160         *
161         * @param params Model parameters at which to compute the Jacobian.
162         * @return the weighted Jacobian: W<sup>1/2</sup> J.
163         * @throws DimensionMismatchException if the Jacobian dimension does not
164         * match problem dimension.
165         * @since 3.1
166         */
167        protected RealMatrix computeWeightedJacobian(double[] params) {
168            ++jacobianEvaluations;
169    
170            final DerivativeStructure[] dsPoint = new DerivativeStructure[params.length];
171            final int nC = params.length;
172            for (int i = 0; i < nC; ++i) {
173                dsPoint[i] = new DerivativeStructure(nC, 1, i, params[i]);
174            }
175            final DerivativeStructure[] dsValue = jF.value(dsPoint);
176            final int nR = getTarget().length;
177            if (dsValue.length != nR) {
178                throw new DimensionMismatchException(dsValue.length, nR);
179            }
180            final double[][] jacobianData = new double[nR][nC];
181            for (int i = 0; i < nR; ++i) {
182                int[] orders = new int[nC];
183                for (int j = 0; j < nC; ++j) {
184                    orders[j] = 1;
185                    jacobianData[i][j] = dsValue[i].getPartialDerivative(orders);
186                    orders[j] = 0;
187                }
188            }
189    
190            return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(jacobianData));
191        }
192    
193        /**
194         * Update the residuals array and cost function value.
195         * @throws DimensionMismatchException if the dimension does not match the
196         * problem dimension.
197         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
198         * if the maximal number of evaluations is exceeded.
199         * @deprecated As of 3.1. Please use {@link #computeResiduals(double[])},
200         * {@link #computeObjectiveValue(double[])}, {@link #computeCost(double[])}
201         * and {@link #setCost(double)} instead.
202         */
203        @Deprecated
204        protected void updateResidualsAndCost() {
205            objective = computeObjectiveValue(point);
206            final double[] res = computeResiduals(objective);
207    
208            // Compute cost.
209            cost = computeCost(res);
210    
211            // Compute weighted residuals.
212            final ArrayRealVector residuals = new ArrayRealVector(res);
213            weightedResiduals = weightMatrixSqrt.operate(residuals).toArray();
214        }
215    
216        /**
217         * Computes the cost.
218         *
219         * @param residuals Residuals.
220         * @return the cost.
221         * @see #computeResiduals(double[])
222         * @since 3.1
223         */
224        protected double computeCost(double[] residuals) {
225            final ArrayRealVector r = new ArrayRealVector(residuals);
226            return FastMath.sqrt(r.dotProduct(getWeight().operate(r)));
227        }
228    
229        /**
230         * Get the Root Mean Square value.
231         * Get the Root Mean Square value, i.e. the root of the arithmetic
232         * mean of the square of all weighted residuals. This is related to the
233         * criterion that is minimized by the optimizer as follows: if
234         * <em>c</em> if the criterion, and <em>n</em> is the number of
235         * measurements, then the RMS is <em>sqrt (c/n)</em>.
236         *
237         * @return RMS value
238         */
239        public double getRMS() {
240            return FastMath.sqrt(getChiSquare() / rows);
241        }
242    
243        /**
244         * Get a Chi-Square-like value assuming the N residuals follow N
245         * distinct normal distributions centered on 0 and whose variances are
246         * the reciprocal of the weights.
247         * @return chi-square value
248         */
249        public double getChiSquare() {
250            return cost * cost;
251        }
252    
253        /**
254         * Gets the square-root of the weight matrix.
255         *
256         * @return the square-root of the weight matrix.
257         * @since 3.1
258         */
259        public RealMatrix getWeightSquareRoot() {
260            return weightMatrixSqrt.copy();
261        }
262    
263        /**
264         * Sets the cost.
265         *
266         * @param cost Cost value.
267         * @since 3.1
268         */
269        protected void setCost(double cost) {
270            this.cost = cost;
271        }
272    
273        /**
274         * Get the covariance matrix of the optimized parameters.
275         *
276         * @return the covariance matrix.
277         * @throws org.apache.commons.math3.linear.SingularMatrixException
278         * if the covariance matrix cannot be computed (singular problem).
279         * @see #getCovariances(double)
280         * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
281         * instead.
282         */
283        @Deprecated
284        public double[][] getCovariances() {
285            return getCovariances(DEFAULT_SINGULARITY_THRESHOLD);
286        }
287    
288        /**
289         * Get the covariance matrix of the optimized parameters.
290         * <br/>
291         * Note that this operation involves the inversion of the
292         * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
293         * Jacobian matrix.
294         * The {@code threshold} parameter is a way for the caller to specify
295         * that the result of this computation should be considered meaningless,
296         * and thus trigger an exception.
297         *
298         * @param threshold Singularity threshold.
299         * @return the covariance matrix.
300         * @throws org.apache.commons.math3.linear.SingularMatrixException
301         * if the covariance matrix cannot be computed (singular problem).
302         * @deprecated As of 3.1. Please use {@link #computeCovariances(double[],double)}
303         * instead.
304         */
305        @Deprecated
306        public double[][] getCovariances(double threshold) {
307            return computeCovariances(point, threshold);
308        }
309    
310        /**
311         * Get the covariance matrix of the optimized parameters.
312         * <br/>
313         * Note that this operation involves the inversion of the
314         * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the
315         * Jacobian matrix.
316         * The {@code threshold} parameter is a way for the caller to specify
317         * that the result of this computation should be considered meaningless,
318         * and thus trigger an exception.
319         *
320         * @param params Model parameters.
321         * @param threshold Singularity threshold.
322         * @return the covariance matrix.
323         * @throws org.apache.commons.math3.linear.SingularMatrixException
324         * if the covariance matrix cannot be computed (singular problem).
325         * @since 3.1
326         */
327        public double[][] computeCovariances(double[] params,
328                                             double threshold) {
329            // Set up the Jacobian.
330            final RealMatrix j = computeWeightedJacobian(params);
331    
332            // Compute transpose(J)J.
333            final RealMatrix jTj = j.transpose().multiply(j);
334    
335            // Compute the covariances matrix.
336            final DecompositionSolver solver
337                = new QRDecomposition(jTj, threshold).getSolver();
338            return solver.getInverse().getData();
339        }
340    
341        /**
342         * <p>
343         * Returns an estimate of the standard deviation of each parameter. The
344         * returned values are the so-called (asymptotic) standard errors on the
345         * parameters, defined as {@code sd(a[i]) = sqrt(S / (n - m) * C[i][i])},
346         * where {@code a[i]} is the optimized value of the {@code i}-th parameter,
347         * {@code S} is the minimized value of the sum of squares objective function
348         * (as returned by {@link #getChiSquare()}), {@code n} is the number of
349         * observations, {@code m} is the number of parameters and {@code C} is the
350         * covariance matrix.
351         * </p>
352         * <p>
353         * See also
354         * <a href="http://en.wikipedia.org/wiki/Least_squares">Wikipedia</a>,
355         * or
356         * <a href="http://mathworld.wolfram.com/LeastSquaresFitting.html">MathWorld</a>,
357         * equations (34) and (35) for a particular case.
358         * </p>
359         *
360         * @return an estimate of the standard deviation of the optimized parameters
361         * @throws org.apache.commons.math3.linear.SingularMatrixException
362         * if the covariance matrix cannot be computed.
363         * @throws NumberIsTooSmallException if the number of degrees of freedom is not
364         * positive, i.e. the number of measurements is less or equal to the number of
365         * parameters.
366         * @deprecated as of version 3.1, {@link #computeSigma(double[],double)} should be used
367         * instead. It should be emphasized that {@code guessParametersErrors} and
368         * {@code computeSigma} are <em>not</em> strictly equivalent.
369         */
370        @Deprecated
371        public double[] guessParametersErrors() {
372            if (rows <= cols) {
373                throw new NumberIsTooSmallException(LocalizedFormats.NO_DEGREES_OF_FREEDOM,
374                                                    rows, cols, false);
375            }
376            double[] errors = new double[cols];
377            final double c = FastMath.sqrt(getChiSquare() / (rows - cols));
378            double[][] covar = computeCovariances(point, 1e-14);
379            for (int i = 0; i < errors.length; ++i) {
380                errors[i] = FastMath.sqrt(covar[i][i]) * c;
381            }
382            return errors;
383        }
384    
385        /**
386         * Computes an estimate of the standard deviation of the parameters. The
387         * returned values are the square root of the diagonal coefficients of the
388         * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]}
389         * is the optimized value of the {@code i}-th parameter, and {@code C} is
390         * the covariance matrix.
391         *
392         * @param params Model parameters.
393         * @param covarianceSingularityThreshold Singularity threshold (see
394         * {@link #computeCovariances(double[],double) computeCovariances}).
395         * @return an estimate of the standard deviation of the optimized parameters
396         * @throws org.apache.commons.math3.linear.SingularMatrixException
397         * if the covariance matrix cannot be computed.
398         * @since 3.1
399         */
400        public double[] computeSigma(double[] params,
401                                     double covarianceSingularityThreshold) {
402            final int nC = params.length;
403            final double[] sig = new double[nC];
404            final double[][] cov = computeCovariances(params, covarianceSingularityThreshold);
405            for (int i = 0; i < nC; ++i) {
406                sig[i] = FastMath.sqrt(cov[i][i]);
407            }
408            return sig;
409        }
410    
411        /** {@inheritDoc}
412         * @deprecated As of 3.1. Please use
413         * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
414         * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
415         * instead.
416         */
417        @Override
418        @Deprecated
419        public PointVectorValuePair optimize(int maxEval,
420                                             final DifferentiableMultivariateVectorFunction f,
421                                             final double[] target, final double[] weights,
422                                             final double[] startPoint) {
423            return optimizeInternal(maxEval,
424                                    FunctionUtils.toMultivariateDifferentiableVectorFunction(f),
425                                    new Target(target),
426                                    new Weight(weights),
427                                    new InitialGuess(startPoint));
428        }
429    
430        /**
431         * Optimize an objective function.
432         * Optimization is considered to be a weighted least-squares minimization.
433         * The cost function to be minimized is
434         * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
435         *
436         * @param f Objective function.
437         * @param target Target value for the objective functions at optimum.
438         * @param weights Weights for the least squares cost computation.
439         * @param startPoint Start point for optimization.
440         * @return the point/value pair giving the optimal value for objective
441         * function.
442         * @param maxEval Maximum number of function evaluations.
443         * @throws org.apache.commons.math3.exception.DimensionMismatchException
444         * if the start point dimension is wrong.
445         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException
446         * if the maximal number of evaluations is exceeded.
447         * @throws org.apache.commons.math3.exception.NullArgumentException if
448         * any argument is {@code null}.
449         * @deprecated As of 3.1. Please use
450         * {@link BaseAbstractMultivariateVectorOptimizer#optimize(int,MultivariateVectorFunction,OptimizationData[])
451         * optimize(int,MultivariateDifferentiableVectorFunction,OptimizationData...)}
452         * instead.
453         */
454        @Deprecated
455        public PointVectorValuePair optimize(final int maxEval,
456                                             final MultivariateDifferentiableVectorFunction f,
457                                             final double[] target, final double[] weights,
458                                             final double[] startPoint) {
459            return optimizeInternal(maxEval, f,
460                                    new Target(target),
461                                    new Weight(weights),
462                                    new InitialGuess(startPoint));
463        }
464    
465        /**
466         * Optimize an objective function.
467         * Optimization is considered to be a weighted least-squares minimization.
468         * The cost function to be minimized is
469         * <code>&sum;weight<sub>i</sub>(objective<sub>i</sub> - target<sub>i</sub>)<sup>2</sup></code>
470         *
471         * @param maxEval Allowed number of evaluations of the objective function.
472         * @param f Objective function.
473         * @param optData Optimization data. The following data will be looked for:
474         * <ul>
475         *  <li>{@link Target}</li>
476         *  <li>{@link Weight}</li>
477         *  <li>{@link InitialGuess}</li>
478         * </ul>
479         * @return the point/value pair giving the optimal value of the objective
480         * function.
481         * @throws org.apache.commons.math3.exception.TooManyEvaluationsException if
482         * the maximal number of evaluations is exceeded.
483         * @throws DimensionMismatchException if the target, and weight arguments
484         * have inconsistent dimensions.
485         * @see BaseAbstractMultivariateVectorOptimizer#optimizeInternal(int,MultivariateVectorFunction,OptimizationData[])
486         * @since 3.1
487         * @deprecated As of 3.1. Override is necessary only until this class's generic
488         * argument is changed to {@code MultivariateDifferentiableVectorFunction}.
489         */
490        @Deprecated
491        protected PointVectorValuePair optimizeInternal(final int maxEval,
492                                                        final MultivariateDifferentiableVectorFunction f,
493                                                        OptimizationData... optData) {
494            // XXX Conversion will be removed when the generic argument of the
495            // base class becomes "MultivariateDifferentiableVectorFunction".
496            return super.optimizeInternal(maxEval, FunctionUtils.toDifferentiableMultivariateVectorFunction(f), optData);
497        }
498    
499        /** {@inheritDoc} */
500        @Override
501        protected void setUp() {
502            super.setUp();
503    
504            // Reset counter.
505            jacobianEvaluations = 0;
506    
507            // Square-root of the weight matrix.
508            weightMatrixSqrt = squareRoot(getWeight());
509    
510            // Store least squares problem characteristics.
511            // XXX The conversion won't be necessary when the generic argument of
512            // the base class becomes "MultivariateDifferentiableVectorFunction".
513            // XXX "jF" is not strictly necessary anymore but is currently more
514            // efficient than converting the value returned from "getObjectiveFunction()"
515            // every time it is used.
516            jF = FunctionUtils.toMultivariateDifferentiableVectorFunction((DifferentiableMultivariateVectorFunction) getObjectiveFunction());
517    
518            // Arrays shared with "private" and "protected" methods.
519            point = getStartPoint();
520            rows = getTarget().length;
521            cols = point.length;
522        }
523    
524        /**
525         * Computes the residuals.
526         * The residual is the difference between the observed (target)
527         * values and the model (objective function) value.
528         * There is one residual for each element of the vector-valued
529         * function.
530         *
531         * @param objectiveValue Value of the the objective function. This is
532         * the value returned from a call to
533         * {@link #computeObjectiveValue(double[]) computeObjectiveValue}
534         * (whose array argument contains the model parameters).
535         * @return the residuals.
536         * @throws DimensionMismatchException if {@code params} has a wrong
537         * length.
538         * @since 3.1
539         */
540        protected double[] computeResiduals(double[] objectiveValue) {
541            final double[] target = getTarget();
542            if (objectiveValue.length != target.length) {
543                throw new DimensionMismatchException(target.length,
544                                                     objectiveValue.length);
545            }
546    
547            final double[] residuals = new double[target.length];
548            for (int i = 0; i < target.length; i++) {
549                residuals[i] = target[i] - objectiveValue[i];
550            }
551    
552            return residuals;
553        }
554    
555        /**
556         * Computes the square-root of the weight matrix.
557         *
558         * @param m Symmetric, positive-definite (weight) matrix.
559         * @return the square-root of the weight matrix.
560         */
561        private RealMatrix squareRoot(RealMatrix m) {
562            if (m instanceof DiagonalMatrix) {
563                final int dim = m.getRowDimension();
564                final RealMatrix sqrtM = new DiagonalMatrix(dim);
565                for (int i = 0; i < dim; i++) {
566                   sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i)));
567                }
568                return sqrtM;
569            } else {
570                final EigenDecomposition dec = new EigenDecomposition(m);
571                return dec.getSquareRoot();
572            }
573        }
574    }