Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math3.optim.nonlinear.vector.jacobian;
018
019import java.util.Arrays;
020import org.apache.commons.math3.exception.ConvergenceException;
021import org.apache.commons.math3.exception.MathUnsupportedOperationException;
022import org.apache.commons.math3.exception.util.LocalizedFormats;
023import org.apache.commons.math3.optim.PointVectorValuePair;
024import org.apache.commons.math3.optim.ConvergenceChecker;
025import org.apache.commons.math3.linear.RealMatrix;
026import org.apache.commons.math3.util.Precision;
027import org.apache.commons.math3.util.FastMath;
028
029
030/**
031 * This class solves a least-squares problem using the Levenberg-Marquardt
032 * algorithm.
033 * <br/>
034 * Constraints are not supported: the call to
035 * {@link #optimize(OptimizationData[]) optimize} will throw
036 * {@link MathUnsupportedOperationException} if bounds are passed to it.
037 *
038 * <p>This implementation <em>should</em> work even for over-determined systems
039 * (i.e. systems having more point than equations). Over-determined systems
040 * are solved by ignoring the point which have the smallest impact according
041 * to their jacobian column norm. Only the rank of the matrix and some loop bounds
042 * are changed to implement this.</p>
043 *
044 * <p>The resolution engine is a simple translation of the MINPACK <a
045 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor
046 * changes. The changes include the over-determined resolution, the use of
047 * inherited convergence checker and the Q.R. decomposition which has been
048 * rewritten following the algorithm described in the
049 * P. Lascaux and R. Theodor book <i>Analyse num&eacute;rique matricielle
050 * appliqu&eacute;e &agrave; l'art de l'ing&eacute;nieur</i>, Masson 1986.</p>
051 * <p>The authors of the original fortran version are:
052 * <ul>
053 * <li>Argonne National Laboratory. MINPACK project. March 1980</li>
054 * <li>Burton S. Garbow</li>
055 * <li>Kenneth E. Hillstrom</li>
056 * <li>Jorge J. More</li>
057 * </ul>
058 * The redistribution policy for MINPACK is available <a
059 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it
060 * is reproduced below.</p>
061 *
062 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0">
063 * <tr><td>
064 *    Minpack Copyright Notice (1999) University of Chicago.
065 *    All rights reserved
066 * </td></tr>
067 * <tr><td>
068 * Redistribution and use in source and binary forms, with or without
069 * modification, are permitted provided that the following conditions
070 * are met:
071 * <ol>
072 *  <li>Redistributions of source code must retain the above copyright
073 *      notice, this list of conditions and the following disclaimer.</li>
074 * <li>Redistributions in binary form must reproduce the above
075 *     copyright notice, this list of conditions and the following
076 *     disclaimer in the documentation and/or other materials provided
077 *     with the distribution.</li>
078 * <li>The end-user documentation included with the redistribution, if any,
079 *     must include the following acknowledgment:
080 *     <code>This product includes software developed by the University of
081 *           Chicago, as Operator of Argonne National Laboratory.</code>
082 *     Alternately, this acknowledgment may appear in the software itself,
083 *     if and wherever such third-party acknowledgments normally appear.</li>
084 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS"
085 *     WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE
086 *     UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND
087 *     THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR
088 *     IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES
089 *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE
090 *     OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY
091 *     OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR
092 *     USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF
093 *     THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4)
094 *     DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION
095 *     UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL
096 *     BE CORRECTED.</strong></li>
097 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT
098 *     HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF
099 *     ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT,
100 *     INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF
101 *     ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF
102 *     PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER
103 *     SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
104 *     (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
105 *     EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
106 *     POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li>
107 * <ol></td></tr>
108 * </table>
109 *
110 * @since 2.0
111 * @deprecated All classes and interfaces in this package are deprecated.
112 * The optimizers that were provided here were moved to the
113 * {@link org.apache.commons.math3.fitting.leastsquares} package
114 * (cf. MATH-1008).
115 */
116@Deprecated
117public class LevenbergMarquardtOptimizer
118    extends AbstractLeastSquaresOptimizer {
119    /** Twice the "epsilon machine". */
120    private static final double TWO_EPS = 2 * Precision.EPSILON;
121    /** Number of solved point. */
122    private int solvedCols;
123    /** Diagonal elements of the R matrix in the Q.R. decomposition. */
124    private double[] diagR;
125    /** Norms of the columns of the jacobian matrix. */
126    private double[] jacNorm;
127    /** Coefficients of the Householder transforms vectors. */
128    private double[] beta;
129    /** Columns permutation array. */
130    private int[] permutation;
131    /** Rank of the jacobian matrix. */
132    private int rank;
133    /** Levenberg-Marquardt parameter. */
134    private double lmPar;
135    /** Parameters evolution direction associated with lmPar. */
136    private double[] lmDir;
137    /** Positive input variable used in determining the initial step bound. */
138    private final double initialStepBoundFactor;
139    /** Desired relative error in the sum of squares. */
140    private final double costRelativeTolerance;
141    /**  Desired relative error in the approximate solution parameters. */
142    private final double parRelativeTolerance;
143    /** Desired max cosine on the orthogonality between the function vector
144     * and the columns of the jacobian. */
145    private final double orthoTolerance;
146    /** Threshold for QR ranking. */
147    private final double qrRankingThreshold;
148    /** Weighted residuals. */
149    private double[] weightedResidual;
150    /** Weighted Jacobian. */
151    private double[][] weightedJacobian;
152
153    /**
154     * Build an optimizer for least squares problems with default values
155     * for all the tuning parameters (see the {@link
156     * #LevenbergMarquardtOptimizer(double,double,double,double,double)
157     * other contructor}.
158     * The default values for the algorithm settings are:
159     * <ul>
160     *  <li>Initial step bound factor: 100</li>
161     *  <li>Cost relative tolerance: 1e-10</li>
162     *  <li>Parameters relative tolerance: 1e-10</li>
163     *  <li>Orthogonality tolerance: 1e-10</li>
164     *  <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li>
165     * </ul>
166     */
167    public LevenbergMarquardtOptimizer() {
168        this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN);
169    }
170
171    /**
172     * Constructor that allows the specification of a custom convergence
173     * checker.
174     * Note that all the usual convergence checks will be <em>disabled</em>.
175     * The default values for the algorithm settings are:
176     * <ul>
177     *  <li>Initial step bound factor: 100</li>
178     *  <li>Cost relative tolerance: 1e-10</li>
179     *  <li>Parameters relative tolerance: 1e-10</li>
180     *  <li>Orthogonality tolerance: 1e-10</li>
181     *  <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li>
182     * </ul>
183     *
184     * @param checker Convergence checker.
185     */
186    public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) {
187        this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN);
188    }
189
190    /**
191     * Constructor that allows the specification of a custom convergence
192     * checker, in addition to the standard ones.
193     *
194     * @param initialStepBoundFactor Positive input variable used in
195     * determining the initial step bound. This bound is set to the
196     * product of initialStepBoundFactor and the euclidean norm of
197     * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor}
198     * itself. In most cases factor should lie in the interval
199     * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value.
200     * @param checker Convergence checker.
201     * @param costRelativeTolerance Desired relative error in the sum of
202     * squares.
203     * @param parRelativeTolerance Desired relative error in the approximate
204     * solution parameters.
205     * @param orthoTolerance Desired max cosine on the orthogonality between
206     * the function vector and the columns of the Jacobian.
207     * @param threshold Desired threshold for QR ranking. If the squared norm
208     * of a column vector is smaller or equal to this threshold during QR
209     * decomposition, it is considered to be a zero vector and hence the rank
210     * of the matrix is reduced.
211     */
212    public LevenbergMarquardtOptimizer(double initialStepBoundFactor,
213                                       ConvergenceChecker<PointVectorValuePair> checker,
214                                       double costRelativeTolerance,
215                                       double parRelativeTolerance,
216                                       double orthoTolerance,
217                                       double threshold) {
218        super(checker);
219        this.initialStepBoundFactor = initialStepBoundFactor;
220        this.costRelativeTolerance = costRelativeTolerance;
221        this.parRelativeTolerance = parRelativeTolerance;
222        this.orthoTolerance = orthoTolerance;
223        this.qrRankingThreshold = threshold;
224    }
225
226    /**
227     * Build an optimizer for least squares problems with default values
228     * for some of the tuning parameters (see the {@link
229     * #LevenbergMarquardtOptimizer(double,double,double,double,double)
230     * other contructor}.
231     * The default values for the algorithm settings are:
232     * <ul>
233     *  <li>Initial step bound factor}: 100</li>
234     *  <li>QR ranking threshold}: {@link Precision#SAFE_MIN}</li>
235     * </ul>
236     *
237     * @param costRelativeTolerance Desired relative error in the sum of
238     * squares.
239     * @param parRelativeTolerance Desired relative error in the approximate
240     * solution parameters.
241     * @param orthoTolerance Desired max cosine on the orthogonality between
242     * the function vector and the columns of the Jacobian.
243     */
244    public LevenbergMarquardtOptimizer(double costRelativeTolerance,
245                                       double parRelativeTolerance,
246                                       double orthoTolerance) {
247        this(100,
248             costRelativeTolerance, parRelativeTolerance, orthoTolerance,
249             Precision.SAFE_MIN);
250    }
251
252    /**
253     * The arguments control the behaviour of the default convergence checking
254     * procedure.
255     * Additional criteria can defined through the setting of a {@link
256     * ConvergenceChecker}.
257     *
258     * @param initialStepBoundFactor Positive input variable used in
259     * determining the initial step bound. This bound is set to the
260     * product of initialStepBoundFactor and the euclidean norm of
261     * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor}
262     * itself. In most cases factor should lie in the interval
263     * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value.
264     * @param costRelativeTolerance Desired relative error in the sum of
265     * squares.
266     * @param parRelativeTolerance Desired relative error in the approximate
267     * solution parameters.
268     * @param orthoTolerance Desired max cosine on the orthogonality between
269     * the function vector and the columns of the Jacobian.
270     * @param threshold Desired threshold for QR ranking. If the squared norm
271     * of a column vector is smaller or equal to this threshold during QR
272     * decomposition, it is considered to be a zero vector and hence the rank
273     * of the matrix is reduced.
274     */
275    public LevenbergMarquardtOptimizer(double initialStepBoundFactor,
276                                       double costRelativeTolerance,
277                                       double parRelativeTolerance,
278                                       double orthoTolerance,
279                                       double threshold) {
280        super(null); // No custom convergence criterion.
281        this.initialStepBoundFactor = initialStepBoundFactor;
282        this.costRelativeTolerance = costRelativeTolerance;
283        this.parRelativeTolerance = parRelativeTolerance;
284        this.orthoTolerance = orthoTolerance;
285        this.qrRankingThreshold = threshold;
286    }
287
288    /** {@inheritDoc} */
289    @Override
290    protected PointVectorValuePair doOptimize() {
291        checkParameters();
292
293        final int nR = getTarget().length; // Number of observed data.
294        final double[] currentPoint = getStartPoint();
295        final int nC = currentPoint.length; // Number of parameters.
296
297        // arrays shared with the other private methods
298        solvedCols  = FastMath.min(nR, nC);
299        diagR       = new double[nC];
300        jacNorm     = new double[nC];
301        beta        = new double[nC];
302        permutation = new int[nC];
303        lmDir       = new double[nC];
304
305        // local point
306        double   delta   = 0;
307        double   xNorm   = 0;
308        double[] diag    = new double[nC];
309        double[] oldX    = new double[nC];
310        double[] oldRes  = new double[nR];
311        double[] oldObj  = new double[nR];
312        double[] qtf     = new double[nR];
313        double[] work1   = new double[nC];
314        double[] work2   = new double[nC];
315        double[] work3   = new double[nC];
316
317        final RealMatrix weightMatrixSqrt = getWeightSquareRoot();
318
319        // Evaluate the function at the starting point and calculate its norm.
320        double[] currentObjective = computeObjectiveValue(currentPoint);
321        double[] currentResiduals = computeResiduals(currentObjective);
322        PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective);
323        double currentCost = computeCost(currentResiduals);
324
325        // Outer loop.
326        lmPar = 0;
327        boolean firstIteration = true;
328        final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker();
329        while (true) {
330            incrementIterationCount();
331
332            final PointVectorValuePair previous = current;
333
334            // QR decomposition of the jacobian matrix
335            qrDecomposition(computeWeightedJacobian(currentPoint));
336
337            weightedResidual = weightMatrixSqrt.operate(currentResiduals);
338            for (int i = 0; i < nR; i++) {
339                qtf[i] = weightedResidual[i];
340            }
341
342            // compute Qt.res
343            qTy(qtf);
344
345            // now we don't need Q anymore,
346            // so let jacobian contain the R matrix with its diagonal elements
347            for (int k = 0; k < solvedCols; ++k) {
348                int pk = permutation[k];
349                weightedJacobian[k][pk] = diagR[pk];
350            }
351
352            if (firstIteration) {
353                // scale the point according to the norms of the columns
354                // of the initial jacobian
355                xNorm = 0;
356                for (int k = 0; k < nC; ++k) {
357                    double dk = jacNorm[k];
358                    if (dk == 0) {
359                        dk = 1.0;
360                    }
361                    double xk = dk * currentPoint[k];
362                    xNorm  += xk * xk;
363                    diag[k] = dk;
364                }
365                xNorm = FastMath.sqrt(xNorm);
366
367                // initialize the step bound delta
368                delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm);
369            }
370
371            // check orthogonality between function vector and jacobian columns
372            double maxCosine = 0;
373            if (currentCost != 0) {
374                for (int j = 0; j < solvedCols; ++j) {
375                    int    pj = permutation[j];
376                    double s  = jacNorm[pj];
377                    if (s != 0) {
378                        double sum = 0;
379                        for (int i = 0; i <= j; ++i) {
380                            sum += weightedJacobian[i][pj] * qtf[i];
381                        }
382                        maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost));
383                    }
384                }
385            }
386            if (maxCosine <= orthoTolerance) {
387                // Convergence has been reached.
388                setCost(currentCost);
389                return current;
390            }
391
392            // rescale if necessary
393            for (int j = 0; j < nC; ++j) {
394                diag[j] = FastMath.max(diag[j], jacNorm[j]);
395            }
396
397            // Inner loop.
398            for (double ratio = 0; ratio < 1.0e-4;) {
399
400                // save the state
401                for (int j = 0; j < solvedCols; ++j) {
402                    int pj = permutation[j];
403                    oldX[pj] = currentPoint[pj];
404                }
405                final double previousCost = currentCost;
406                double[] tmpVec = weightedResidual;
407                weightedResidual = oldRes;
408                oldRes    = tmpVec;
409                tmpVec    = currentObjective;
410                currentObjective = oldObj;
411                oldObj    = tmpVec;
412
413                // determine the Levenberg-Marquardt parameter
414                determineLMParameter(qtf, delta, diag, work1, work2, work3);
415
416                // compute the new point and the norm of the evolution direction
417                double lmNorm = 0;
418                for (int j = 0; j < solvedCols; ++j) {
419                    int pj = permutation[j];
420                    lmDir[pj] = -lmDir[pj];
421                    currentPoint[pj] = oldX[pj] + lmDir[pj];
422                    double s = diag[pj] * lmDir[pj];
423                    lmNorm  += s * s;
424                }
425                lmNorm = FastMath.sqrt(lmNorm);
426                // on the first iteration, adjust the initial step bound.
427                if (firstIteration) {
428                    delta = FastMath.min(delta, lmNorm);
429                }
430
431                // Evaluate the function at x + p and calculate its norm.
432                currentObjective = computeObjectiveValue(currentPoint);
433                currentResiduals = computeResiduals(currentObjective);
434                current = new PointVectorValuePair(currentPoint, currentObjective);
435                currentCost = computeCost(currentResiduals);
436
437                // compute the scaled actual reduction
438                double actRed = -1.0;
439                if (0.1 * currentCost < previousCost) {
440                    double r = currentCost / previousCost;
441                    actRed = 1.0 - r * r;
442                }
443
444                // compute the scaled predicted reduction
445                // and the scaled directional derivative
446                for (int j = 0; j < solvedCols; ++j) {
447                    int pj = permutation[j];
448                    double dirJ = lmDir[pj];
449                    work1[j] = 0;
450                    for (int i = 0; i <= j; ++i) {
451                        work1[i] += weightedJacobian[i][pj] * dirJ;
452                    }
453                }
454                double coeff1 = 0;
455                for (int j = 0; j < solvedCols; ++j) {
456                    coeff1 += work1[j] * work1[j];
457                }
458                double pc2 = previousCost * previousCost;
459                coeff1 /= pc2;
460                double coeff2 = lmPar * lmNorm * lmNorm / pc2;
461                double preRed = coeff1 + 2 * coeff2;
462                double dirDer = -(coeff1 + coeff2);
463
464                // ratio of the actual to the predicted reduction
465                ratio = (preRed == 0) ? 0 : (actRed / preRed);
466
467                // update the step bound
468                if (ratio <= 0.25) {
469                    double tmp =
470                        (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5;
471                        if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) {
472                            tmp = 0.1;
473                        }
474                        delta = tmp * FastMath.min(delta, 10.0 * lmNorm);
475                        lmPar /= tmp;
476                } else if ((lmPar == 0) || (ratio >= 0.75)) {
477                    delta = 2 * lmNorm;
478                    lmPar *= 0.5;
479                }
480
481                // test for successful iteration.
482                if (ratio >= 1.0e-4) {
483                    // successful iteration, update the norm
484                    firstIteration = false;
485                    xNorm = 0;
486                    for (int k = 0; k < nC; ++k) {
487                        double xK = diag[k] * currentPoint[k];
488                        xNorm += xK * xK;
489                    }
490                    xNorm = FastMath.sqrt(xNorm);
491
492                    // tests for convergence.
493                    if (checker != null && checker.converged(getIterations(), previous, current)) {
494                        setCost(currentCost);
495                        return current;
496                    }
497                } else {
498                    // failed iteration, reset the previous values
499                    currentCost = previousCost;
500                    for (int j = 0; j < solvedCols; ++j) {
501                        int pj = permutation[j];
502                        currentPoint[pj] = oldX[pj];
503                    }
504                    tmpVec    = weightedResidual;
505                    weightedResidual = oldRes;
506                    oldRes    = tmpVec;
507                    tmpVec    = currentObjective;
508                    currentObjective = oldObj;
509                    oldObj    = tmpVec;
510                    // Reset "current" to previous values.
511                    current = new PointVectorValuePair(currentPoint, currentObjective);
512                }
513
514                // Default convergence criteria.
515                if ((FastMath.abs(actRed) <= costRelativeTolerance &&
516                     preRed <= costRelativeTolerance &&
517                     ratio <= 2.0) ||
518                    delta <= parRelativeTolerance * xNorm) {
519                    setCost(currentCost);
520                    return current;
521                }
522
523                // tests for termination and stringent tolerances
524                if (FastMath.abs(actRed) <= TWO_EPS &&
525                    preRed <= TWO_EPS &&
526                    ratio <= 2.0) {
527                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE,
528                                                   costRelativeTolerance);
529                } else if (delta <= TWO_EPS * xNorm) {
530                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE,
531                                                   parRelativeTolerance);
532                } else if (maxCosine <= TWO_EPS) {
533                    throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE,
534                                                   orthoTolerance);
535                }
536            }
537        }
538    }
539
540    /**
541     * Determine the Levenberg-Marquardt parameter.
542     * <p>This implementation is a translation in Java of the MINPACK
543     * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a>
544     * routine.</p>
545     * <p>This method sets the lmPar and lmDir attributes.</p>
546     * <p>The authors of the original fortran function are:</p>
547     * <ul>
548     *   <li>Argonne National Laboratory. MINPACK project. March 1980</li>
549     *   <li>Burton  S. Garbow</li>
550     *   <li>Kenneth E. Hillstrom</li>
551     *   <li>Jorge   J. More</li>
552     * </ul>
553     * <p>Luc Maisonobe did the Java translation.</p>
554     *
555     * @param qy array containing qTy
556     * @param delta upper bound on the euclidean norm of diagR * lmDir
557     * @param diag diagonal matrix
558     * @param work1 work array
559     * @param work2 work array
560     * @param work3 work array
561     */
562    private void determineLMParameter(double[] qy, double delta, double[] diag,
563                                      double[] work1, double[] work2, double[] work3) {
564        final int nC = weightedJacobian[0].length;
565
566        // compute and store in x the gauss-newton direction, if the
567        // jacobian is rank-deficient, obtain a least squares solution
568        for (int j = 0; j < rank; ++j) {
569            lmDir[permutation[j]] = qy[j];
570        }
571        for (int j = rank; j < nC; ++j) {
572            lmDir[permutation[j]] = 0;
573        }
574        for (int k = rank - 1; k >= 0; --k) {
575            int pk = permutation[k];
576            double ypk = lmDir[pk] / diagR[pk];
577            for (int i = 0; i < k; ++i) {
578                lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk];
579            }
580            lmDir[pk] = ypk;
581        }
582
583        // evaluate the function at the origin, and test
584        // for acceptance of the Gauss-Newton direction
585        double dxNorm = 0;
586        for (int j = 0; j < solvedCols; ++j) {
587            int pj = permutation[j];
588            double s = diag[pj] * lmDir[pj];
589            work1[pj] = s;
590            dxNorm += s * s;
591        }
592        dxNorm = FastMath.sqrt(dxNorm);
593        double fp = dxNorm - delta;
594        if (fp <= 0.1 * delta) {
595            lmPar = 0;
596            return;
597        }
598
599        // if the jacobian is not rank deficient, the Newton step provides
600        // a lower bound, parl, for the zero of the function,
601        // otherwise set this bound to zero
602        double sum2;
603        double parl = 0;
604        if (rank == solvedCols) {
605            for (int j = 0; j < solvedCols; ++j) {
606                int pj = permutation[j];
607                work1[pj] *= diag[pj] / dxNorm;
608            }
609            sum2 = 0;
610            for (int j = 0; j < solvedCols; ++j) {
611                int pj = permutation[j];
612                double sum = 0;
613                for (int i = 0; i < j; ++i) {
614                    sum += weightedJacobian[i][pj] * work1[permutation[i]];
615                }
616                double s = (work1[pj] - sum) / diagR[pj];
617                work1[pj] = s;
618                sum2 += s * s;
619            }
620            parl = fp / (delta * sum2);
621        }
622
623        // calculate an upper bound, paru, for the zero of the function
624        sum2 = 0;
625        for (int j = 0; j < solvedCols; ++j) {
626            int pj = permutation[j];
627            double sum = 0;
628            for (int i = 0; i <= j; ++i) {
629                sum += weightedJacobian[i][pj] * qy[i];
630            }
631            sum /= diag[pj];
632            sum2 += sum * sum;
633        }
634        double gNorm = FastMath.sqrt(sum2);
635        double paru = gNorm / delta;
636        if (paru == 0) {
637            paru = Precision.SAFE_MIN / FastMath.min(delta, 0.1);
638        }
639
640        // if the input par lies outside of the interval (parl,paru),
641        // set par to the closer endpoint
642        lmPar = FastMath.min(paru, FastMath.max(lmPar, parl));
643        if (lmPar == 0) {
644            lmPar = gNorm / dxNorm;
645        }
646
647        for (int countdown = 10; countdown >= 0; --countdown) {
648
649            // evaluate the function at the current value of lmPar
650            if (lmPar == 0) {
651                lmPar = FastMath.max(Precision.SAFE_MIN, 0.001 * paru);
652            }
653            double sPar = FastMath.sqrt(lmPar);
654            for (int j = 0; j < solvedCols; ++j) {
655                int pj = permutation[j];
656                work1[pj] = sPar * diag[pj];
657            }
658            determineLMDirection(qy, work1, work2, work3);
659
660            dxNorm = 0;
661            for (int j = 0; j < solvedCols; ++j) {
662                int pj = permutation[j];
663                double s = diag[pj] * lmDir[pj];
664                work3[pj] = s;
665                dxNorm += s * s;
666            }
667            dxNorm = FastMath.sqrt(dxNorm);
668            double previousFP = fp;
669            fp = dxNorm - delta;
670
671            // if the function is small enough, accept the current value
672            // of lmPar, also test for the exceptional cases where parl is zero
673            if ((FastMath.abs(fp) <= 0.1 * delta) ||
674                    ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) {
675                return;
676            }
677
678            // compute the Newton correction
679            for (int j = 0; j < solvedCols; ++j) {
680                int pj = permutation[j];
681                work1[pj] = work3[pj] * diag[pj] / dxNorm;
682            }
683            for (int j = 0; j < solvedCols; ++j) {
684                int pj = permutation[j];
685                work1[pj] /= work2[j];
686                double tmp = work1[pj];
687                for (int i = j + 1; i < solvedCols; ++i) {
688                    work1[permutation[i]] -= weightedJacobian[i][pj] * tmp;
689                }
690            }
691            sum2 = 0;
692            for (int j = 0; j < solvedCols; ++j) {
693                double s = work1[permutation[j]];
694                sum2 += s * s;
695            }
696            double correction = fp / (delta * sum2);
697
698            // depending on the sign of the function, update parl or paru.
699            if (fp > 0) {
700                parl = FastMath.max(parl, lmPar);
701            } else if (fp < 0) {
702                paru = FastMath.min(paru, lmPar);
703            }
704
705            // compute an improved estimate for lmPar
706            lmPar = FastMath.max(parl, lmPar + correction);
707
708        }
709    }
710
711    /**
712     * Solve a*x = b and d*x = 0 in the least squares sense.
713     * <p>This implementation is a translation in Java of the MINPACK
714     * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a>
715     * routine.</p>
716     * <p>This method sets the lmDir and lmDiag attributes.</p>
717     * <p>The authors of the original fortran function are:</p>
718     * <ul>
719     *   <li>Argonne National Laboratory. MINPACK project. March 1980</li>
720     *   <li>Burton  S. Garbow</li>
721     *   <li>Kenneth E. Hillstrom</li>
722     *   <li>Jorge   J. More</li>
723     * </ul>
724     * <p>Luc Maisonobe did the Java translation.</p>
725     *
726     * @param qy array containing qTy
727     * @param diag diagonal matrix
728     * @param lmDiag diagonal elements associated with lmDir
729     * @param work work array
730     */
731    private void determineLMDirection(double[] qy, double[] diag,
732                                      double[] lmDiag, double[] work) {
733
734        // copy R and Qty to preserve input and initialize s
735        //  in particular, save the diagonal elements of R in lmDir
736        for (int j = 0; j < solvedCols; ++j) {
737            int pj = permutation[j];
738            for (int i = j + 1; i < solvedCols; ++i) {
739                weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]];
740            }
741            lmDir[j] = diagR[pj];
742            work[j]  = qy[j];
743        }
744
745        // eliminate the diagonal matrix d using a Givens rotation
746        for (int j = 0; j < solvedCols; ++j) {
747
748            // prepare the row of d to be eliminated, locating the
749            // diagonal element using p from the Q.R. factorization
750            int pj = permutation[j];
751            double dpj = diag[pj];
752            if (dpj != 0) {
753                Arrays.fill(lmDiag, j + 1, lmDiag.length, 0);
754            }
755            lmDiag[j] = dpj;
756
757            //  the transformations to eliminate the row of d
758            // modify only a single element of Qty
759            // beyond the first n, which is initially zero.
760            double qtbpj = 0;
761            for (int k = j; k < solvedCols; ++k) {
762                int pk = permutation[k];
763
764                // determine a Givens rotation which eliminates the
765                // appropriate element in the current row of d
766                if (lmDiag[k] != 0) {
767
768                    final double sin;
769                    final double cos;
770                    double rkk = weightedJacobian[k][pk];
771                    if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) {
772                        final double cotan = rkk / lmDiag[k];
773                        sin   = 1.0 / FastMath.sqrt(1.0 + cotan * cotan);
774                        cos   = sin * cotan;
775                    } else {
776                        final double tan = lmDiag[k] / rkk;
777                        cos = 1.0 / FastMath.sqrt(1.0 + tan * tan);
778                        sin = cos * tan;
779                    }
780
781                    // compute the modified diagonal element of R and
782                    // the modified element of (Qty,0)
783                    weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k];
784                    final double temp = cos * work[k] + sin * qtbpj;
785                    qtbpj = -sin * work[k] + cos * qtbpj;
786                    work[k] = temp;
787
788                    // accumulate the tranformation in the row of s
789                    for (int i = k + 1; i < solvedCols; ++i) {
790                        double rik = weightedJacobian[i][pk];
791                        final double temp2 = cos * rik + sin * lmDiag[i];
792                        lmDiag[i] = -sin * rik + cos * lmDiag[i];
793                        weightedJacobian[i][pk] = temp2;
794                    }
795                }
796            }
797
798            // store the diagonal element of s and restore
799            // the corresponding diagonal element of R
800            lmDiag[j] = weightedJacobian[j][permutation[j]];
801            weightedJacobian[j][permutation[j]] = lmDir[j];
802        }
803
804        // solve the triangular system for z, if the system is
805        // singular, then obtain a least squares solution
806        int nSing = solvedCols;
807        for (int j = 0; j < solvedCols; ++j) {
808            if ((lmDiag[j] == 0) && (nSing == solvedCols)) {
809                nSing = j;
810            }
811            if (nSing < solvedCols) {
812                work[j] = 0;
813            }
814        }
815        if (nSing > 0) {
816            for (int j = nSing - 1; j >= 0; --j) {
817                int pj = permutation[j];
818                double sum = 0;
819                for (int i = j + 1; i < nSing; ++i) {
820                    sum += weightedJacobian[i][pj] * work[i];
821                }
822                work[j] = (work[j] - sum) / lmDiag[j];
823            }
824        }
825
826        // permute the components of z back to components of lmDir
827        for (int j = 0; j < lmDir.length; ++j) {
828            lmDir[permutation[j]] = work[j];
829        }
830    }
831
832    /**
833     * Decompose a matrix A as A.P = Q.R using Householder transforms.
834     * <p>As suggested in the P. Lascaux and R. Theodor book
835     * <i>Analyse num&eacute;rique matricielle appliqu&eacute;e &agrave;
836     * l'art de l'ing&eacute;nieur</i> (Masson, 1986), instead of representing
837     * the Householder transforms with u<sub>k</sub> unit vectors such that:
838     * <pre>
839     * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup>
840     * </pre>
841     * we use <sub>k</sub> non-unit vectors such that:
842     * <pre>
843     * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup>
844     * </pre>
845     * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>.
846     * The beta<sub>k</sub> coefficients are provided upon exit as recomputing
847     * them from the v<sub>k</sub> vectors would be costly.</p>
848     * <p>This decomposition handles rank deficient cases since the tranformations
849     * are performed in non-increasing columns norms order thanks to columns
850     * pivoting. The diagonal elements of the R matrix are therefore also in
851     * non-increasing absolute values order.</p>
852     *
853     * @param jacobian Weighted Jacobian matrix at the current point.
854     * @exception ConvergenceException if the decomposition cannot be performed
855     */
856    private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException {
857        // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J),
858        // hence the multiplication by -1.
859        weightedJacobian = jacobian.scalarMultiply(-1).getData();
860
861        final int nR = weightedJacobian.length;
862        final int nC = weightedJacobian[0].length;
863
864        // initializations
865        for (int k = 0; k < nC; ++k) {
866            permutation[k] = k;
867            double norm2 = 0;
868            for (int i = 0; i < nR; ++i) {
869                double akk = weightedJacobian[i][k];
870                norm2 += akk * akk;
871            }
872            jacNorm[k] = FastMath.sqrt(norm2);
873        }
874
875        // transform the matrix column after column
876        for (int k = 0; k < nC; ++k) {
877
878            // select the column with the greatest norm on active components
879            int nextColumn = -1;
880            double ak2 = Double.NEGATIVE_INFINITY;
881            for (int i = k; i < nC; ++i) {
882                double norm2 = 0;
883                for (int j = k; j < nR; ++j) {
884                    double aki = weightedJacobian[j][permutation[i]];
885                    norm2 += aki * aki;
886                }
887                if (Double.isInfinite(norm2) || Double.isNaN(norm2)) {
888                    throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN,
889                                                   nR, nC);
890                }
891                if (norm2 > ak2) {
892                    nextColumn = i;
893                    ak2        = norm2;
894                }
895            }
896            if (ak2 <= qrRankingThreshold) {
897                rank = k;
898                return;
899            }
900            int pk                  = permutation[nextColumn];
901            permutation[nextColumn] = permutation[k];
902            permutation[k]          = pk;
903
904            // choose alpha such that Hk.u = alpha ek
905            double akk   = weightedJacobian[k][pk];
906            double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2);
907            double betak = 1.0 / (ak2 - akk * alpha);
908            beta[pk]     = betak;
909
910            // transform the current column
911            diagR[pk]        = alpha;
912            weightedJacobian[k][pk] -= alpha;
913
914            // transform the remaining columns
915            for (int dk = nC - 1 - k; dk > 0; --dk) {
916                double gamma = 0;
917                for (int j = k; j < nR; ++j) {
918                    gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]];
919                }
920                gamma *= betak;
921                for (int j = k; j < nR; ++j) {
922                    weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk];
923                }
924            }
925        }
926        rank = solvedCols;
927    }
928
929    /**
930     * Compute the product Qt.y for some Q.R. decomposition.
931     *
932     * @param y vector to multiply (will be overwritten with the result)
933     */
934    private void qTy(double[] y) {
935        final int nR = weightedJacobian.length;
936        final int nC = weightedJacobian[0].length;
937
938        for (int k = 0; k < nC; ++k) {
939            int pk = permutation[k];
940            double gamma = 0;
941            for (int i = k; i < nR; ++i) {
942                gamma += weightedJacobian[i][pk] * y[i];
943            }
944            gamma *= beta[pk];
945            for (int i = k; i < nR; ++i) {
946                y[i] -= gamma * weightedJacobian[i][pk];
947            }
948        }
949    }
950
951    /**
952     * @throws MathUnsupportedOperationException if bounds were passed to the
953     * {@link #optimize(OptimizationData[]) optimize} method.
954     */
955    private void checkParameters() {
956        if (getLowerBound() != null ||
957            getUpperBound() != null) {
958            throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT);
959        }
960    }
961}