Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.legacy.optim.nonlinear.scalar.gradient;
019
020import org.apache.commons.math4.legacy.analysis.MultivariateFunction;
021import org.apache.commons.math4.legacy.exception.MathInternalError;
022import org.apache.commons.math4.legacy.exception.MathUnsupportedOperationException;
023import org.apache.commons.math4.legacy.exception.TooManyEvaluationsException;
024import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
025import org.apache.commons.math4.legacy.optim.ConvergenceChecker;
026import org.apache.commons.math4.legacy.optim.OptimizationData;
027import org.apache.commons.math4.legacy.optim.PointValuePair;
028import org.apache.commons.math4.legacy.optim.nonlinear.scalar.GoalType;
029import org.apache.commons.math4.legacy.optim.nonlinear.scalar.GradientMultivariateOptimizer;
030
031/**
032 * Non-linear conjugate gradient optimizer.
033 * <br>
034 * This class supports both the Fletcher-Reeves and the Polak-Ribière
035 * update formulas for the conjugate search directions.
036 * It also supports optional preconditioning.
037 * <br>
038 * Line search must be setup via {@link org.apache.commons.math4.legacy.optim.nonlinear.scalar.LineSearchTolerance}.
039 * <br>
040 * Constraints are not supported: the call to
041 * {@link #optimize(OptimizationData[]) optimize} will throw
042 * {@link MathUnsupportedOperationException} if bounds are passed to it.
043 *
044 * @since 2.0
045 */
046public class NonLinearConjugateGradientOptimizer
047    extends GradientMultivariateOptimizer {
048    /** Update formula for the beta parameter. */
049    private final Formula updateFormula;
050    /** Preconditioner (may be null). */
051    private final Preconditioner preconditioner;
052
053    /**
054     * Available choices of update formulas for the updating the parameter
055     * that is used to compute the successive conjugate search directions.
056     * For non-linear conjugate gradients, there are
057     * two formulas:
058     * <ul>
059     *   <li>Fletcher-Reeves formula</li>
060     *   <li>Polak-Ribière formula</li>
061     * </ul>
062     *
063     * On the one hand, the Fletcher-Reeves formula is guaranteed to converge
064     * if the start point is close enough of the optimum whether the
065     * Polak-Ribière formula may not converge in rare cases. On the
066     * other hand, the Polak-Ribière formula is often faster when it
067     * does converge. Polak-Ribière is often used.
068     *
069     * @since 2.0
070     */
071    public enum Formula {
072        /** Fletcher-Reeves formula. */
073        FLETCHER_REEVES,
074        /** Polak-Ribière formula. */
075        POLAK_RIBIERE
076    }
077
078    /**
079     * Constructor with default {@link IdentityPreconditioner preconditioner}.
080     *
081     * @param updateFormula formula to use for updating the &beta; parameter,
082     * must be one of {@link Formula#FLETCHER_REEVES} or
083     * {@link Formula#POLAK_RIBIERE}.
084     * @param checker Convergence checker.
085     *
086     * @since 3.3
087     */
088    public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
089                                               ConvergenceChecker<PointValuePair> checker) {
090        this(updateFormula,
091             checker,
092             new IdentityPreconditioner());
093    }
094
095    /**
096     * @param updateFormula formula to use for updating the &beta; parameter,
097     * must be one of {@link Formula#FLETCHER_REEVES} or
098     * {@link Formula#POLAK_RIBIERE}.
099     * @param checker Convergence checker.
100     * @param preconditioner Preconditioner.
101     *
102     * @since 3.3
103     */
104    public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
105                                               ConvergenceChecker<PointValuePair> checker,
106                                               final Preconditioner preconditioner) {
107        super(checker);
108
109        this.updateFormula = updateFormula;
110        this.preconditioner = preconditioner;
111    }
112
113    /**
114     * {@inheritDoc}
115     */
116    @Override
117    public PointValuePair optimize(OptimizationData... optData)
118        throws TooManyEvaluationsException {
119        // Set up base class and perform computation.
120        return super.optimize(optData);
121    }
122
123    /** {@inheritDoc} */
124    @Override
125    protected PointValuePair doOptimize() {
126        final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker();
127        final double[] point = getStartPoint();
128        final GoalType goal = getGoalType();
129        final MultivariateFunction func = getObjectiveFunction();
130        final int n = point.length;
131        double[] r = computeObjectiveGradient(point);
132        if (goal == GoalType.MINIMIZE) {
133            for (int i = 0; i < n; i++) {
134                r[i] = -r[i];
135            }
136        }
137
138        // Initial search direction.
139        double[] steepestDescent = preconditioner.precondition(point, r);
140        double[] searchDirection = steepestDescent.clone();
141
142        double delta = 0;
143        for (int i = 0; i < n; ++i) {
144            delta += r[i] * searchDirection[i];
145        }
146
147        createLineSearch();
148
149        PointValuePair current = null;
150        while (true) {
151            incrementIterationCount();
152
153            final double objective = func.value(point);
154            PointValuePair previous = current;
155            current = new PointValuePair(point, objective);
156            if (previous != null &&
157                checker.converged(getIterations(), previous, current)) {
158                // We have found an optimum.
159                return current;
160            }
161
162            final double step = lineSearch(point, searchDirection).getPoint();
163
164            // Validate new point.
165            for (int i = 0; i < point.length; ++i) {
166                point[i] += step * searchDirection[i];
167            }
168
169            r = computeObjectiveGradient(point);
170            if (goal == GoalType.MINIMIZE) {
171                for (int i = 0; i < n; ++i) {
172                    r[i] = -r[i];
173                }
174            }
175
176            // Compute beta.
177            final double deltaOld = delta;
178            final double[] newSteepestDescent = preconditioner.precondition(point, r);
179            delta = 0;
180            for (int i = 0; i < n; ++i) {
181                delta += r[i] * newSteepestDescent[i];
182            }
183
184            final double beta;
185            switch (updateFormula) {
186            case FLETCHER_REEVES:
187                beta = delta / deltaOld;
188                break;
189            case POLAK_RIBIERE:
190                double deltaMid = 0;
191                for (int i = 0; i < r.length; ++i) {
192                    deltaMid += r[i] * steepestDescent[i];
193                }
194                beta = (delta - deltaMid) / deltaOld;
195                break;
196            default:
197                // Should never happen.
198                throw new MathInternalError();
199            }
200            steepestDescent = newSteepestDescent;
201
202            // Compute conjugate search direction.
203            if (getIterations() % n == 0 ||
204                beta < 0) {
205                // Break conjugation: reset search direction.
206                searchDirection = steepestDescent.clone();
207            } else {
208                // Compute new conjugate search direction.
209                for (int i = 0; i < n; ++i) {
210                    searchDirection[i] = steepestDescent[i] + beta * searchDirection[i];
211                }
212            }
213        }
214    }
215
216    /**
217     * {@inheritDoc}
218     */
219    @Override
220    protected void parseOptimizationData(OptimizationData... optData) {
221        // Allow base class to register its own data.
222        super.parseOptimizationData(optData);
223
224        checkParameters();
225    }
226
227    /** Default identity preconditioner. */
228    public static class IdentityPreconditioner implements Preconditioner {
229        /** {@inheritDoc} */
230        @Override
231        public double[] precondition(double[] variables, double[] r) {
232            return r.clone();
233        }
234    }
235
236    // Class is not used anymore (cf. MATH-1092). However, it might
237    // be interesting to create a class similar to "LineSearch", but
238    // that will take advantage that the model's gradient is available.
239//     /**
240//      * Internal class for line search.
241//      * <p>
242//      * The function represented by this class is the dot product of
243//      * the objective function gradient and the search direction. Its
244//      * value is zero when the gradient is orthogonal to the search
245//      * direction, i.e. when the objective function value is a local
246//      * extremum along the search direction.
247//      * </p>
248//      */
249//     private class LineSearchFunction implements UnivariateFunction {
250//         /** Current point. */
251//         private final double[] currentPoint;
252//         /** Search direction. */
253//         private final double[] searchDirection;
254
255//         /**
256//          * @param point Current point.
257//          * @param direction Search direction.
258//          */
259//         public LineSearchFunction(double[] point,
260//                                   double[] direction) {
261//             currentPoint = point.clone();
262//             searchDirection = direction.clone();
263//         }
264
265//         /** {@inheritDoc} */
266//         public double value(double x) {
267//             // current point in the search direction
268//             final double[] shiftedPoint = currentPoint.clone();
269//             for (int i = 0; i < shiftedPoint.length; ++i) {
270//                 shiftedPoint[i] += x * searchDirection[i];
271//             }
272
273//             // gradient of the objective function
274//             final double[] gradient = computeObjectiveGradient(shiftedPoint);
275
276//             // dot product with the search direction
277//             double dotProduct = 0;
278//             for (int i = 0; i < gradient.length; ++i) {
279//                 dotProduct += gradient[i] * searchDirection[i];
280//             }
281
282//             return dotProduct;
283//         }
284//     }
285
286    /**
287     * @throws MathUnsupportedOperationException if bounds were passed to the
288     * {@link #optimize(OptimizationData[]) optimize} method.
289     */
290    private void checkParameters() {
291        if (getLowerBound() != null ||
292            getUpperBound() != null) {
293            throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT);
294        }
295    }
296}