001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math4.optim.nonlinear.scalar.gradient;
019
020import org.apache.commons.math4.exception.MathInternalError;
021import org.apache.commons.math4.exception.MathUnsupportedOperationException;
022import org.apache.commons.math4.exception.TooManyEvaluationsException;
023import org.apache.commons.math4.exception.util.LocalizedFormats;
024import org.apache.commons.math4.optim.ConvergenceChecker;
025import org.apache.commons.math4.optim.OptimizationData;
026import org.apache.commons.math4.optim.PointValuePair;
027import org.apache.commons.math4.optim.nonlinear.scalar.GoalType;
028import org.apache.commons.math4.optim.nonlinear.scalar.GradientMultivariateOptimizer;
029import org.apache.commons.math4.optim.nonlinear.scalar.LineSearch;
030
031
032/**
033 * Non-linear conjugate gradient optimizer.
034 * <br>
035 * This class supports both the Fletcher-Reeves and the Polak-Ribière
036 * update formulas for the conjugate search directions.
037 * It also supports optional preconditioning.
038 * <br>
039 * Constraints are not supported: the call to
040 * {@link #optimize(OptimizationData[]) optimize} will throw
041 * {@link MathUnsupportedOperationException} if bounds are passed to it.
042 *
043 * @since 2.0
044 */
045public class NonLinearConjugateGradientOptimizer
046    extends GradientMultivariateOptimizer {
047    /** Update formula for the beta parameter. */
048    private final Formula updateFormula;
049    /** Preconditioner (may be null). */
050    private final Preconditioner preconditioner;
051    /** Line search algorithm. */
052    private final LineSearch line;
053
054    /**
055     * Available choices of update formulas for the updating the parameter
056     * that is used to compute the successive conjugate search directions.
057     * For non-linear conjugate gradients, there are
058     * two formulas:
059     * <ul>
060     *   <li>Fletcher-Reeves formula</li>
061     *   <li>Polak-Ribière formula</li>
062     * </ul>
063     *
064     * On the one hand, the Fletcher-Reeves formula is guaranteed to converge
065     * if the start point is close enough of the optimum whether the
066     * Polak-Ribière formula may not converge in rare cases. On the
067     * other hand, the Polak-Ribière formula is often faster when it
068     * does converge. Polak-Ribière is often used.
069     *
070     * @since 2.0
071     */
072    public enum Formula {
073        /** Fletcher-Reeves formula. */
074        FLETCHER_REEVES,
075        /** Polak-Ribière formula. */
076        POLAK_RIBIERE
077    }
078
079    /**
080     * Constructor with default tolerances for the line search (1e-8) and
081     * {@link IdentityPreconditioner preconditioner}.
082     *
083     * @param updateFormula formula to use for updating the &beta; parameter,
084     * must be one of {@link Formula#FLETCHER_REEVES} or
085     * {@link Formula#POLAK_RIBIERE}.
086     * @param checker Convergence checker.
087     */
088    public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
089                                               ConvergenceChecker<PointValuePair> checker) {
090        this(updateFormula,
091             checker,
092             1e-8,
093             1e-8,
094             1e-8,
095             new IdentityPreconditioner());
096    }
097
098    /**
099     * Constructor with default {@link IdentityPreconditioner preconditioner}.
100     *
101     * @param updateFormula formula to use for updating the &beta; parameter,
102     * must be one of {@link Formula#FLETCHER_REEVES} or
103     * {@link Formula#POLAK_RIBIERE}.
104     * @param checker Convergence checker.
105     * @param relativeTolerance Relative threshold for line search.
106     * @param absoluteTolerance Absolute threshold for line search.
107     * @param initialBracketingRange Extent of the initial interval used to
108     * find an interval that brackets the optimum in order to perform the
109     * line search.
110     *
111     * @see LineSearch#LineSearch(org.apache.commons.math4.optim.nonlinear.scalar.MultivariateOptimizer,double,double,double)
112     * @since 3.3
113     */
114    public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
115                                               ConvergenceChecker<PointValuePair> checker,
116                                               double relativeTolerance,
117                                               double absoluteTolerance,
118                                               double initialBracketingRange) {
119        this(updateFormula,
120             checker,
121             relativeTolerance,
122             absoluteTolerance,
123             initialBracketingRange,
124             new IdentityPreconditioner());
125    }
126
127    /**
128     * @param updateFormula formula to use for updating the &beta; parameter,
129     * must be one of {@link Formula#FLETCHER_REEVES} or
130     * {@link Formula#POLAK_RIBIERE}.
131     * @param checker Convergence checker.
132     * @param preconditioner Preconditioner.
133     * @param relativeTolerance Relative threshold for line search.
134     * @param absoluteTolerance Absolute threshold for line search.
135     * @param initialBracketingRange Extent of the initial interval used to
136     * find an interval that brackets the optimum in order to perform the
137     * line search.
138     *
139     * @see LineSearch#LineSearch(org.apache.commons.math4.optim.nonlinear.scalar.MultivariateOptimizer, double, double, double)
140     * @since 3.3
141     */
142    public NonLinearConjugateGradientOptimizer(final Formula updateFormula,
143                                               ConvergenceChecker<PointValuePair> checker,
144                                               double relativeTolerance,
145                                               double absoluteTolerance,
146                                               double initialBracketingRange,
147                                               final Preconditioner preconditioner) {
148        super(checker);
149
150        this.updateFormula = updateFormula;
151        this.preconditioner = preconditioner;
152        line = new LineSearch(this,
153                              relativeTolerance,
154                              absoluteTolerance,
155                              initialBracketingRange);
156    }
157
158    /**
159     * {@inheritDoc}
160     */
161    @Override
162    public PointValuePair optimize(OptimizationData... optData)
163        throws TooManyEvaluationsException {
164        // Set up base class and perform computation.
165        return super.optimize(optData);
166    }
167
168    /** {@inheritDoc} */
169    @Override
170    protected PointValuePair doOptimize() {
171        final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker();
172        final double[] point = getStartPoint();
173        final GoalType goal = getGoalType();
174        final int n = point.length;
175        double[] r = computeObjectiveGradient(point);
176        if (goal == GoalType.MINIMIZE) {
177            for (int i = 0; i < n; i++) {
178                r[i] = -r[i];
179            }
180        }
181
182        // Initial search direction.
183        double[] steepestDescent = preconditioner.precondition(point, r);
184        double[] searchDirection = steepestDescent.clone();
185
186        double delta = 0;
187        for (int i = 0; i < n; ++i) {
188            delta += r[i] * searchDirection[i];
189        }
190
191        PointValuePair current = null;
192        while (true) {
193            incrementIterationCount();
194
195            final double objective = computeObjectiveValue(point);
196            PointValuePair previous = current;
197            current = new PointValuePair(point, objective);
198            if (previous != null && checker.converged(getIterations(), previous, current)) {
199                // We have found an optimum.
200                return current;
201            }
202
203            final double step = line.search(point, searchDirection).getPoint();
204
205            // Validate new point.
206            for (int i = 0; i < point.length; ++i) {
207                point[i] += step * searchDirection[i];
208            }
209
210            r = computeObjectiveGradient(point);
211            if (goal == GoalType.MINIMIZE) {
212                for (int i = 0; i < n; ++i) {
213                    r[i] = -r[i];
214                }
215            }
216
217            // Compute beta.
218            final double deltaOld = delta;
219            final double[] newSteepestDescent = preconditioner.precondition(point, r);
220            delta = 0;
221            for (int i = 0; i < n; ++i) {
222                delta += r[i] * newSteepestDescent[i];
223            }
224
225            final double beta;
226            switch (updateFormula) {
227            case FLETCHER_REEVES:
228                beta = delta / deltaOld;
229                break;
230            case POLAK_RIBIERE:
231                double deltaMid = 0;
232                for (int i = 0; i < r.length; ++i) {
233                    deltaMid += r[i] * steepestDescent[i];
234                }
235                beta = (delta - deltaMid) / deltaOld;
236                break;
237            default:
238                // Should never happen.
239                throw new MathInternalError();
240            }
241            steepestDescent = newSteepestDescent;
242
243            // Compute conjugate search direction.
244            if (getIterations() % n == 0 ||
245                beta < 0) {
246                // Break conjugation: reset search direction.
247                searchDirection = steepestDescent.clone();
248            } else {
249                // Compute new conjugate search direction.
250                for (int i = 0; i < n; ++i) {
251                    searchDirection[i] = steepestDescent[i] + beta * searchDirection[i];
252                }
253            }
254        }
255    }
256
257    /**
258     * {@inheritDoc}
259     */
260    @Override
261    protected void parseOptimizationData(OptimizationData... optData) {
262        // Allow base class to register its own data.
263        super.parseOptimizationData(optData);
264
265        checkParameters();
266    }
267
268    /** Default identity preconditioner. */
269    public static class IdentityPreconditioner implements Preconditioner {
270        /** {@inheritDoc} */
271        @Override
272        public double[] precondition(double[] variables, double[] r) {
273            return r.clone();
274        }
275    }
276
277    // Class is not used anymore (cf. MATH-1092). However, it might
278    // be interesting to create a class similar to "LineSearch", but
279    // that will take advantage that the model's gradient is available.
280//     /**
281//      * Internal class for line search.
282//      * <p>
283//      * The function represented by this class is the dot product of
284//      * the objective function gradient and the search direction. Its
285//      * value is zero when the gradient is orthogonal to the search
286//      * direction, i.e. when the objective function value is a local
287//      * extremum along the search direction.
288//      * </p>
289//      */
290//     private class LineSearchFunction implements UnivariateFunction {
291//         /** Current point. */
292//         private final double[] currentPoint;
293//         /** Search direction. */
294//         private final double[] searchDirection;
295
296//         /**
297//          * @param point Current point.
298//          * @param direction Search direction.
299//          */
300//         public LineSearchFunction(double[] point,
301//                                   double[] direction) {
302//             currentPoint = point.clone();
303//             searchDirection = direction.clone();
304//         }
305
306//         /** {@inheritDoc} */
307//         public double value(double x) {
308//             // current point in the search direction
309//             final double[] shiftedPoint = currentPoint.clone();
310//             for (int i = 0; i < shiftedPoint.length; ++i) {
311//                 shiftedPoint[i] += x * searchDirection[i];
312//             }
313
314//             // gradient of the objective function
315//             final double[] gradient = computeObjectiveGradient(shiftedPoint);
316
317//             // dot product with the search direction
318//             double dotProduct = 0;
319//             for (int i = 0; i < gradient.length; ++i) {
320//                 dotProduct += gradient[i] * searchDirection[i];
321//             }
322
323//             return dotProduct;
324//         }
325//     }
326
327    /**
328     * @throws MathUnsupportedOperationException if bounds were passed to the
329     * {@link #optimize(OptimizationData[]) optimize} method.
330     */
331    private void checkParameters() {
332        if (getLowerBound() != null ||
333            getUpperBound() != null) {
334            throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT);
335        }
336    }
337}