001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.math3.optim.nonlinear.scalar;
019
020import org.apache.commons.math3.analysis.MultivariateFunction;
021import org.apache.commons.math3.analysis.MultivariateVectorFunction;
022import org.apache.commons.math3.exception.DimensionMismatchException;
023import org.apache.commons.math3.linear.RealMatrix;
024
025/**
026 * This class converts
027 * {@link MultivariateVectorFunction vectorial objective functions} to
028 * {@link MultivariateFunction scalar objective functions}
029 * when the goal is to minimize them.
030 * <br/>
031 * This class is mostly used when the vectorial objective function represents
032 * a theoretical result computed from a point set applied to a model and
033 * the models point must be adjusted to fit the theoretical result to some
034 * reference observations. The observations may be obtained for example from
035 * physical measurements whether the model is built from theoretical
036 * considerations.
037 * <br/>
038 * This class computes a possibly weighted squared sum of the residuals, which is
039 * a scalar value. The residuals are the difference between the theoretical model
040 * (i.e. the output of the vectorial objective function) and the observations. The
041 * class implements the {@link MultivariateFunction} interface and can therefore be
042 * minimized by any optimizer supporting scalar objectives functions.This is one way
043 * to perform a least square estimation. There are other ways to do this without using
044 * this converter, as some optimization algorithms directly support vectorial objective
045 * functions.
046 * <br/>
047 * This class support combination of residuals with or without weights and correlations.
048  *
049 * @see MultivariateFunction
050 * @see MultivariateVectorFunction
051 * @since 2.0
052 */
053
054public class LeastSquaresConverter implements MultivariateFunction {
055    /** Underlying vectorial function. */
056    private final MultivariateVectorFunction function;
057    /** Observations to be compared to objective function to compute residuals. */
058    private final double[] observations;
059    /** Optional weights for the residuals. */
060    private final double[] weights;
061    /** Optional scaling matrix (weight and correlations) for the residuals. */
062    private final RealMatrix scale;
063
064    /**
065     * Builds a simple converter for uncorrelated residuals with identical
066     * weights.
067     *
068     * @param function vectorial residuals function to wrap
069     * @param observations observations to be compared to objective function to compute residuals
070     */
071    public LeastSquaresConverter(final MultivariateVectorFunction function,
072                                 final double[] observations) {
073        this.function     = function;
074        this.observations = observations.clone();
075        this.weights      = null;
076        this.scale        = null;
077    }
078
079    /**
080     * Builds a simple converter for uncorrelated residuals with the
081     * specified weights.
082     * <p>
083     * The scalar objective function value is computed as:
084     * <pre>
085     * objective = &sum;weight<sub>i</sub>(observation<sub>i</sub>-objective<sub>i</sub>)<sup>2</sup>
086     * </pre>
087     * </p>
088     * <p>
089     * Weights can be used for example to combine residuals with different standard
090     * deviations. As an example, consider a residuals array in which even elements
091     * are angular measurements in degrees with a 0.01&deg; standard deviation and
092     * odd elements are distance measurements in meters with a 15m standard deviation.
093     * In this case, the weights array should be initialized with value
094     * 1.0/(0.01<sup>2</sup>) in the even elements and 1.0/(15.0<sup>2</sup>) in the
095     * odd elements (i.e. reciprocals of variances).
096     * </p>
097     * <p>
098     * The array computed by the objective function, the observations array and the
099     * weights array must have consistent sizes or a {@link DimensionMismatchException}
100     * will be triggered while computing the scalar objective.
101     * </p>
102     *
103     * @param function vectorial residuals function to wrap
104     * @param observations observations to be compared to objective function to compute residuals
105     * @param weights weights to apply to the residuals
106     * @throws DimensionMismatchException if the observations vector and the weights
107     * vector dimensions do not match (objective function dimension is checked only when
108     * the {@link #value(double[])} method is called)
109     */
110    public LeastSquaresConverter(final MultivariateVectorFunction function,
111                                 final double[] observations,
112                                 final double[] weights) {
113        if (observations.length != weights.length) {
114            throw new DimensionMismatchException(observations.length, weights.length);
115        }
116        this.function     = function;
117        this.observations = observations.clone();
118        this.weights      = weights.clone();
119        this.scale        = null;
120    }
121
122    /**
123     * Builds a simple converter for correlated residuals with the
124     * specified weights.
125     * <p>
126     * The scalar objective function value is computed as:
127     * <pre>
128     * objective = y<sup>T</sup>y with y = scale&times;(observation-objective)
129     * </pre>
130     * </p>
131     * <p>
132     * The array computed by the objective function, the observations array and the
133     * the scaling matrix must have consistent sizes or a {@link DimensionMismatchException}
134     * will be triggered while computing the scalar objective.
135     * </p>
136     *
137     * @param function vectorial residuals function to wrap
138     * @param observations observations to be compared to objective function to compute residuals
139     * @param scale scaling matrix
140     * @throws DimensionMismatchException if the observations vector and the scale
141     * matrix dimensions do not match (objective function dimension is checked only when
142     * the {@link #value(double[])} method is called)
143     */
144    public LeastSquaresConverter(final MultivariateVectorFunction function,
145                                 final double[] observations,
146                                 final RealMatrix scale) {
147        if (observations.length != scale.getColumnDimension()) {
148            throw new DimensionMismatchException(observations.length, scale.getColumnDimension());
149        }
150        this.function     = function;
151        this.observations = observations.clone();
152        this.weights      = null;
153        this.scale        = scale.copy();
154    }
155
156    /** {@inheritDoc} */
157    public double value(final double[] point) {
158        // compute residuals
159        final double[] residuals = function.value(point);
160        if (residuals.length != observations.length) {
161            throw new DimensionMismatchException(residuals.length, observations.length);
162        }
163        for (int i = 0; i < residuals.length; ++i) {
164            residuals[i] -= observations[i];
165        }
166
167        // compute sum of squares
168        double sumSquares = 0;
169        if (weights != null) {
170            for (int i = 0; i < residuals.length; ++i) {
171                final double ri = residuals[i];
172                sumSquares +=  weights[i] * ri * ri;
173            }
174        } else if (scale != null) {
175            for (final double yi : scale.operate(residuals)) {
176                sumSquares += yi * yi;
177            }
178        } else {
179            for (final double ri : residuals) {
180                sumSquares += ri * ri;
181            }
182        }
183
184        return sumSquares;
185    }
186}