001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math3.fitting;
018    
019    import org.apache.commons.math3.optim.nonlinear.vector.MultivariateVectorOptimizer;
020    import org.apache.commons.math3.analysis.function.HarmonicOscillator;
021    import org.apache.commons.math3.exception.ZeroException;
022    import org.apache.commons.math3.exception.NumberIsTooSmallException;
023    import org.apache.commons.math3.exception.MathIllegalStateException;
024    import org.apache.commons.math3.exception.util.LocalizedFormats;
025    import org.apache.commons.math3.util.FastMath;
026    
027    /**
028     * Class that implements a curve fitting specialized for sinusoids.
029     *
030     * Harmonic fitting is a very simple case of curve fitting. The
031     * estimated coefficients are the amplitude a, the pulsation ω and
032     * the phase &phi;: <code>f (t) = a cos (&omega; t + &phi;)</code>. They are
033     * searched by a least square estimator initialized with a rough guess
034     * based on integrals.
035     *
036     * @version $Id: HarmonicFitter.java 1416643 2012-12-03 19:37:14Z tn $
037     * @since 2.0
038     */
039    public class HarmonicFitter extends CurveFitter<HarmonicOscillator.Parametric> {
040        /**
041         * Simple constructor.
042         * @param optimizer Optimizer to use for the fitting.
043         */
044        public HarmonicFitter(final MultivariateVectorOptimizer optimizer) {
045            super(optimizer);
046        }
047    
048        /**
049         * Fit an harmonic function to the observed points.
050         *
051         * @param initialGuess First guess values in the following order:
052         * <ul>
053         *  <li>Amplitude</li>
054         *  <li>Angular frequency</li>
055         *  <li>Phase</li>
056         * </ul>
057         * @return the parameters of the harmonic function that best fits the
058         * observed points (in the same order as above).
059         */
060        public double[] fit(double[] initialGuess) {
061            return fit(new HarmonicOscillator.Parametric(), initialGuess);
062        }
063    
064        /**
065         * Fit an harmonic function to the observed points.
066         * An initial guess will be automatically computed.
067         *
068         * @return the parameters of the harmonic function that best fits the
069         * observed points (see the other {@link #fit(double[]) fit} method.
070         * @throws NumberIsTooSmallException if the sample is too short for the
071         * the first guess to be computed.
072         * @throws ZeroException if the first guess cannot be computed because
073         * the abscissa range is zero.
074         */
075        public double[] fit() {
076            return fit((new ParameterGuesser(getObservations())).guess());
077        }
078    
079        /**
080         * This class guesses harmonic coefficients from a sample.
081         * <p>The algorithm used to guess the coefficients is as follows:</p>
082         *
083         * <p>We know f (t) at some sampling points t<sub>i</sub> and want to find a,
084         * &omega; and &phi; such that f (t) = a cos (&omega; t + &phi;).
085         * </p>
086         *
087         * <p>From the analytical expression, we can compute two primitives :
088         * <pre>
089         *     If2  (t) = &int; f<sup>2</sup>  = a<sup>2</sup> &times; [t + S (t)] / 2
090         *     If'2 (t) = &int; f'<sup>2</sup> = a<sup>2</sup> &omega;<sup>2</sup> &times; [t - S (t)] / 2
091         *     where S (t) = sin (2 (&omega; t + &phi;)) / (2 &omega;)
092         * </pre>
093         * </p>
094         *
095         * <p>We can remove S between these expressions :
096         * <pre>
097         *     If'2 (t) = a<sup>2</sup> &omega;<sup>2</sup> t - &omega;<sup>2</sup> If2 (t)
098         * </pre>
099         * </p>
100         *
101         * <p>The preceding expression shows that If'2 (t) is a linear
102         * combination of both t and If2 (t): If'2 (t) = A &times; t + B &times; If2 (t)
103         * </p>
104         *
105         * <p>From the primitive, we can deduce the same form for definite
106         * integrals between t<sub>1</sub> and t<sub>i</sub> for each t<sub>i</sub> :
107         * <pre>
108         *   If2 (t<sub>i</sub>) - If2 (t<sub>1</sub>) = A &times; (t<sub>i</sub> - t<sub>1</sub>) + B &times; (If2 (t<sub>i</sub>) - If2 (t<sub>1</sub>))
109         * </pre>
110         * </p>
111         *
112         * <p>We can find the coefficients A and B that best fit the sample
113         * to this linear expression by computing the definite integrals for
114         * each sample points.
115         * </p>
116         *
117         * <p>For a bilinear expression z (x<sub>i</sub>, y<sub>i</sub>) = A &times; x<sub>i</sub> + B &times; y<sub>i</sub>, the
118         * coefficients A and B that minimize a least square criterion
119         * &sum; (z<sub>i</sub> - z (x<sub>i</sub>, y<sub>i</sub>))<sup>2</sup> are given by these expressions:</p>
120         * <pre>
121         *
122         *         &sum;y<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
123         *     A = ------------------------
124         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
125         *
126         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub>
127         *     B = ------------------------
128         *         &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
129         * </pre>
130         * </p>
131         *
132         *
133         * <p>In fact, we can assume both a and &omega; are positive and
134         * compute them directly, knowing that A = a<sup>2</sup> &omega;<sup>2</sup> and that
135         * B = - &omega;<sup>2</sup>. The complete algorithm is therefore:</p>
136         * <pre>
137         *
138         * for each t<sub>i</sub> from t<sub>1</sub> to t<sub>n-1</sub>, compute:
139         *   f  (t<sub>i</sub>)
140         *   f' (t<sub>i</sub>) = (f (t<sub>i+1</sub>) - f(t<sub>i-1</sub>)) / (t<sub>i+1</sub> - t<sub>i-1</sub>)
141         *   x<sub>i</sub> = t<sub>i</sub> - t<sub>1</sub>
142         *   y<sub>i</sub> = &int; f<sup>2</sup> from t<sub>1</sub> to t<sub>i</sub>
143         *   z<sub>i</sub> = &int; f'<sup>2</sup> from t<sub>1</sub> to t<sub>i</sub>
144         *   update the sums &sum;x<sub>i</sub>x<sub>i</sub>, &sum;y<sub>i</sub>y<sub>i</sub>, &sum;x<sub>i</sub>y<sub>i</sub>, &sum;x<sub>i</sub>z<sub>i</sub> and &sum;y<sub>i</sub>z<sub>i</sub>
145         * end for
146         *
147         *            |--------------------------
148         *         \  | &sum;y<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
149         * a     =  \ | ------------------------
150         *           \| &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
151         *
152         *
153         *            |--------------------------
154         *         \  | &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>z<sub>i</sub> - &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>z<sub>i</sub>
155         * &omega;     =  \ | ------------------------
156         *           \| &sum;x<sub>i</sub>x<sub>i</sub> &sum;y<sub>i</sub>y<sub>i</sub> - &sum;x<sub>i</sub>y<sub>i</sub> &sum;x<sub>i</sub>y<sub>i</sub>
157         *
158         * </pre>
159         * </p>
160         *
161         * <p>Once we know &omega;, we can compute:
162         * <pre>
163         *    fc = &omega; f (t) cos (&omega; t) - f' (t) sin (&omega; t)
164         *    fs = &omega; f (t) sin (&omega; t) + f' (t) cos (&omega; t)
165         * </pre>
166         * </p>
167         *
168         * <p>It appears that <code>fc = a &omega; cos (&phi;)</code> and
169         * <code>fs = -a &omega; sin (&phi;)</code>, so we can use these
170         * expressions to compute &phi;. The best estimate over the sample is
171         * given by averaging these expressions.
172         * </p>
173         *
174         * <p>Since integrals and means are involved in the preceding
175         * estimations, these operations run in O(n) time, where n is the
176         * number of measurements.</p>
177         */
178        public static class ParameterGuesser {
179            /** Amplitude. */
180            private final double a;
181            /** Angular frequency. */
182            private final double omega;
183            /** Phase. */
184            private final double phi;
185    
186            /**
187             * Simple constructor.
188             *
189             * @param observations Sampled observations.
190             * @throws NumberIsTooSmallException if the sample is too short.
191             * @throws ZeroException if the abscissa range is zero.
192             * @throws MathIllegalStateException when the guessing procedure cannot
193             * produce sensible results.
194             */
195            public ParameterGuesser(WeightedObservedPoint[] observations) {
196                if (observations.length < 4) {
197                    throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE,
198                                                        observations.length, 4, true);
199                }
200    
201                final WeightedObservedPoint[] sorted = sortObservations(observations);
202    
203                final double aOmega[] = guessAOmega(sorted);
204                a = aOmega[0];
205                omega = aOmega[1];
206    
207                phi = guessPhi(sorted);
208            }
209    
210            /**
211             * Gets an estimation of the parameters.
212             *
213             * @return the guessed parameters, in the following order:
214             * <ul>
215             *  <li>Amplitude</li>
216             *  <li>Angular frequency</li>
217             *  <li>Phase</li>
218             * </ul>
219             */
220            public double[] guess() {
221                return new double[] { a, omega, phi };
222            }
223    
224            /**
225             * Sort the observations with respect to the abscissa.
226             *
227             * @param unsorted Input observations.
228             * @return the input observations, sorted.
229             */
230            private WeightedObservedPoint[] sortObservations(WeightedObservedPoint[] unsorted) {
231                final WeightedObservedPoint[] observations = unsorted.clone();
232    
233                // Since the samples are almost always already sorted, this
234                // method is implemented as an insertion sort that reorders the
235                // elements in place. Insertion sort is very efficient in this case.
236                WeightedObservedPoint curr = observations[0];
237                for (int j = 1; j < observations.length; ++j) {
238                    WeightedObservedPoint prec = curr;
239                    curr = observations[j];
240                    if (curr.getX() < prec.getX()) {
241                        // the current element should be inserted closer to the beginning
242                        int i = j - 1;
243                        WeightedObservedPoint mI = observations[i];
244                        while ((i >= 0) && (curr.getX() < mI.getX())) {
245                            observations[i + 1] = mI;
246                            if (i-- != 0) {
247                                mI = observations[i];
248                            }
249                        }
250                        observations[i + 1] = curr;
251                        curr = observations[j];
252                    }
253                }
254    
255                return observations;
256            }
257    
258            /**
259             * Estimate a first guess of the amplitude and angular frequency.
260             * This method assumes that the {@link #sortObservations()} method
261             * has been called previously.
262             *
263             * @param observations Observations, sorted w.r.t. abscissa.
264             * @throws ZeroException if the abscissa range is zero.
265             * @throws MathIllegalStateException when the guessing procedure cannot
266             * produce sensible results.
267             * @return the guessed amplitude (at index 0) and circular frequency
268             * (at index 1).
269             */
270            private double[] guessAOmega(WeightedObservedPoint[] observations) {
271                final double[] aOmega = new double[2];
272    
273                // initialize the sums for the linear model between the two integrals
274                double sx2 = 0;
275                double sy2 = 0;
276                double sxy = 0;
277                double sxz = 0;
278                double syz = 0;
279    
280                double currentX = observations[0].getX();
281                double currentY = observations[0].getY();
282                double f2Integral = 0;
283                double fPrime2Integral = 0;
284                final double startX = currentX;
285                for (int i = 1; i < observations.length; ++i) {
286                    // one step forward
287                    final double previousX = currentX;
288                    final double previousY = currentY;
289                    currentX = observations[i].getX();
290                    currentY = observations[i].getY();
291    
292                    // update the integrals of f<sup>2</sup> and f'<sup>2</sup>
293                    // considering a linear model for f (and therefore constant f')
294                    final double dx = currentX - previousX;
295                    final double dy = currentY - previousY;
296                    final double f2StepIntegral =
297                        dx * (previousY * previousY + previousY * currentY + currentY * currentY) / 3;
298                    final double fPrime2StepIntegral = dy * dy / dx;
299    
300                    final double x = currentX - startX;
301                    f2Integral += f2StepIntegral;
302                    fPrime2Integral += fPrime2StepIntegral;
303    
304                    sx2 += x * x;
305                    sy2 += f2Integral * f2Integral;
306                    sxy += x * f2Integral;
307                    sxz += x * fPrime2Integral;
308                    syz += f2Integral * fPrime2Integral;
309                }
310    
311                // compute the amplitude and pulsation coefficients
312                double c1 = sy2 * sxz - sxy * syz;
313                double c2 = sxy * sxz - sx2 * syz;
314                double c3 = sx2 * sy2 - sxy * sxy;
315                if ((c1 / c2 < 0) || (c2 / c3 < 0)) {
316                    final int last = observations.length - 1;
317                    // Range of the observations, assuming that the
318                    // observations are sorted.
319                    final double xRange = observations[last].getX() - observations[0].getX();
320                    if (xRange == 0) {
321                        throw new ZeroException();
322                    }
323                    aOmega[1] = 2 * Math.PI / xRange;
324    
325                    double yMin = Double.POSITIVE_INFINITY;
326                    double yMax = Double.NEGATIVE_INFINITY;
327                    for (int i = 1; i < observations.length; ++i) {
328                        final double y = observations[i].getY();
329                        if (y < yMin) {
330                            yMin = y;
331                        }
332                        if (y > yMax) {
333                            yMax = y;
334                        }
335                    }
336                    aOmega[0] = 0.5 * (yMax - yMin);
337                } else {
338                    if (c2 == 0) {
339                        // In some ill-conditioned cases (cf. MATH-844), the guesser
340                        // procedure cannot produce sensible results.
341                        throw new MathIllegalStateException(LocalizedFormats.ZERO_DENOMINATOR);
342                    }
343    
344                    aOmega[0] = FastMath.sqrt(c1 / c2);
345                    aOmega[1] = FastMath.sqrt(c2 / c3);
346                }
347    
348                return aOmega;
349            }
350    
351            /**
352             * Estimate a first guess of the phase.
353             *
354             * @param observations Observations, sorted w.r.t. abscissa.
355             * @return the guessed phase.
356             */
357            private double guessPhi(WeightedObservedPoint[] observations) {
358                // initialize the means
359                double fcMean = 0;
360                double fsMean = 0;
361    
362                double currentX = observations[0].getX();
363                double currentY = observations[0].getY();
364                for (int i = 1; i < observations.length; ++i) {
365                    // one step forward
366                    final double previousX = currentX;
367                    final double previousY = currentY;
368                    currentX = observations[i].getX();
369                    currentY = observations[i].getY();
370                    final double currentYPrime = (currentY - previousY) / (currentX - previousX);
371    
372                    double omegaX = omega * currentX;
373                    double cosine = FastMath.cos(omegaX);
374                    double sine = FastMath.sin(omegaX);
375                    fcMean += omega * currentY * cosine - currentYPrime * sine;
376                    fsMean += omega * currentY * sine + currentYPrime * cosine;
377                }
378    
379                return FastMath.atan2(-fsMean, fcMean);
380            }
381        }
382    }