001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.analysis.interpolation;
018    
019    import java.io.Serializable;
020    import java.util.Arrays;
021    
022    import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction;
023    import org.apache.commons.math.exception.NotPositiveException;
024    import org.apache.commons.math.exception.OutOfRangeException;
025    import org.apache.commons.math.exception.DimensionMismatchException;
026    import org.apache.commons.math.exception.NoDataException;
027    import org.apache.commons.math.exception.NumberIsTooSmallException;
028    import org.apache.commons.math.exception.util.LocalizedFormats;
029    import org.apache.commons.math.util.FastMath;
030    import org.apache.commons.math.util.MathUtils;
031    import org.apache.commons.math.util.MathArrays;
032    
033    /**
034     * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression">
035     * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of
036     * real univariate functions.
037     * <p/>
038     * For reference, see
039     * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf">
040     * William S. Cleveland - Robust Locally Weighted Regression and Smoothing
041     * Scatterplots</a>
042     * <p/>
043     * This class implements both the loess method and serves as an interpolation
044     * adapter to it, allowing one to build a spline on the obtained loess fit.
045     *
046     * @version $Id: LoessInterpolator.java 1182134 2011-10-11 22:55:08Z erans $
047     * @since 2.0
048     */
049    public class LoessInterpolator
050            implements UnivariateRealInterpolator, Serializable {
051        /** Default value of the bandwidth parameter. */
052        public static final double DEFAULT_BANDWIDTH = 0.3;
053        /** Default value of the number of robustness iterations. */
054        public static final int DEFAULT_ROBUSTNESS_ITERS = 2;
055        /**
056         * Default value for accuracy.
057         * @since 2.1
058         */
059        public static final double DEFAULT_ACCURACY = 1e-12;
060        /** serializable version identifier. */
061        private static final long serialVersionUID = 5204927143605193821L;
062        /**
063         * The bandwidth parameter: when computing the loess fit at
064         * a particular point, this fraction of source points closest
065         * to the current point is taken into account for computing
066         * a least-squares regression.
067         * <p/>
068         * A sensible value is usually 0.25 to 0.5.
069         */
070        private final double bandwidth;
071        /**
072         * The number of robustness iterations parameter: this many
073         * robustness iterations are done.
074         * <p/>
075         * A sensible value is usually 0 (just the initial fit without any
076         * robustness iterations) to 4.
077         */
078        private final int robustnessIters;
079        /**
080         * If the median residual at a certain robustness iteration
081         * is less than this amount, no more iterations are done.
082         */
083        private final double accuracy;
084    
085        /**
086         * Constructs a new {@link LoessInterpolator}
087         * with a bandwidth of {@link #DEFAULT_BANDWIDTH},
088         * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations
089         * and an accuracy of {#link #DEFAULT_ACCURACY}.
090         * See {@link #LoessInterpolator(double, int, double)} for an explanation of
091         * the parameters.
092         */
093        public LoessInterpolator() {
094            this.bandwidth = DEFAULT_BANDWIDTH;
095            this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
096            this.accuracy = DEFAULT_ACCURACY;
097        }
098    
099        /**
100         * Construct a new {@link LoessInterpolator}
101         * with given bandwidth and number of robustness iterations.
102         * <p>
103         * Calling this constructor is equivalent to calling {link {@link
104         * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth,
105         * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)}
106         * </p>
107         *
108         * @param bandwidth  when computing the loess fit at
109         * a particular point, this fraction of source points closest
110         * to the current point is taken into account for computing
111         * a least-squares regression.</br>
112         * A sensible value is usually 0.25 to 0.5, the default value is
113         * {@link #DEFAULT_BANDWIDTH}.
114         * @param robustnessIters This many robustness iterations are done.</br>
115         * A sensible value is usually 0 (just the initial fit without any
116         * robustness iterations) to 4, the default value is
117         * {@link #DEFAULT_ROBUSTNESS_ITERS}.
118    
119         * @see #LoessInterpolator(double, int, double)
120         */
121        public LoessInterpolator(double bandwidth, int robustnessIters) {
122            this(bandwidth, robustnessIters, DEFAULT_ACCURACY);
123        }
124    
125        /**
126         * Construct a new {@link LoessInterpolator}
127         * with given bandwidth, number of robustness iterations and accuracy.
128         *
129         * @param bandwidth  when computing the loess fit at
130         * a particular point, this fraction of source points closest
131         * to the current point is taken into account for computing
132         * a least-squares regression.</br>
133         * A sensible value is usually 0.25 to 0.5, the default value is
134         * {@link #DEFAULT_BANDWIDTH}.
135         * @param robustnessIters This many robustness iterations are done.</br>
136         * A sensible value is usually 0 (just the initial fit without any
137         * robustness iterations) to 4, the default value is
138         * {@link #DEFAULT_ROBUSTNESS_ITERS}.
139         * @param accuracy If the median residual at a certain robustness iteration
140         * is less than this amount, no more iterations are done.
141         * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1].
142         * @throws NotPositiveException if {@code robustnessIters} is negative.
143         * @see #LoessInterpolator(double, int)
144         * @since 2.1
145         */
146        public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy) {
147            if (bandwidth < 0 ||
148                bandwidth > 1) {
149                throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1);
150            }
151            this.bandwidth = bandwidth;
152            if (robustnessIters < 0) {
153                throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters);
154            }
155            this.robustnessIters = robustnessIters;
156            this.accuracy = accuracy;
157        }
158    
159        /**
160         * Compute an interpolating function by performing a loess fit
161         * on the data at the original abscissae and then building a cubic spline
162         * with a
163         * {@link org.apache.commons.math.analysis.interpolation.SplineInterpolator}
164         * on the resulting fit.
165         *
166         * @param xval the arguments for the interpolation points
167         * @param yval the values for the interpolation points
168         * @return A cubic spline built upon a loess fit to the data at the original abscissae
169         * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
170         * if {@code xval} not sorted in strictly increasing order.
171         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
172         * different sizes.
173         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
174         * @throws org.apache.commons.math.exception.NotFiniteNumberException if
175         * any of the arguments and values are not finite real numbers.
176         * @throws NumberIsTooSmallException if the bandwidth is too small to
177         * accomodate the size of the input data (i.e. the bandwidth must be
178         * larger than 2/n).
179         */
180        public final PolynomialSplineFunction interpolate(final double[] xval, final double[] yval) {
181            return new SplineInterpolator().interpolate(xval, smooth(xval, yval));
182        }
183    
184        /**
185         * Compute a weighted loess fit on the data at the original abscissae.
186         *
187         * @param xval Arguments for the interpolation points.
188         * @param yval Values for the interpolation points.
189         * @param weights point weights: coefficients by which the robustness weight
190         * of a point is multiplied.
191         * @return the values of the loess fit at corresponding original abscissae.
192         * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
193         * if {@code xval} not sorted in strictly increasing order.
194         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
195         * different sizes.
196         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
197         * @throws org.apache.commons.math.exception.NotFiniteNumberException if
198         * any of the arguments and values are not finite real numbers.
199         * @throws NumberIsTooSmallException if the bandwidth is too small to
200         * accomodate the size of the input data (i.e. the bandwidth must be
201         * larger than 2/n).
202         * @since 2.1
203         */
204        public final double[] smooth(final double[] xval, final double[] yval,
205                                     final double[] weights)  {
206            if (xval.length != yval.length) {
207                throw new DimensionMismatchException(xval.length, yval.length);
208            }
209    
210            final int n = xval.length;
211    
212            if (n == 0) {
213                throw new NoDataException();
214            }
215    
216            checkAllFiniteReal(xval);
217            checkAllFiniteReal(yval);
218            checkAllFiniteReal(weights);
219    
220            MathArrays.checkOrder(xval);
221    
222            if (n == 1) {
223                return new double[]{yval[0]};
224            }
225    
226            if (n == 2) {
227                return new double[]{yval[0], yval[1]};
228            }
229    
230            int bandwidthInPoints = (int) (bandwidth * n);
231    
232            if (bandwidthInPoints < 2) {
233                throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH,
234                                                    bandwidthInPoints, 2, true);
235            }
236    
237            final double[] res = new double[n];
238    
239            final double[] residuals = new double[n];
240            final double[] sortedResiduals = new double[n];
241    
242            final double[] robustnessWeights = new double[n];
243    
244            // Do an initial fit and 'robustnessIters' robustness iterations.
245            // This is equivalent to doing 'robustnessIters+1' robustness iterations
246            // starting with all robustness weights set to 1.
247            Arrays.fill(robustnessWeights, 1);
248    
249            for (int iter = 0; iter <= robustnessIters; ++iter) {
250                final int[] bandwidthInterval = {0, bandwidthInPoints - 1};
251                // At each x, compute a local weighted linear regression
252                for (int i = 0; i < n; ++i) {
253                    final double x = xval[i];
254    
255                    // Find out the interval of source points on which
256                    // a regression is to be made.
257                    if (i > 0) {
258                        updateBandwidthInterval(xval, weights, i, bandwidthInterval);
259                    }
260    
261                    final int ileft = bandwidthInterval[0];
262                    final int iright = bandwidthInterval[1];
263    
264                    // Compute the point of the bandwidth interval that is
265                    // farthest from x
266                    final int edge;
267                    if (xval[i] - xval[ileft] > xval[iright] - xval[i]) {
268                        edge = ileft;
269                    } else {
270                        edge = iright;
271                    }
272    
273                    // Compute a least-squares linear fit weighted by
274                    // the product of robustness weights and the tricube
275                    // weight function.
276                    // See http://en.wikipedia.org/wiki/Linear_regression
277                    // (section "Univariate linear case")
278                    // and http://en.wikipedia.org/wiki/Weighted_least_squares
279                    // (section "Weighted least squares")
280                    double sumWeights = 0;
281                    double sumX = 0;
282                    double sumXSquared = 0;
283                    double sumY = 0;
284                    double sumXY = 0;
285                    double denom = FastMath.abs(1.0 / (xval[edge] - x));
286                    for (int k = ileft; k <= iright; ++k) {
287                        final double xk   = xval[k];
288                        final double yk   = yval[k];
289                        final double dist = (k < i) ? x - xk : xk - x;
290                        final double w    = tricube(dist * denom) * robustnessWeights[k] * weights[k];
291                        final double xkw  = xk * w;
292                        sumWeights += w;
293                        sumX += xkw;
294                        sumXSquared += xk * xkw;
295                        sumY += yk * w;
296                        sumXY += yk * xkw;
297                    }
298    
299                    final double meanX = sumX / sumWeights;
300                    final double meanY = sumY / sumWeights;
301                    final double meanXY = sumXY / sumWeights;
302                    final double meanXSquared = sumXSquared / sumWeights;
303    
304                    final double beta;
305                    if (FastMath.sqrt(FastMath.abs(meanXSquared - meanX * meanX)) < accuracy) {
306                        beta = 0;
307                    } else {
308                        beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
309                    }
310    
311                    final double alpha = meanY - beta * meanX;
312    
313                    res[i] = beta * x + alpha;
314                    residuals[i] = FastMath.abs(yval[i] - res[i]);
315                }
316    
317                // No need to recompute the robustness weights at the last
318                // iteration, they won't be needed anymore
319                if (iter == robustnessIters) {
320                    break;
321                }
322    
323                // Recompute the robustness weights.
324    
325                // Find the median residual.
326                // An arraycopy and a sort are completely tractable here,
327                // because the preceding loop is a lot more expensive
328                System.arraycopy(residuals, 0, sortedResiduals, 0, n);
329                Arrays.sort(sortedResiduals);
330                final double medianResidual = sortedResiduals[n / 2];
331    
332                if (FastMath.abs(medianResidual) < accuracy) {
333                    break;
334                }
335    
336                for (int i = 0; i < n; ++i) {
337                    final double arg = residuals[i] / (6 * medianResidual);
338                    if (arg >= 1) {
339                        robustnessWeights[i] = 0;
340                    } else {
341                        final double w = 1 - arg * arg;
342                        robustnessWeights[i] = w * w;
343                    }
344                }
345            }
346    
347            return res;
348        }
349    
350        /**
351         * Compute a loess fit on the data at the original abscissae.
352         *
353         * @param xval the arguments for the interpolation points
354         * @param yval the values for the interpolation points
355         * @return values of the loess fit at corresponding original abscissae
356         * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
357         * if {@code xval} not sorted in strictly increasing order.
358         * @throws DimensionMismatchException if {@code xval} and {@code yval} have
359         * different sizes.
360         * @throws NoDataException if {@code xval} or {@code yval} has zero size.
361         * @throws org.apache.commons.math.exception.NotFiniteNumberException if
362         * any of the arguments and values are not finite real numbers.
363         * @throws NumberIsTooSmallException if the bandwidth is too small to
364         * accomodate the size of the input data (i.e. the bandwidth must be
365         * larger than 2/n).
366         */
367        public final double[] smooth(final double[] xval, final double[] yval) {
368            if (xval.length != yval.length) {
369                throw new DimensionMismatchException(xval.length, yval.length);
370            }
371    
372            final double[] unitWeights = new double[xval.length];
373            Arrays.fill(unitWeights, 1.0);
374    
375            return smooth(xval, yval, unitWeights);
376        }
377    
378        /**
379         * Given an index interval into xval that embraces a certain number of
380         * points closest to {@code xval[i-1]}, update the interval so that it
381         * embraces the same number of points closest to {@code xval[i]},
382         * ignoring zero weights.
383         *
384         * @param xval Arguments array.
385         * @param weights Weights array.
386         * @param i Index around which the new interval should be computed.
387         * @param bandwidthInterval a two-element array {left, right} such that:
388         * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])}
389         * and
390         * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}.
391         * The array will be updated.
392         */
393        private static void updateBandwidthInterval(final double[] xval, final double[] weights,
394                                                    final int i,
395                                                    final int[] bandwidthInterval) {
396            final int left = bandwidthInterval[0];
397            final int right = bandwidthInterval[1];
398    
399            // The right edge should be adjusted if the next point to the right
400            // is closer to xval[i] than the leftmost point of the current interval
401            int nextRight = nextNonzero(weights, right);
402            if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) {
403                int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
404                bandwidthInterval[0] = nextLeft;
405                bandwidthInterval[1] = nextRight;
406            }
407        }
408    
409        /**
410         * Return the smallest index {@code j} such that
411         * {@code j > i && (j == weights.length || weights[j] != 0)}.
412         *
413         * @param weights Weights array.
414         * @param i Index from which to start search.
415         * @return the smallest compliant index.
416         */
417        private static int nextNonzero(final double[] weights, final int i) {
418            int j = i + 1;
419            while(j < weights.length && weights[j] == 0) {
420                ++j;
421            }
422            return j;
423        }
424    
425        /**
426         * Compute the
427         * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a>
428         * weight function
429         *
430         * @param x Argument.
431         * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| &lt; 1, 0 otherwise.
432         */
433        private static double tricube(final double x) {
434            final double absX = FastMath.abs(x);
435            if (absX >= 1.0) {
436                return 0.0;
437            }
438            final double tmp = 1 - absX * absX * absX;
439            return tmp * tmp * tmp;
440        }
441    
442        /**
443         * Check that all elements of an array are finite real numbers.
444         *
445         * @param values Values array.
446         * @throws org.apache.commons.math.exception.NotFiniteNumberException
447         * if one of the values is not a finite real number.
448         */
449        private static void checkAllFiniteReal(final double[] values) {
450            for (int i = 0; i < values.length; i++) {
451                MathUtils.checkFinite(values[i]);
452            }
453        }
454    }