001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.analysis.interpolation;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021
022 import org.apache.commons.math.analysis.polynomials.PolynomialSplineFunction;
023 import org.apache.commons.math.exception.NotPositiveException;
024 import org.apache.commons.math.exception.OutOfRangeException;
025 import org.apache.commons.math.exception.DimensionMismatchException;
026 import org.apache.commons.math.exception.NoDataException;
027 import org.apache.commons.math.exception.NumberIsTooSmallException;
028 import org.apache.commons.math.exception.util.LocalizedFormats;
029 import org.apache.commons.math.util.FastMath;
030 import org.apache.commons.math.util.MathUtils;
031 import org.apache.commons.math.util.MathArrays;
032
033 /**
034 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression">
035 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of
036 * real univariate functions.
037 * <p/>
038 * For reference, see
039 * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf">
040 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing
041 * Scatterplots</a>
042 * <p/>
043 * This class implements both the loess method and serves as an interpolation
044 * adapter to it, allowing one to build a spline on the obtained loess fit.
045 *
046 * @version $Id: LoessInterpolator.java 1182134 2011-10-11 22:55:08Z erans $
047 * @since 2.0
048 */
049 public class LoessInterpolator
050 implements UnivariateRealInterpolator, Serializable {
051 /** Default value of the bandwidth parameter. */
052 public static final double DEFAULT_BANDWIDTH = 0.3;
053 /** Default value of the number of robustness iterations. */
054 public static final int DEFAULT_ROBUSTNESS_ITERS = 2;
055 /**
056 * Default value for accuracy.
057 * @since 2.1
058 */
059 public static final double DEFAULT_ACCURACY = 1e-12;
060 /** serializable version identifier. */
061 private static final long serialVersionUID = 5204927143605193821L;
062 /**
063 * The bandwidth parameter: when computing the loess fit at
064 * a particular point, this fraction of source points closest
065 * to the current point is taken into account for computing
066 * a least-squares regression.
067 * <p/>
068 * A sensible value is usually 0.25 to 0.5.
069 */
070 private final double bandwidth;
071 /**
072 * The number of robustness iterations parameter: this many
073 * robustness iterations are done.
074 * <p/>
075 * A sensible value is usually 0 (just the initial fit without any
076 * robustness iterations) to 4.
077 */
078 private final int robustnessIters;
079 /**
080 * If the median residual at a certain robustness iteration
081 * is less than this amount, no more iterations are done.
082 */
083 private final double accuracy;
084
085 /**
086 * Constructs a new {@link LoessInterpolator}
087 * with a bandwidth of {@link #DEFAULT_BANDWIDTH},
088 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations
089 * and an accuracy of {#link #DEFAULT_ACCURACY}.
090 * See {@link #LoessInterpolator(double, int, double)} for an explanation of
091 * the parameters.
092 */
093 public LoessInterpolator() {
094 this.bandwidth = DEFAULT_BANDWIDTH;
095 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
096 this.accuracy = DEFAULT_ACCURACY;
097 }
098
099 /**
100 * Construct a new {@link LoessInterpolator}
101 * with given bandwidth and number of robustness iterations.
102 * <p>
103 * Calling this constructor is equivalent to calling {link {@link
104 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth,
105 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)}
106 * </p>
107 *
108 * @param bandwidth when computing the loess fit at
109 * a particular point, this fraction of source points closest
110 * to the current point is taken into account for computing
111 * a least-squares regression.</br>
112 * A sensible value is usually 0.25 to 0.5, the default value is
113 * {@link #DEFAULT_BANDWIDTH}.
114 * @param robustnessIters This many robustness iterations are done.</br>
115 * A sensible value is usually 0 (just the initial fit without any
116 * robustness iterations) to 4, the default value is
117 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
118
119 * @see #LoessInterpolator(double, int, double)
120 */
121 public LoessInterpolator(double bandwidth, int robustnessIters) {
122 this(bandwidth, robustnessIters, DEFAULT_ACCURACY);
123 }
124
125 /**
126 * Construct a new {@link LoessInterpolator}
127 * with given bandwidth, number of robustness iterations and accuracy.
128 *
129 * @param bandwidth when computing the loess fit at
130 * a particular point, this fraction of source points closest
131 * to the current point is taken into account for computing
132 * a least-squares regression.</br>
133 * A sensible value is usually 0.25 to 0.5, the default value is
134 * {@link #DEFAULT_BANDWIDTH}.
135 * @param robustnessIters This many robustness iterations are done.</br>
136 * A sensible value is usually 0 (just the initial fit without any
137 * robustness iterations) to 4, the default value is
138 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
139 * @param accuracy If the median residual at a certain robustness iteration
140 * is less than this amount, no more iterations are done.
141 * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1].
142 * @throws NotPositiveException if {@code robustnessIters} is negative.
143 * @see #LoessInterpolator(double, int)
144 * @since 2.1
145 */
146 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy) {
147 if (bandwidth < 0 ||
148 bandwidth > 1) {
149 throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1);
150 }
151 this.bandwidth = bandwidth;
152 if (robustnessIters < 0) {
153 throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters);
154 }
155 this.robustnessIters = robustnessIters;
156 this.accuracy = accuracy;
157 }
158
159 /**
160 * Compute an interpolating function by performing a loess fit
161 * on the data at the original abscissae and then building a cubic spline
162 * with a
163 * {@link org.apache.commons.math.analysis.interpolation.SplineInterpolator}
164 * on the resulting fit.
165 *
166 * @param xval the arguments for the interpolation points
167 * @param yval the values for the interpolation points
168 * @return A cubic spline built upon a loess fit to the data at the original abscissae
169 * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
170 * if {@code xval} not sorted in strictly increasing order.
171 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
172 * different sizes.
173 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
174 * @throws org.apache.commons.math.exception.NotFiniteNumberException if
175 * any of the arguments and values are not finite real numbers.
176 * @throws NumberIsTooSmallException if the bandwidth is too small to
177 * accomodate the size of the input data (i.e. the bandwidth must be
178 * larger than 2/n).
179 */
180 public final PolynomialSplineFunction interpolate(final double[] xval, final double[] yval) {
181 return new SplineInterpolator().interpolate(xval, smooth(xval, yval));
182 }
183
184 /**
185 * Compute a weighted loess fit on the data at the original abscissae.
186 *
187 * @param xval Arguments for the interpolation points.
188 * @param yval Values for the interpolation points.
189 * @param weights point weights: coefficients by which the robustness weight
190 * of a point is multiplied.
191 * @return the values of the loess fit at corresponding original abscissae.
192 * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
193 * if {@code xval} not sorted in strictly increasing order.
194 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
195 * different sizes.
196 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
197 * @throws org.apache.commons.math.exception.NotFiniteNumberException if
198 * any of the arguments and values are not finite real numbers.
199 * @throws NumberIsTooSmallException if the bandwidth is too small to
200 * accomodate the size of the input data (i.e. the bandwidth must be
201 * larger than 2/n).
202 * @since 2.1
203 */
204 public final double[] smooth(final double[] xval, final double[] yval,
205 final double[] weights) {
206 if (xval.length != yval.length) {
207 throw new DimensionMismatchException(xval.length, yval.length);
208 }
209
210 final int n = xval.length;
211
212 if (n == 0) {
213 throw new NoDataException();
214 }
215
216 checkAllFiniteReal(xval);
217 checkAllFiniteReal(yval);
218 checkAllFiniteReal(weights);
219
220 MathArrays.checkOrder(xval);
221
222 if (n == 1) {
223 return new double[]{yval[0]};
224 }
225
226 if (n == 2) {
227 return new double[]{yval[0], yval[1]};
228 }
229
230 int bandwidthInPoints = (int) (bandwidth * n);
231
232 if (bandwidthInPoints < 2) {
233 throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH,
234 bandwidthInPoints, 2, true);
235 }
236
237 final double[] res = new double[n];
238
239 final double[] residuals = new double[n];
240 final double[] sortedResiduals = new double[n];
241
242 final double[] robustnessWeights = new double[n];
243
244 // Do an initial fit and 'robustnessIters' robustness iterations.
245 // This is equivalent to doing 'robustnessIters+1' robustness iterations
246 // starting with all robustness weights set to 1.
247 Arrays.fill(robustnessWeights, 1);
248
249 for (int iter = 0; iter <= robustnessIters; ++iter) {
250 final int[] bandwidthInterval = {0, bandwidthInPoints - 1};
251 // At each x, compute a local weighted linear regression
252 for (int i = 0; i < n; ++i) {
253 final double x = xval[i];
254
255 // Find out the interval of source points on which
256 // a regression is to be made.
257 if (i > 0) {
258 updateBandwidthInterval(xval, weights, i, bandwidthInterval);
259 }
260
261 final int ileft = bandwidthInterval[0];
262 final int iright = bandwidthInterval[1];
263
264 // Compute the point of the bandwidth interval that is
265 // farthest from x
266 final int edge;
267 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) {
268 edge = ileft;
269 } else {
270 edge = iright;
271 }
272
273 // Compute a least-squares linear fit weighted by
274 // the product of robustness weights and the tricube
275 // weight function.
276 // See http://en.wikipedia.org/wiki/Linear_regression
277 // (section "Univariate linear case")
278 // and http://en.wikipedia.org/wiki/Weighted_least_squares
279 // (section "Weighted least squares")
280 double sumWeights = 0;
281 double sumX = 0;
282 double sumXSquared = 0;
283 double sumY = 0;
284 double sumXY = 0;
285 double denom = FastMath.abs(1.0 / (xval[edge] - x));
286 for (int k = ileft; k <= iright; ++k) {
287 final double xk = xval[k];
288 final double yk = yval[k];
289 final double dist = (k < i) ? x - xk : xk - x;
290 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k];
291 final double xkw = xk * w;
292 sumWeights += w;
293 sumX += xkw;
294 sumXSquared += xk * xkw;
295 sumY += yk * w;
296 sumXY += yk * xkw;
297 }
298
299 final double meanX = sumX / sumWeights;
300 final double meanY = sumY / sumWeights;
301 final double meanXY = sumXY / sumWeights;
302 final double meanXSquared = sumXSquared / sumWeights;
303
304 final double beta;
305 if (FastMath.sqrt(FastMath.abs(meanXSquared - meanX * meanX)) < accuracy) {
306 beta = 0;
307 } else {
308 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
309 }
310
311 final double alpha = meanY - beta * meanX;
312
313 res[i] = beta * x + alpha;
314 residuals[i] = FastMath.abs(yval[i] - res[i]);
315 }
316
317 // No need to recompute the robustness weights at the last
318 // iteration, they won't be needed anymore
319 if (iter == robustnessIters) {
320 break;
321 }
322
323 // Recompute the robustness weights.
324
325 // Find the median residual.
326 // An arraycopy and a sort are completely tractable here,
327 // because the preceding loop is a lot more expensive
328 System.arraycopy(residuals, 0, sortedResiduals, 0, n);
329 Arrays.sort(sortedResiduals);
330 final double medianResidual = sortedResiduals[n / 2];
331
332 if (FastMath.abs(medianResidual) < accuracy) {
333 break;
334 }
335
336 for (int i = 0; i < n; ++i) {
337 final double arg = residuals[i] / (6 * medianResidual);
338 if (arg >= 1) {
339 robustnessWeights[i] = 0;
340 } else {
341 final double w = 1 - arg * arg;
342 robustnessWeights[i] = w * w;
343 }
344 }
345 }
346
347 return res;
348 }
349
350 /**
351 * Compute a loess fit on the data at the original abscissae.
352 *
353 * @param xval the arguments for the interpolation points
354 * @param yval the values for the interpolation points
355 * @return values of the loess fit at corresponding original abscissae
356 * @throws org.apache.commons.math.exception.NonMonotonicSequenceException
357 * if {@code xval} not sorted in strictly increasing order.
358 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
359 * different sizes.
360 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
361 * @throws org.apache.commons.math.exception.NotFiniteNumberException if
362 * any of the arguments and values are not finite real numbers.
363 * @throws NumberIsTooSmallException if the bandwidth is too small to
364 * accomodate the size of the input data (i.e. the bandwidth must be
365 * larger than 2/n).
366 */
367 public final double[] smooth(final double[] xval, final double[] yval) {
368 if (xval.length != yval.length) {
369 throw new DimensionMismatchException(xval.length, yval.length);
370 }
371
372 final double[] unitWeights = new double[xval.length];
373 Arrays.fill(unitWeights, 1.0);
374
375 return smooth(xval, yval, unitWeights);
376 }
377
378 /**
379 * Given an index interval into xval that embraces a certain number of
380 * points closest to {@code xval[i-1]}, update the interval so that it
381 * embraces the same number of points closest to {@code xval[i]},
382 * ignoring zero weights.
383 *
384 * @param xval Arguments array.
385 * @param weights Weights array.
386 * @param i Index around which the new interval should be computed.
387 * @param bandwidthInterval a two-element array {left, right} such that:
388 * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])}
389 * and
390 * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}.
391 * The array will be updated.
392 */
393 private static void updateBandwidthInterval(final double[] xval, final double[] weights,
394 final int i,
395 final int[] bandwidthInterval) {
396 final int left = bandwidthInterval[0];
397 final int right = bandwidthInterval[1];
398
399 // The right edge should be adjusted if the next point to the right
400 // is closer to xval[i] than the leftmost point of the current interval
401 int nextRight = nextNonzero(weights, right);
402 if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) {
403 int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
404 bandwidthInterval[0] = nextLeft;
405 bandwidthInterval[1] = nextRight;
406 }
407 }
408
409 /**
410 * Return the smallest index {@code j} such that
411 * {@code j > i && (j == weights.length || weights[j] != 0)}.
412 *
413 * @param weights Weights array.
414 * @param i Index from which to start search.
415 * @return the smallest compliant index.
416 */
417 private static int nextNonzero(final double[] weights, final int i) {
418 int j = i + 1;
419 while(j < weights.length && weights[j] == 0) {
420 ++j;
421 }
422 return j;
423 }
424
425 /**
426 * Compute the
427 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a>
428 * weight function
429 *
430 * @param x Argument.
431 * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| < 1, 0 otherwise.
432 */
433 private static double tricube(final double x) {
434 final double absX = FastMath.abs(x);
435 if (absX >= 1.0) {
436 return 0.0;
437 }
438 final double tmp = 1 - absX * absX * absX;
439 return tmp * tmp * tmp;
440 }
441
442 /**
443 * Check that all elements of an array are finite real numbers.
444 *
445 * @param values Values array.
446 * @throws org.apache.commons.math.exception.NotFiniteNumberException
447 * if one of the values is not a finite real number.
448 */
449 private static void checkAllFiniteReal(final double[] values) {
450 for (int i = 0; i < values.length; i++) {
451 MathUtils.checkFinite(values[i]);
452 }
453 }
454 }