1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.math4.legacy.analysis.interpolation;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.math4.legacy.analysis.polynomials.PolynomialSplineFunction;
22 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
23 import org.apache.commons.math4.legacy.exception.NoDataException;
24 import org.apache.commons.math4.legacy.exception.NonMonotonicSequenceException;
25 import org.apache.commons.math4.legacy.exception.NotFiniteNumberException;
26 import org.apache.commons.math4.legacy.exception.NotPositiveException;
27 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
28 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
29 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
30 import org.apache.commons.math4.core.jdkmath.JdkMath;
31 import org.apache.commons.math4.legacy.core.MathArrays;
32
33 /**
34 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression">
35 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of
36 * real univariate functions.
37 * <p>
38 * For reference, see
39 * <a href="http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1979.10481038">
40 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing
41 * Scatterplots</a></p>
42 * <p>
43 * This class implements both the loess method and serves as an interpolation
44 * adapter to it, allowing one to build a spline on the obtained loess fit.</p>
45 *
46 * @since 2.0
47 */
48 public class LoessInterpolator
49 implements UnivariateInterpolator {
50 /** Default value of the bandwidth parameter. */
51 public static final double DEFAULT_BANDWIDTH = 0.3;
52 /** Default value of the number of robustness iterations. */
53 public static final int DEFAULT_ROBUSTNESS_ITERS = 2;
54 /**
55 * Default value for accuracy.
56 * @since 2.1
57 */
58 public static final double DEFAULT_ACCURACY = 1e-12;
59 /**
60 * The bandwidth parameter: when computing the loess fit at
61 * a particular point, this fraction of source points closest
62 * to the current point is taken into account for computing
63 * a least-squares regression.
64 * <p>
65 * A sensible value is usually 0.25 to 0.5.</p>
66 */
67 private final double bandwidth;
68 /**
69 * The number of robustness iterations parameter: this many
70 * robustness iterations are done.
71 * <p>
72 * A sensible value is usually 0 (just the initial fit without any
73 * robustness iterations) to 4.</p>
74 */
75 private final int robustnessIters;
76 /**
77 * If the median residual at a certain robustness iteration
78 * is less than this amount, no more iterations are done.
79 */
80 private final double accuracy;
81
82 /**
83 * Constructs a new {@link LoessInterpolator}
84 * with a bandwidth of {@link #DEFAULT_BANDWIDTH},
85 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations
86 * and an accuracy of {#link #DEFAULT_ACCURACY}.
87 * See {@link #LoessInterpolator(double, int, double)} for an explanation of
88 * the parameters.
89 */
90 public LoessInterpolator() {
91 this.bandwidth = DEFAULT_BANDWIDTH;
92 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
93 this.accuracy = DEFAULT_ACCURACY;
94 }
95
96 /**
97 * Construct a new {@link LoessInterpolator}
98 * with given bandwidth and number of robustness iterations.
99 * <p>
100 * Calling this constructor is equivalent to calling {link {@link
101 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth,
102 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)}
103 * </p>
104 *
105 * @param bandwidth when computing the loess fit at
106 * a particular point, this fraction of source points closest
107 * to the current point is taken into account for computing
108 * a least-squares regression.
109 * A sensible value is usually 0.25 to 0.5, the default value is
110 * {@link #DEFAULT_BANDWIDTH}.
111 * @param robustnessIters This many robustness iterations are done.
112 * A sensible value is usually 0 (just the initial fit without any
113 * robustness iterations) to 4, the default value is
114 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
115
116 * @see #LoessInterpolator(double, int, double)
117 */
118 public LoessInterpolator(double bandwidth, int robustnessIters) {
119 this(bandwidth, robustnessIters, DEFAULT_ACCURACY);
120 }
121
122 /**
123 * Construct a new {@link LoessInterpolator}
124 * with given bandwidth, number of robustness iterations and accuracy.
125 *
126 * @param bandwidth when computing the loess fit at
127 * a particular point, this fraction of source points closest
128 * to the current point is taken into account for computing
129 * a least-squares regression.
130 * A sensible value is usually 0.25 to 0.5, the default value is
131 * {@link #DEFAULT_BANDWIDTH}.
132 * @param robustnessIters This many robustness iterations are done.
133 * A sensible value is usually 0 (just the initial fit without any
134 * robustness iterations) to 4, the default value is
135 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
136 * @param accuracy If the median residual at a certain robustness iteration
137 * is less than this amount, no more iterations are done.
138 * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1].
139 * @throws NotPositiveException if {@code robustnessIters} is negative.
140 * @see #LoessInterpolator(double, int)
141 * @since 2.1
142 */
143 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy)
144 throws OutOfRangeException,
145 NotPositiveException {
146 if (bandwidth < 0 ||
147 bandwidth > 1) {
148 throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1);
149 }
150 this.bandwidth = bandwidth;
151 if (robustnessIters < 0) {
152 throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters);
153 }
154 this.robustnessIters = robustnessIters;
155 this.accuracy = accuracy;
156 }
157
158 /**
159 * Compute an interpolating function by performing a loess fit
160 * on the data at the original abscissae and then building a cubic spline
161 * with a
162 * {@link org.apache.commons.math4.legacy.analysis.interpolation.SplineInterpolator}
163 * on the resulting fit.
164 *
165 * @param xval the arguments for the interpolation points
166 * @param yval the values for the interpolation points
167 * @return A cubic spline built upon a loess fit to the data at the original abscissae
168 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
169 * strictly increasing order.
170 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
171 * different sizes.
172 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
173 * @throws NotFiniteNumberException if any of the arguments and values are
174 * not finite real numbers.
175 * @throws NumberIsTooSmallException if the bandwidth is too small to
176 * accommodate the size of the input data (i.e. the bandwidth must be
177 * larger than 2/n).
178 */
179 @Override
180 public final PolynomialSplineFunction interpolate(final double[] xval,
181 final double[] yval)
182 throws NonMonotonicSequenceException,
183 DimensionMismatchException,
184 NoDataException,
185 NotFiniteNumberException,
186 NumberIsTooSmallException {
187 return new SplineInterpolator().interpolate(xval, smooth(xval, yval));
188 }
189
190 /**
191 * Compute a weighted loess fit on the data at the original abscissae.
192 *
193 * @param xval Arguments for the interpolation points.
194 * @param yval Values for the interpolation points.
195 * @param weights point weights: coefficients by which the robustness weight
196 * of a point is multiplied.
197 * @return the values of the loess fit at corresponding original abscissae.
198 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
199 * strictly increasing order.
200 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
201 * different sizes.
202 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
203 * @throws NotFiniteNumberException if any of the arguments and values are
204 not finite real numbers.
205 * @throws NumberIsTooSmallException if the bandwidth is too small to
206 * accommodate the size of the input data (i.e. the bandwidth must be
207 * larger than 2/n).
208 * @since 2.1
209 */
210 public final double[] smooth(final double[] xval, final double[] yval,
211 final double[] weights)
212 throws NonMonotonicSequenceException,
213 DimensionMismatchException,
214 NoDataException,
215 NotFiniteNumberException,
216 NumberIsTooSmallException {
217 if (xval.length != yval.length) {
218 throw new DimensionMismatchException(xval.length, yval.length);
219 }
220
221 final int n = xval.length;
222
223 if (n == 0) {
224 throw new NoDataException();
225 }
226
227 NotFiniteNumberException.check(xval);
228 NotFiniteNumberException.check(yval);
229 NotFiniteNumberException.check(weights);
230
231 MathArrays.checkOrder(xval);
232
233 if (n == 1) {
234 return new double[]{yval[0]};
235 }
236
237 if (n == 2) {
238 return new double[]{yval[0], yval[1]};
239 }
240
241 int bandwidthInPoints = (int) (bandwidth * n);
242
243 if (bandwidthInPoints < 2) {
244 throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH,
245 bandwidthInPoints, 2, true);
246 }
247
248 final double[] res = new double[n];
249
250 final double[] residuals = new double[n];
251 final double[] sortedResiduals = new double[n];
252
253 final double[] robustnessWeights = new double[n];
254
255 // Do an initial fit and 'robustnessIters' robustness iterations.
256 // This is equivalent to doing 'robustnessIters+1' robustness iterations
257 // starting with all robustness weights set to 1.
258 Arrays.fill(robustnessWeights, 1);
259
260 for (int iter = 0; iter <= robustnessIters; ++iter) {
261 final int[] bandwidthInterval = {0, bandwidthInPoints - 1};
262 // At each x, compute a local weighted linear regression
263 for (int i = 0; i < n; ++i) {
264 final double x = xval[i];
265
266 // Find out the interval of source points on which
267 // a regression is to be made.
268 if (i > 0) {
269 updateBandwidthInterval(xval, weights, i, bandwidthInterval);
270 }
271
272 final int ileft = bandwidthInterval[0];
273 final int iright = bandwidthInterval[1];
274
275 // Compute the point of the bandwidth interval that is
276 // farthest from x
277 final int edge;
278 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) {
279 edge = ileft;
280 } else {
281 edge = iright;
282 }
283
284 // Compute a least-squares linear fit weighted by
285 // the product of robustness weights and the tricube
286 // weight function.
287 // See http://en.wikipedia.org/wiki/Linear_regression
288 // (section "Univariate linear case")
289 // and http://en.wikipedia.org/wiki/Weighted_least_squares
290 // (section "Weighted least squares")
291 double sumWeights = 0;
292 double sumX = 0;
293 double sumXSquared = 0;
294 double sumY = 0;
295 double sumXY = 0;
296 double denom = JdkMath.abs(1.0 / (xval[edge] - x));
297 for (int k = ileft; k <= iright; ++k) {
298 final double xk = xval[k];
299 final double yk = yval[k];
300 final double dist = (k < i) ? x - xk : xk - x;
301 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k];
302 final double xkw = xk * w;
303 sumWeights += w;
304 sumX += xkw;
305 sumXSquared += xk * xkw;
306 sumY += yk * w;
307 sumXY += yk * xkw;
308 }
309
310 final double meanX = sumX / sumWeights;
311 final double meanY = sumY / sumWeights;
312 final double meanXY = sumXY / sumWeights;
313 final double meanXSquared = sumXSquared / sumWeights;
314
315 final double beta;
316 if (JdkMath.sqrt(JdkMath.abs(meanXSquared - meanX * meanX)) < accuracy) {
317 beta = 0;
318 } else {
319 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
320 }
321
322 final double alpha = meanY - beta * meanX;
323
324 res[i] = beta * x + alpha;
325 residuals[i] = JdkMath.abs(yval[i] - res[i]);
326 }
327
328 // No need to recompute the robustness weights at the last
329 // iteration, they won't be needed anymore
330 if (iter == robustnessIters) {
331 break;
332 }
333
334 // Recompute the robustness weights.
335
336 // Find the median residual.
337 // An arraycopy and a sort are completely tractable here,
338 // because the preceding loop is a lot more expensive
339 System.arraycopy(residuals, 0, sortedResiduals, 0, n);
340 Arrays.sort(sortedResiduals);
341 final double medianResidual = sortedResiduals[n / 2];
342
343 if (JdkMath.abs(medianResidual) < accuracy) {
344 break;
345 }
346
347 for (int i = 0; i < n; ++i) {
348 final double arg = residuals[i] / (6 * medianResidual);
349 if (arg >= 1) {
350 robustnessWeights[i] = 0;
351 } else {
352 final double w = 1 - arg * arg;
353 robustnessWeights[i] = w * w;
354 }
355 }
356 }
357
358 return res;
359 }
360
361 /**
362 * Compute a loess fit on the data at the original abscissae.
363 *
364 * @param xval the arguments for the interpolation points
365 * @param yval the values for the interpolation points
366 * @return values of the loess fit at corresponding original abscissae
367 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
368 * strictly increasing order.
369 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
370 * different sizes.
371 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
372 * @throws NotFiniteNumberException if any of the arguments and values are
373 * not finite real numbers.
374 * @throws NumberIsTooSmallException if the bandwidth is too small to
375 * accommodate the size of the input data (i.e. the bandwidth must be
376 * larger than 2/n).
377 */
378 public final double[] smooth(final double[] xval, final double[] yval)
379 throws NonMonotonicSequenceException,
380 DimensionMismatchException,
381 NoDataException,
382 NotFiniteNumberException,
383 NumberIsTooSmallException {
384 if (xval.length != yval.length) {
385 throw new DimensionMismatchException(xval.length, yval.length);
386 }
387
388 final double[] unitWeights = new double[xval.length];
389 Arrays.fill(unitWeights, 1.0);
390
391 return smooth(xval, yval, unitWeights);
392 }
393
394 /**
395 * Given an index interval into xval that embraces a certain number of
396 * points closest to {@code xval[i-1]}, update the interval so that it
397 * embraces the same number of points closest to {@code xval[i]},
398 * ignoring zero weights.
399 *
400 * @param xval Arguments array.
401 * @param weights Weights array.
402 * @param i Index around which the new interval should be computed.
403 * @param bandwidthInterval a two-element array {left, right} such that:
404 * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])}
405 * and
406 * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}.
407 * The array will be updated.
408 */
409 private static void updateBandwidthInterval(final double[] xval,
410 final double[] weights,
411 final int i,
412 final int[] bandwidthInterval) {
413 final int left = bandwidthInterval[0];
414 final int right = bandwidthInterval[1];
415
416 // The right edge should be adjusted if the next point to the right
417 // is closer to xval[i] than the leftmost point of the current interval
418 int nextRight = nextNonzero(weights, right);
419 int nextLeft = left;
420 while (nextRight < xval.length &&
421 xval[nextRight] - xval[i] < xval[i] - xval[nextLeft]) {
422 nextLeft = nextNonzero(weights, bandwidthInterval[0]);
423 bandwidthInterval[0] = nextLeft;
424 bandwidthInterval[1] = nextRight;
425 nextRight = nextNonzero(weights, nextRight);
426 }
427 }
428
429 /**
430 * Return the smallest index {@code j} such that
431 * {@code j > i && (j == weights.length || weights[j] != 0)}.
432 *
433 * @param weights Weights array.
434 * @param i Index from which to start search.
435 * @return the smallest compliant index.
436 */
437 private static int nextNonzero(final double[] weights, final int i) {
438 int j = i + 1;
439 while(j < weights.length && weights[j] == 0) {
440 ++j;
441 }
442 return j;
443 }
444
445 /**
446 * Compute the
447 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a>
448 * weight function.
449 *
450 * @param x Argument.
451 * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| < 1, 0 otherwise.
452 */
453 private static double tricube(final double x) {
454 final double absX = JdkMath.abs(x);
455 if (absX >= 1.0) {
456 return 0.0;
457 }
458 final double tmp = 1 - absX * absX * absX;
459 return tmp * tmp * tmp;
460 }
461 }