1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math4.legacy.analysis.interpolation; 18 19 import java.util.Arrays; 20 21 import org.apache.commons.math4.legacy.analysis.polynomials.PolynomialSplineFunction; 22 import org.apache.commons.math4.legacy.exception.DimensionMismatchException; 23 import org.apache.commons.math4.legacy.exception.NoDataException; 24 import org.apache.commons.math4.legacy.exception.NonMonotonicSequenceException; 25 import org.apache.commons.math4.legacy.exception.NotFiniteNumberException; 26 import org.apache.commons.math4.legacy.exception.NotPositiveException; 27 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException; 28 import org.apache.commons.math4.legacy.exception.OutOfRangeException; 29 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; 30 import org.apache.commons.math4.core.jdkmath.JdkMath; 31 import org.apache.commons.math4.legacy.core.MathArrays; 32 33 /** 34 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression"> 35 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of 36 * real univariate functions. 37 * <p> 38 * For reference, see 39 * <a href="http://amstat.tandfonline.com/doi/abs/10.1080/01621459.1979.10481038"> 40 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing 41 * Scatterplots</a></p> 42 * <p> 43 * This class implements both the loess method and serves as an interpolation 44 * adapter to it, allowing one to build a spline on the obtained loess fit.</p> 45 * 46 * @since 2.0 47 */ 48 public class LoessInterpolator 49 implements UnivariateInterpolator { 50 /** Default value of the bandwidth parameter. */ 51 public static final double DEFAULT_BANDWIDTH = 0.3; 52 /** Default value of the number of robustness iterations. */ 53 public static final int DEFAULT_ROBUSTNESS_ITERS = 2; 54 /** 55 * Default value for accuracy. 56 * @since 2.1 57 */ 58 public static final double DEFAULT_ACCURACY = 1e-12; 59 /** 60 * The bandwidth parameter: when computing the loess fit at 61 * a particular point, this fraction of source points closest 62 * to the current point is taken into account for computing 63 * a least-squares regression. 64 * <p> 65 * A sensible value is usually 0.25 to 0.5.</p> 66 */ 67 private final double bandwidth; 68 /** 69 * The number of robustness iterations parameter: this many 70 * robustness iterations are done. 71 * <p> 72 * A sensible value is usually 0 (just the initial fit without any 73 * robustness iterations) to 4.</p> 74 */ 75 private final int robustnessIters; 76 /** 77 * If the median residual at a certain robustness iteration 78 * is less than this amount, no more iterations are done. 79 */ 80 private final double accuracy; 81 82 /** 83 * Constructs a new {@link LoessInterpolator} 84 * with a bandwidth of {@link #DEFAULT_BANDWIDTH}, 85 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations 86 * and an accuracy of {#link #DEFAULT_ACCURACY}. 87 * See {@link #LoessInterpolator(double, int, double)} for an explanation of 88 * the parameters. 89 */ 90 public LoessInterpolator() { 91 this.bandwidth = DEFAULT_BANDWIDTH; 92 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS; 93 this.accuracy = DEFAULT_ACCURACY; 94 } 95 96 /** 97 * Construct a new {@link LoessInterpolator} 98 * with given bandwidth and number of robustness iterations. 99 * <p> 100 * Calling this constructor is equivalent to calling {link {@link 101 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth, 102 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)} 103 * </p> 104 * 105 * @param bandwidth when computing the loess fit at 106 * a particular point, this fraction of source points closest 107 * to the current point is taken into account for computing 108 * a least-squares regression. 109 * A sensible value is usually 0.25 to 0.5, the default value is 110 * {@link #DEFAULT_BANDWIDTH}. 111 * @param robustnessIters This many robustness iterations are done. 112 * A sensible value is usually 0 (just the initial fit without any 113 * robustness iterations) to 4, the default value is 114 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 115 116 * @see #LoessInterpolator(double, int, double) 117 */ 118 public LoessInterpolator(double bandwidth, int robustnessIters) { 119 this(bandwidth, robustnessIters, DEFAULT_ACCURACY); 120 } 121 122 /** 123 * Construct a new {@link LoessInterpolator} 124 * with given bandwidth, number of robustness iterations and accuracy. 125 * 126 * @param bandwidth when computing the loess fit at 127 * a particular point, this fraction of source points closest 128 * to the current point is taken into account for computing 129 * a least-squares regression. 130 * A sensible value is usually 0.25 to 0.5, the default value is 131 * {@link #DEFAULT_BANDWIDTH}. 132 * @param robustnessIters This many robustness iterations are done. 133 * A sensible value is usually 0 (just the initial fit without any 134 * robustness iterations) to 4, the default value is 135 * {@link #DEFAULT_ROBUSTNESS_ITERS}. 136 * @param accuracy If the median residual at a certain robustness iteration 137 * is less than this amount, no more iterations are done. 138 * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1]. 139 * @throws NotPositiveException if {@code robustnessIters} is negative. 140 * @see #LoessInterpolator(double, int) 141 * @since 2.1 142 */ 143 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy) 144 throws OutOfRangeException, 145 NotPositiveException { 146 if (bandwidth < 0 || 147 bandwidth > 1) { 148 throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1); 149 } 150 this.bandwidth = bandwidth; 151 if (robustnessIters < 0) { 152 throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters); 153 } 154 this.robustnessIters = robustnessIters; 155 this.accuracy = accuracy; 156 } 157 158 /** 159 * Compute an interpolating function by performing a loess fit 160 * on the data at the original abscissae and then building a cubic spline 161 * with a 162 * {@link org.apache.commons.math4.legacy.analysis.interpolation.SplineInterpolator} 163 * on the resulting fit. 164 * 165 * @param xval the arguments for the interpolation points 166 * @param yval the values for the interpolation points 167 * @return A cubic spline built upon a loess fit to the data at the original abscissae 168 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 169 * strictly increasing order. 170 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 171 * different sizes. 172 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 173 * @throws NotFiniteNumberException if any of the arguments and values are 174 * not finite real numbers. 175 * @throws NumberIsTooSmallException if the bandwidth is too small to 176 * accommodate the size of the input data (i.e. the bandwidth must be 177 * larger than 2/n). 178 */ 179 @Override 180 public final PolynomialSplineFunction interpolate(final double[] xval, 181 final double[] yval) 182 throws NonMonotonicSequenceException, 183 DimensionMismatchException, 184 NoDataException, 185 NotFiniteNumberException, 186 NumberIsTooSmallException { 187 return new SplineInterpolator().interpolate(xval, smooth(xval, yval)); 188 } 189 190 /** 191 * Compute a weighted loess fit on the data at the original abscissae. 192 * 193 * @param xval Arguments for the interpolation points. 194 * @param yval Values for the interpolation points. 195 * @param weights point weights: coefficients by which the robustness weight 196 * of a point is multiplied. 197 * @return the values of the loess fit at corresponding original abscissae. 198 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 199 * strictly increasing order. 200 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 201 * different sizes. 202 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 203 * @throws NotFiniteNumberException if any of the arguments and values are 204 not finite real numbers. 205 * @throws NumberIsTooSmallException if the bandwidth is too small to 206 * accommodate the size of the input data (i.e. the bandwidth must be 207 * larger than 2/n). 208 * @since 2.1 209 */ 210 public final double[] smooth(final double[] xval, final double[] yval, 211 final double[] weights) 212 throws NonMonotonicSequenceException, 213 DimensionMismatchException, 214 NoDataException, 215 NotFiniteNumberException, 216 NumberIsTooSmallException { 217 if (xval.length != yval.length) { 218 throw new DimensionMismatchException(xval.length, yval.length); 219 } 220 221 final int n = xval.length; 222 223 if (n == 0) { 224 throw new NoDataException(); 225 } 226 227 NotFiniteNumberException.check(xval); 228 NotFiniteNumberException.check(yval); 229 NotFiniteNumberException.check(weights); 230 231 MathArrays.checkOrder(xval); 232 233 if (n == 1) { 234 return new double[]{yval[0]}; 235 } 236 237 if (n == 2) { 238 return new double[]{yval[0], yval[1]}; 239 } 240 241 int bandwidthInPoints = (int) (bandwidth * n); 242 243 if (bandwidthInPoints < 2) { 244 throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH, 245 bandwidthInPoints, 2, true); 246 } 247 248 final double[] res = new double[n]; 249 250 final double[] residuals = new double[n]; 251 final double[] sortedResiduals = new double[n]; 252 253 final double[] robustnessWeights = new double[n]; 254 255 // Do an initial fit and 'robustnessIters' robustness iterations. 256 // This is equivalent to doing 'robustnessIters+1' robustness iterations 257 // starting with all robustness weights set to 1. 258 Arrays.fill(robustnessWeights, 1); 259 260 for (int iter = 0; iter <= robustnessIters; ++iter) { 261 final int[] bandwidthInterval = {0, bandwidthInPoints - 1}; 262 // At each x, compute a local weighted linear regression 263 for (int i = 0; i < n; ++i) { 264 final double x = xval[i]; 265 266 // Find out the interval of source points on which 267 // a regression is to be made. 268 if (i > 0) { 269 updateBandwidthInterval(xval, weights, i, bandwidthInterval); 270 } 271 272 final int ileft = bandwidthInterval[0]; 273 final int iright = bandwidthInterval[1]; 274 275 // Compute the point of the bandwidth interval that is 276 // farthest from x 277 final int edge; 278 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) { 279 edge = ileft; 280 } else { 281 edge = iright; 282 } 283 284 // Compute a least-squares linear fit weighted by 285 // the product of robustness weights and the tricube 286 // weight function. 287 // See http://en.wikipedia.org/wiki/Linear_regression 288 // (section "Univariate linear case") 289 // and http://en.wikipedia.org/wiki/Weighted_least_squares 290 // (section "Weighted least squares") 291 double sumWeights = 0; 292 double sumX = 0; 293 double sumXSquared = 0; 294 double sumY = 0; 295 double sumXY = 0; 296 double denom = JdkMath.abs(1.0 / (xval[edge] - x)); 297 for (int k = ileft; k <= iright; ++k) { 298 final double xk = xval[k]; 299 final double yk = yval[k]; 300 final double dist = (k < i) ? x - xk : xk - x; 301 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k]; 302 final double xkw = xk * w; 303 sumWeights += w; 304 sumX += xkw; 305 sumXSquared += xk * xkw; 306 sumY += yk * w; 307 sumXY += yk * xkw; 308 } 309 310 final double meanX = sumX / sumWeights; 311 final double meanY = sumY / sumWeights; 312 final double meanXY = sumXY / sumWeights; 313 final double meanXSquared = sumXSquared / sumWeights; 314 315 final double beta; 316 if (JdkMath.sqrt(JdkMath.abs(meanXSquared - meanX * meanX)) < accuracy) { 317 beta = 0; 318 } else { 319 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX); 320 } 321 322 final double alpha = meanY - beta * meanX; 323 324 res[i] = beta * x + alpha; 325 residuals[i] = JdkMath.abs(yval[i] - res[i]); 326 } 327 328 // No need to recompute the robustness weights at the last 329 // iteration, they won't be needed anymore 330 if (iter == robustnessIters) { 331 break; 332 } 333 334 // Recompute the robustness weights. 335 336 // Find the median residual. 337 // An arraycopy and a sort are completely tractable here, 338 // because the preceding loop is a lot more expensive 339 System.arraycopy(residuals, 0, sortedResiduals, 0, n); 340 Arrays.sort(sortedResiduals); 341 final double medianResidual = sortedResiduals[n / 2]; 342 343 if (JdkMath.abs(medianResidual) < accuracy) { 344 break; 345 } 346 347 for (int i = 0; i < n; ++i) { 348 final double arg = residuals[i] / (6 * medianResidual); 349 if (arg >= 1) { 350 robustnessWeights[i] = 0; 351 } else { 352 final double w = 1 - arg * arg; 353 robustnessWeights[i] = w * w; 354 } 355 } 356 } 357 358 return res; 359 } 360 361 /** 362 * Compute a loess fit on the data at the original abscissae. 363 * 364 * @param xval the arguments for the interpolation points 365 * @param yval the values for the interpolation points 366 * @return values of the loess fit at corresponding original abscissae 367 * @throws NonMonotonicSequenceException if {@code xval} not sorted in 368 * strictly increasing order. 369 * @throws DimensionMismatchException if {@code xval} and {@code yval} have 370 * different sizes. 371 * @throws NoDataException if {@code xval} or {@code yval} has zero size. 372 * @throws NotFiniteNumberException if any of the arguments and values are 373 * not finite real numbers. 374 * @throws NumberIsTooSmallException if the bandwidth is too small to 375 * accommodate the size of the input data (i.e. the bandwidth must be 376 * larger than 2/n). 377 */ 378 public final double[] smooth(final double[] xval, final double[] yval) 379 throws NonMonotonicSequenceException, 380 DimensionMismatchException, 381 NoDataException, 382 NotFiniteNumberException, 383 NumberIsTooSmallException { 384 if (xval.length != yval.length) { 385 throw new DimensionMismatchException(xval.length, yval.length); 386 } 387 388 final double[] unitWeights = new double[xval.length]; 389 Arrays.fill(unitWeights, 1.0); 390 391 return smooth(xval, yval, unitWeights); 392 } 393 394 /** 395 * Given an index interval into xval that embraces a certain number of 396 * points closest to {@code xval[i-1]}, update the interval so that it 397 * embraces the same number of points closest to {@code xval[i]}, 398 * ignoring zero weights. 399 * 400 * @param xval Arguments array. 401 * @param weights Weights array. 402 * @param i Index around which the new interval should be computed. 403 * @param bandwidthInterval a two-element array {left, right} such that: 404 * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])} 405 * and 406 * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}. 407 * The array will be updated. 408 */ 409 private static void updateBandwidthInterval(final double[] xval, 410 final double[] weights, 411 final int i, 412 final int[] bandwidthInterval) { 413 final int left = bandwidthInterval[0]; 414 final int right = bandwidthInterval[1]; 415 416 // The right edge should be adjusted if the next point to the right 417 // is closer to xval[i] than the leftmost point of the current interval 418 int nextRight = nextNonzero(weights, right); 419 int nextLeft = left; 420 while (nextRight < xval.length && 421 xval[nextRight] - xval[i] < xval[i] - xval[nextLeft]) { 422 nextLeft = nextNonzero(weights, bandwidthInterval[0]); 423 bandwidthInterval[0] = nextLeft; 424 bandwidthInterval[1] = nextRight; 425 nextRight = nextNonzero(weights, nextRight); 426 } 427 } 428 429 /** 430 * Return the smallest index {@code j} such that 431 * {@code j > i && (j == weights.length || weights[j] != 0)}. 432 * 433 * @param weights Weights array. 434 * @param i Index from which to start search. 435 * @return the smallest compliant index. 436 */ 437 private static int nextNonzero(final double[] weights, final int i) { 438 int j = i + 1; 439 while(j < weights.length && weights[j] == 0) { 440 ++j; 441 } 442 return j; 443 } 444 445 /** 446 * Compute the 447 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a> 448 * weight function. 449 * 450 * @param x Argument. 451 * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| < 1, 0 otherwise. 452 */ 453 private static double tricube(final double x) { 454 final double absX = JdkMath.abs(x); 455 if (absX >= 1.0) { 456 return 0.0; 457 } 458 final double tmp = 1 - absX * absX * absX; 459 return tmp * tmp * tmp; 460 } 461 }