1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.numbers.core; 18 19 /** 20 * Computes double-double floating-point operations. 21 * 22 * <p>This class contains extension methods to supplement the functionality in {@link DD}. 23 * The methods are tested in {@link DDTest}. These include: 24 * <ul> 25 * <li>Arithmetic operations that have 105+ bits of precision and are typically 2-3 bits more 26 * accurate than the versions in {@link DD}. 27 * <li>A power function based on {@link Math#pow(double, double)} with approximately 50+ bits of 28 * precision. 29 * </ul> 30 * 31 * <p><b>Note</b> 32 * 33 * <p>This class is public and has public methods to allow testing within the examples JMH module. 34 * 35 * @since 1.2 36 */ 37 public final class DDExt { 38 /** Threshold for large n where the Taylor series for (1+z)^n is not applicable. */ 39 private static final int LARGE_N = 100000000; 40 /** Threshold for (x, xx)^n where x=0.5 and low-part will not be sub-normal. 41 * Note x has an exponent of -1; xx of -54 (if normalized); the min normal exponent is -1022; 42 * add 10-bits headroom in case xx is below epsilon * x: 1022 - 54 - 10. 0.5^958 = 4.1e-289. */ 43 private static final int SAFE_EXPONENT_F = 958; 44 /** Threshold for (x, xx)^n where n ~ 2^31 and low-part will not be sub-normal. 45 * x ~ exp(log(2^-958) / 2^31). 46 * Note: floor(-958 * ln(2) / ln(nextDown(SAFE_F))) < 2^31. */ 47 private static final double SAFE_F = 0.9999996907846553; 48 /** Threshold for (x, xx)^n where x=2 and high-part is finite. 49 * For consistency we use 10-bits headroom down from max exponent 1023. 0.5^1013 = 8.78e304. */ 50 private static final int SAFE_EXPONENT_2F = 1013; 51 /** Threshold for (x, xx)^n where n ~ 2^31 and high-part is finite. 52 * x ~ exp(log(2^1013) / 2^31) 53 * Note: floor(1013 * ln(2) / ln(nextUp(SAFE_2F))) < 2^31. */ 54 private static final double SAFE_2F = 1.0000003269678954; 55 /** log(2) (20-digits). */ 56 private static final double LN2 = 0.69314718055994530941; 57 /** sqrt(0.5) == 1 / sqrt(2). */ 58 private static final double ROOT_HALF = 0.707106781186547524400; 59 /** The limit for safe multiplication of {@code x*y}, assuming values above 1. 60 * Used to maintain positive values during the power computation. */ 61 private static final double SAFE_MULTIPLY = 0x1.0p500; 62 /** Used to downscale values before multiplication. Downscaling of any value 63 * strictly above SAFE_MULTIPLY will be above 1 even including a double-double 64 * roundoff that lowers the magnitude. */ 65 private static final double SAFE_MULTIPLY_DOWNSCALE = 0x1.0p-500; 66 67 /** 68 * No instances. 69 */ 70 private DDExt() {} 71 72 /** 73 * Compute the sum of {@code x} and {@code y}. 74 * 75 * <p>This computes the same result as 76 * {@link #add(DD, DD) add(x, DD.of(y))}. 77 * 78 * <p>The performance is approximately 1.5-fold slower than {@link DD#add(double)}. 79 * 80 * @param x x. 81 * @param y y. 82 * @return the sum 83 */ 84 public static DD add(DD x, double y) { 85 return DD.accurateAdd(x.hi(), x.lo(), y); 86 } 87 88 /** 89 * Compute the sum of {@code x} and {@code y}. 90 * 91 * <p>The high-part of the result is within 1 ulp of the true sum {@code e}. 92 * The low-part of the result is within 1 ulp of the result of the high-part 93 * subtracted from the true sum {@code e - hi}. 94 * 95 * <p>The performance is approximately 2-fold slower than {@link DD#add(DD)}. 96 * 97 * @param x x. 98 * @param y y. 99 * @return the sum 100 */ 101 public static DD add(DD x, DD y) { 102 return DD.accurateAdd(x.hi(), x.lo(), y.hi(), y.lo()); 103 } 104 105 /** 106 * Compute the subtraction of {@code y} from {@code x}. 107 * 108 * <p>This computes the same result as 109 * {@link #add(DD, double) add(x, -y)}. 110 * 111 * @param x x. 112 * @param y y. 113 * @return the difference 114 */ 115 public static DD subtract(DD x, double y) { 116 return DD.accurateAdd(x.hi(), x.lo(), -y); 117 } 118 119 /** 120 * Compute the subtraction of {@code y} from {@code x}. 121 * 122 * <p>This computes the same result as 123 * {@link #add(DD, DD) add(x, y.negate())}. 124 * 125 * @param x x. 126 * @param y y. 127 * @return the difference 128 */ 129 public static DD subtract(DD x, DD y) { 130 return DD.accurateAdd(x.hi(), x.lo(), -y.hi(), -y.lo()); 131 } 132 133 /** 134 * Compute the multiplication product of {@code x} and {@code y}. 135 * 136 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 137 * 138 * <p>The performance is approximately 2.5-fold slower than {@link DD#multiply(double)}. 139 * 140 * @param x x. 141 * @param y y. 142 * @return the product 143 */ 144 public static DD multiply(DD x, double y) { 145 return accurateMultiply(x.hi(), x.lo(), y); 146 } 147 148 /** 149 * Compute the multiplication product of {@code (x, xx)} and {@code y}. 150 * 151 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 152 * 153 * <p>The performance is approximately 2.5-fold slower than {@link DD#multiply(double)}. 154 * 155 * @param x High part of x. 156 * @param xx Low part of x. 157 * @param y y. 158 * @return the product 159 */ 160 private static DD accurateMultiply(double x, double xx, double y) { 161 // For working see accurateMultiply(double x, double xx, double y, double yy) 162 163 final double xh = DD.highPart(x); 164 final double xl = x - xh; 165 final double xxh = DD.highPart(xx); 166 final double xxl = xx - xxh; 167 final double yh = DD.highPart(y); 168 final double yl = y - yh; 169 170 final double p00 = x * y; 171 final double q00 = DD.twoProductLow(xh, xl, yh, yl, p00); 172 final double p10 = xx * y; 173 final double q10 = DD.twoProductLow(xxh, xxl, yh, yl, p10); 174 175 // The code below collates the O(eps) terms with a round-off 176 // so O(eps^2) terms can be added to it. 177 178 final double s0 = p00; 179 // Sum (p10, q00) -> (s1, r2) Order(eps) 180 final double s1 = p10 + q00; 181 final double r2 = DD.twoSumLow(p10, q00, s1); 182 183 // Collect (s0, s1, r2 + q10) 184 final double u = s0 + s1; 185 final double v = DD.fastTwoSumLow(s0, s1, u); 186 return DD.fastTwoSum(u, r2 + q10 + v); 187 } 188 189 /** 190 * Compute the multiplication product of {@code x} and {@code y}. 191 * 192 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 193 * 194 * <p>The performance is approximately 4.5-fold slower than {@link DD#multiply(DD)}. 195 * 196 * @param x x. 197 * @param y y. 198 * @return the product 199 */ 200 public static DD multiply(DD x, DD y) { 201 return accurateMultiply(x.hi(), x.lo(), y.hi(), y.lo()); 202 } 203 204 /** 205 * Compute the multiplication product of {@code (x, xx)} and {@code (y, yy)}. 206 * 207 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 208 * 209 * <p>The performance is approximately 4.5-fold slower than {@link DD#multiply(DD)}. 210 * 211 * @param x High part of x. 212 * @param xx Low part of x. 213 * @param y High part of y. 214 * @param yy Low part of y. 215 * @return the product 216 */ 217 private static DD accurateMultiply(double x, double xx, double y, double yy) { 218 // double-double multiplication: 219 // (a0, a1) * (b0, b1) 220 // a x b ~ a0b0 O(1) term 221 // + a0b1 + a1b0 O(eps) terms 222 // + a1b1 O(eps^2) term 223 // Higher terms require two-prod if the round-off is <= O(eps^2). 224 // (pij,qij) = two-prod(ai, bj); pij = O(eps^i+j); qij = O(eps^i+j+1) 225 // p00 O(1) 226 // p01, p10, q00 O(eps) 227 // p11, q01, q10 O(eps^2) 228 // q11 O(eps^3) (not required for the first 106 bits) 229 // Sum terms of the same order. Carry round-off to lower order: 230 // s0 = p00 Order(1) 231 // Sum (p01, p10, q00) -> (s1, r2) Order(eps) 232 // Sum (p11, q01, q10, r2) -> s2 Order(eps^2) 233 234 final double xh = DD.highPart(x); 235 final double xl = x - xh; 236 final double xxh = DD.highPart(xx); 237 final double xxl = xx - xxh; 238 final double yh = DD.highPart(y); 239 final double yl = y - yh; 240 final double yyh = DD.highPart(yy); 241 final double yyl = yy - yyh; 242 243 final double p00 = x * y; 244 final double q00 = DD.twoProductLow(xh, xl, yh, yl, p00); 245 final double p01 = x * yy; 246 final double q01 = DD.twoProductLow(xh, xl, yyh, yyl, p01); 247 final double p10 = xx * y; 248 final double q10 = DD.twoProductLow(xxh, xxl, yh, yl, p10); 249 final double p11 = xx * yy; 250 251 // Note: Directly adding same order terms (error = 2 eps^2): 252 // DD.fastTwoSum(p00, (p11 + q01 + q10) + (p01 + p10 + q00)) 253 254 // The code below collates the O(eps) terms with a round-off 255 // so O(eps^2) terms can be added to it. 256 257 final double s0 = p00; 258 // Sum (p01, p10, q00) -> (s1, r2) Order(eps) 259 double u = p01 + p10; 260 double v = DD.twoSumLow(p01, p10, u); 261 final double s1 = q00 + u; 262 final double w = DD.twoSumLow(q00, u, s1); 263 final double r2 = v + w; 264 265 // Collect (s0, s1, r2 + p11 + q01 + q10) 266 u = s0 + s1; 267 v = DD.fastTwoSumLow(s0, s1, u); 268 return DD.fastTwoSum(u, r2 + p11 + q01 + q10 + v); 269 } 270 271 /** 272 * Compute the square of {@code x}. 273 * 274 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 275 * 276 * <p>The performance is approximately 4.5-fold slower than {@link DD#square()}. 277 * 278 * @param x x. 279 * @return the square 280 */ 281 public static DD square(DD x) { 282 return accurateSquare(x.hi(), x.lo()); 283 } 284 285 /** 286 * Compute the square of {@code (x, xx)}. 287 * 288 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>. 289 * 290 * <p>The performance is approximately 4.5-fold slower than {@link DD#square()}. 291 * 292 * @param x High part of x. 293 * @param xx Low part of x. 294 * @return the square 295 */ 296 private static DD accurateSquare(double x, double xx) { 297 // For working see accurateMultiply(double x, double xx, double y, double yy) 298 299 final double xh = DD.highPart(x); 300 final double xl = x - xh; 301 final double xxh = DD.highPart(xx); 302 final double xxl = xx - xxh; 303 304 final double p00 = x * x; 305 final double q00 = DD.twoSquareLow(xh, xl, p00); 306 final double p01 = x * xx; 307 final double q01 = DD.twoProductLow(xh, xl, xxh, xxl, p01); 308 final double p11 = xx * xx; 309 310 // The code below collates the O(eps) terms with a round-off 311 // so O(eps^2) terms can be added to it. 312 313 final double s0 = p00; 314 // Sum (p01, p10, q00) -> (s1, r2) Order(eps) 315 final double s1 = q00 + 2 * p01; 316 final double r2 = DD.twoSumLow(q00, 2 * p01, s1); 317 318 // Collect (s0, s1, r2 + p11 + q01 + q10) 319 final double u = s0 + s1; 320 final double v = DD.fastTwoSumLow(s0, s1, u); 321 return DD.fastTwoSum(u, r2 + p11 + 2 * q01 + v); 322 } 323 324 /** 325 * Compute the division of {@code x} by {@code y}. 326 * If {@code y = 0} the result is undefined. 327 * 328 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>. 329 * 330 * <p>The performance is approximately 1.25-fold slower than {@link DD#divide(DD)}. 331 * Note that division is an order of magnitude slower than multiplication and the 332 * absolute performance difference is significant. 333 * 334 * @param x x. 335 * @param y y. 336 * @return the quotient 337 */ 338 public static DD divide(DD x, DD y) { 339 return accurateDivide(x.hi(), x.lo(), y.hi(), y.lo()); 340 } 341 342 /** 343 * Compute the division of {@code (x, xx)} by {@code y}. 344 * If {@code y = 0} the result is undefined. 345 * 346 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>. 347 * 348 * <p>The performance is approximately 1.25-fold slower than {@link DD#divide(DD)}. 349 * Note that division is an order of magnitude slower than multiplication and the 350 * absolute performance difference is significant. 351 * 352 * @param x High part of x. 353 * @param xx Low part of x. 354 * @param y High part of y. 355 * @param yy Low part of y. 356 * @return the quotient 357 */ 358 private static DD accurateDivide(double x, double xx, double y, double yy) { 359 // Long division 360 // quotient q0 = x / y 361 final double q0 = x / y; 362 // remainder r0 = x - q0 * y 363 DD p = accurateMultiply(y, yy, q0); 364 DD r = DD.accurateAdd(x, xx, -p.hi(), -p.lo()); 365 // next quotient q1 = r0 / y 366 final double q1 = r.hi() / y; 367 // remainder r1 = r0 - q1 * y 368 p = accurateMultiply(y, yy, q1); 369 r = DD.accurateAdd(r.hi(), r.lo(), -p.hi(), -p.lo()); 370 // next quotient q2 = r1 / y 371 final double q2 = r.hi() / y; 372 // Collect (q0, q1, q2) 373 final DD q = DD.fastTwoSum(q0, q1); 374 return DD.twoSum(q.hi(), q.lo() + q2); 375 } 376 377 /** 378 * Compute the inverse of {@code y}. 379 * If {@code y = 0} the result is undefined. 380 * 381 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>. 382 * 383 * <p>The performance is approximately 2-fold slower than {@link DD#reciprocal()}. 384 * 385 * @param y y. 386 * @return the inverse 387 */ 388 public static DD reciprocal(DD y) { 389 return accurateReciprocal(y.hi(), y.lo()); 390 } 391 392 /** 393 * Compute the inverse of {@code (y, yy)}. 394 * If {@code y = 0} the result is undefined. 395 * 396 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>. 397 * 398 * <p>The performance is approximately 2-fold slower than {@link DD#reciprocal()}. 399 * 400 * @param y High part of y. 401 * @param yy Low part of y. 402 * @return the inverse 403 */ 404 private static DD accurateReciprocal(double y, double yy) { 405 // As per divide using (x, xx) = (1, 0) 406 // quotient q0 = x / y 407 final double q0 = 1 / y; 408 // remainder r0 = x - q0 * y 409 DD p = accurateMultiply(y, yy, q0); 410 // High accuracy add required 411 // This add saves 2 twoSum and 3 fastTwoSum (24 FLOPS) by ignoring the zero low part 412 DD r = DD.accurateAdd(-p.hi(), -p.lo(), 1); 413 // next quotient q1 = r0 / y 414 final double q1 = r.hi() / y; 415 // remainder r1 = r0 - q1 * y 416 p = accurateMultiply(y, yy, q1); 417 // accurateAdd not used as we do not need r1.xx() 418 r = DD.accurateAdd(r.hi(), r.lo(), -p.hi(), -p.lo()); 419 // next quotient q2 = r1 / y 420 final double q2 = r.hi() / y; 421 // Collect (q0, q1, q2) 422 final DD q = DD.fastTwoSum(q0, q1); 423 return DD.twoSum(q.hi(), q.lo() + q2); 424 } 425 426 /** 427 * Compute the square root of {@code x}. 428 * 429 * <p>Uses the result {@code Math.sqrt(x)} 430 * if that result is not a finite normalized {@code double}. 431 * 432 * <p>Special cases: 433 * <ul> 434 * <li>If {@code x} is NaN or less than zero, then the result is {@code (NaN, 0)}. 435 * <li>If {@code x} is positive infinity, then the result is {@code (+infinity, 0)}. 436 * <li>If {@code x} is positive zero or negative zero, then the result is {@code (x, 0)}. 437 * </ul> 438 * 439 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>. 440 * 441 * <p>The performance is approximately 5.5-fold slower than {@link DD#sqrt()}. 442 * 443 * @param x x. 444 * @return {@code sqrt(x)} 445 * @see Math#sqrt(double) 446 * @see Double#MIN_NORMAL 447 */ 448 public static DD sqrt(DD x) { 449 // Standard sqrt 450 final DD c = x.sqrt(); 451 452 // Here we support {negative, +infinity, nan and zero} edge cases. 453 // (This is the same condition as in DD.sqrt) 454 if (DD.isNotNormal(c.hi())) { 455 return c; 456 } 457 458 // Repeat Dekker's iteration from DD.sqrt with an accurate DD square. 459 // Using an accurate sum for cc does not improve accuracy. 460 final DD u = square(c); 461 final double cc = (x.hi() - u.hi() - u.lo() + x.lo()) * 0.5 / c.hi(); 462 return DD.fastTwoSum(c.hi(), c.lo() + cc); 463 } 464 465 /** 466 * Compute the number {@code x} raised to the power {@code n}. 467 * 468 * <p>This uses the powDSimple algorithm of van Mulbregt [1] which applies a Taylor series 469 * adjustment to the result of {@code x^n}: 470 * <pre> 471 * (x+xx)^n = x^n * (1 + xx/x)^n 472 * = x^n + x^n * (exp(n log(1 + xx/x)) - 1) 473 * </pre> 474 * 475 * <ol> 476 * <li> 477 * van Mulbregt, P. (2018). 478 * <a href="https://doi.org/10.48550/arxiv.1802.06966">Computing the Cumulative Distribution 479 * Function and Quantiles of the One-sided Kolmogorov-Smirnov Statistic</a> 480 * arxiv:1802.06966. 481 * </ol> 482 * 483 * @param x High part of x. 484 * @param xx Low part of x. 485 * @param n Power. 486 * @return the result 487 * @see Math#pow(double, double) 488 */ 489 public static DD simplePow(double x, double xx, int n) { 490 // Edge cases. These ignore (x, xx) = (+/-1, 0). The result is Math.pow(x, n). 491 if (n == 0) { 492 return DD.ONE; 493 } 494 // IEEE result for non-finite or zero 495 if (!Double.isFinite(x) || x == 0) { 496 return DD.of(Math.pow(x, n)); 497 } 498 // Here the number is non-zero finite 499 if (n < 0) { 500 DD r = computeSimplePow(x, xx, -1L * n); 501 // Safe inversion of small/large values. Reuse the existing multiply scaling factors. 502 // 1 / x = b * 1 / bx 503 if (Math.abs(r.hi()) < SAFE_MULTIPLY_DOWNSCALE) { 504 r = DD.of(r.hi() * SAFE_MULTIPLY, r.lo() * SAFE_MULTIPLY).reciprocal(); 505 final double hi = r.hi() * SAFE_MULTIPLY; 506 // Return signed zero by multiplication for infinite 507 final double lo = r.lo() * (Double.isInfinite(hi) ? 0 : SAFE_MULTIPLY); 508 return DD.of(hi, lo); 509 } 510 if (Math.abs(r.hi()) > SAFE_MULTIPLY) { 511 r = DD.of(r.hi() * SAFE_MULTIPLY_DOWNSCALE, r.lo() * SAFE_MULTIPLY_DOWNSCALE).reciprocal(); 512 final double hi = r.hi() * SAFE_MULTIPLY_DOWNSCALE; 513 final double lo = r.lo() * SAFE_MULTIPLY_DOWNSCALE; 514 return DD.of(hi, lo); 515 } 516 return r.reciprocal(); 517 } 518 return computeSimplePow(x, xx, n); 519 } 520 521 /** 522 * Compute the number {@code x} raised to the power {@code n} (must be strictly positive). 523 * 524 * <p>This method exists to allow negation of the power when it is {@link Integer#MIN_VALUE} 525 * by casting to a long. It is called directly by simplePow and computeSimplePowScaled 526 * when the arguments have already been validated. 527 * 528 * @param x High part of x. 529 * @param xx Low part of x. 530 * @param n Power (must be positive). 531 * @return the result 532 */ 533 private static DD computeSimplePow(double x, double xx, long n) { 534 final double y = Math.pow(x, n); 535 final double z = xx / x; 536 // Taylor series: (1 + z)^n = n*z * (1 + ((n-1)*z/2)) 537 // Applicable when n*z is small. 538 // Assume xx < epsilon * x. 539 // n > 1e8 => n * xx/x > 1e8 * xx/x == n*z > 1e8 * 1e-16 > 1e-8 540 double w; 541 if (n > LARGE_N) { 542 w = Math.expm1(n * Math.log1p(z)); 543 } else { 544 w = n * z * (1 + (n - 1) * z * 0.5); 545 } 546 // w ~ (1+z)^n : z ~ 2^-53 547 // Math.pow(1 + 2 * Math.ulp(1.0), 2^31) ~ 1.0000000000000129 548 // Math.pow(1 - 2 * Math.ulp(1.0), 2^31) ~ 0.9999999999999871 549 // If (x, xx) is normalized a fast-two-sum can be used. 550 // fast-two-sum propagates sign changes for input of (+/-1.0, +/-0.0) (two-sum does not). 551 return DD.fastTwoSum(y, y * w); 552 } 553 554 /** 555 * Compute the number {@code x} raised to the power {@code n}. 556 * 557 * <p>The value is returned as fractional {@code f} and integral 558 * {@code 2^exp} components. 559 * <pre> 560 * (x+xx)^n = (f+ff) * 2^exp 561 * </pre> 562 * 563 * <p>The combined fractional part (f, ff) is in the range {@code [0.5, 1)}. 564 * 565 * <p>Special cases: 566 * 567 * <ul> 568 * <li>If {@code (x, xx)} is zero the high part of the fractional part is 569 * computed using {@link Math#pow(double, double) Math.pow(x, n)} and the exponent is 0. 570 * <li>If {@code n = 0} the fractional part is 0.5 and the exponent is 1. 571 * <li>If {@code (x, xx)} is an exact power of 2 the fractional part is 0.5 and the exponent 572 * is the power of 2 minus 1. 573 * <li>If the result high-part is an exact power of 2 and the low-part has an opposite 574 * signed non-zero magnitude then the fraction high-part {@code f} will be {@code +/-1} such that 575 * the double-double number is in the range {@code [0.5, 1)}. 576 * <li>If the argument is not finite then a fractional representation is not possible. 577 * In this case the fraction and the scale factor is undefined. 578 * </ul> 579 * 580 * @param x High part of x. 581 * @param xx Low part of x. 582 * @param n Power. 583 * @param exp Power of two scale factor (integral exponent). 584 * @return Fraction part. 585 * @see #simplePow(double, double, int) 586 * @see DD#frexp(int[]) 587 */ 588 public static DD simplePowScaled(double x, double xx, int n, long[] exp) { 589 // Edge cases. 590 if (n == 0) { 591 exp[0] = 1; 592 return DD.of(0.5); 593 } 594 // IEEE result for non-finite or zero 595 if (!Double.isFinite(x) || x == 0) { 596 exp[0] = 0; 597 return DD.of(Math.pow(x, n)); 598 } 599 // Here the number is non-zero finite 600 final int[] ie = {0}; 601 DD f = DD.of(x, xx).frexp(ie); 602 final long b = ie[0]; 603 if (n < 0) { 604 f = computeSimplePowScaled(b, f.hi(), f.lo(), -1L * n, exp); 605 // Result is a non-zero fraction part so inversion is safe 606 f = f.reciprocal(); 607 // Rescale to [0.5, 1.0) 608 f = f.frexp(ie); 609 exp[0] = ie[0] - exp[0]; 610 return f; 611 } 612 return computeSimplePowScaled(b, f.hi(), f.lo(), n, exp); 613 } 614 615 /** 616 * Compute the number {@code x} (non-zero finite) raised to the power {@code n} (must be strictly positive). 617 * 618 * <p>This method exists to allow negation of the power when it is {@link Integer#MIN_VALUE} 619 * by casting to a long. By using a fractional representation for the argument 620 * the recursive calls avoid a step to normalise the input. 621 * 622 * @param bx Integral component 2^bx of x. 623 * @param x Fractional high part of x. 624 * @param xx Fractional low part of x. 625 * @param n Power (in [1, 2^31]). 626 * @param exp Power of two scale factor (integral exponent). 627 * @return Fraction part. 628 */ 629 private static DD computeSimplePowScaled(long bx, double x, double xx, long n, long[] exp) { 630 // By normalising x we can break apart the power to avoid over/underflow: 631 // x^n = (f * 2^b)^n = 2^bn * f^n 632 long b = bx; 633 double f0 = x; 634 double f1 = xx; 635 636 // Minimise the amount we have to decompose the power. This is done 637 // using either f (<=1) or 2f (>=1) as the fractional representation, 638 // based on which can use a larger exponent without over/underflow. 639 // We approximate the power as 2^b and require a result with the 640 // smallest absolute b. An additional consideration is the low-part ff 641 // which sets a more conservative underflow limit: 642 // f^n = 2^(-b+53) => b = -n log2(f) - 53 643 // (2f)^n = 2^n*f^n = 2^b => b = n log2(f) + n 644 // Switch-over point for f is at: 645 // -n log2(f) - 53 = n log2(f) + n 646 // 2n log2(f) = -53 - n 647 // f = 2^(-53/2n) * 2^(-1/2) 648 // Avoid a power computation to find the threshold by dropping the first term: 649 // f = 2^(-1/2) = 1/sqrt(2) = sqrt(0.5) = 0.707 650 // This will bias towards choosing f even when (2f)^n would not overflow. 651 // It allows the same safe exponent to be used for both cases. 652 653 // Safe maximum for exponentiation. 654 long m; 655 double af = Math.abs(f0); 656 if (af < ROOT_HALF) { 657 // Choose 2f. 658 // This case will handle (x, xx) = (1, 0) in a single power operation 659 f0 *= 2; 660 f1 *= 2; 661 af *= 2; 662 b -= 1; 663 if (n <= SAFE_EXPONENT_2F || af <= SAFE_2F) { 664 m = n; 665 } else { 666 // f^m < 2^1013 667 // m ~ 1013 / log2(f) 668 m = Math.max(SAFE_EXPONENT_2F, (long) (SAFE_EXPONENT_2F * LN2 / Math.log(af))); 669 } 670 } else { 671 // Choose f 672 if (n <= SAFE_EXPONENT_F || af >= SAFE_F) { 673 m = n; 674 } else { 675 // f^m > 2^-958 676 // m ~ -958 / log2(f) 677 m = Math.max(SAFE_EXPONENT_F, (long) (-SAFE_EXPONENT_F * LN2 / Math.log(af))); 678 } 679 } 680 681 DD f; 682 final int[] expi = {0}; 683 684 if (n <= m) { 685 f = computeSimplePow(f0, f1, n); 686 f = f.frexp(expi); 687 exp[0] = b * n + expi[0]; 688 return f; 689 } 690 691 // Decompose the power function. 692 // quotient q = n / m 693 // remainder r = n % m 694 // f^n = (f^m)^(n/m) * f^(n%m) 695 696 final long q = n / m; 697 final long r = n % m; 698 // (f^m) 699 // m is safe and > 1 700 f = computeSimplePow(f0, f1, m); 701 f = f.frexp(expi); 702 long qb = expi[0]; 703 // (f^m)^(n/m) 704 // q is non-zero but may be 1 705 if (q > 1) { 706 // full simple-pow to ensure safe exponentiation 707 f = computeSimplePowScaled(qb, f.hi(), f.lo(), q, exp); 708 qb = exp[0]; 709 } 710 // f^(n%m) 711 // r may be zero or one which do not require another power 712 if (r == 0) { 713 f = f.frexp(expi); 714 exp[0] = b * n + qb + expi[0]; 715 return f; 716 } 717 if (r == 1) { 718 f = f.multiply(DD.of(f0, f1)); 719 f = f.frexp(expi); 720 exp[0] = b * n + qb + expi[0]; 721 return f; 722 } 723 // Here r is safe 724 final DD t = f; 725 f = computeSimplePow(f0, f1, r); 726 f = f.frexp(expi); 727 final long rb = expi[0]; 728 // (f^m)^(n/m) * f^(n%m) 729 f = f.multiply(t); 730 // 2^bn * (f^m)^(n/m) * f^(n%m) 731 f = f.frexp(expi); 732 exp[0] = b * n + qb + rb + expi[0]; 733 return f; 734 } 735 }