1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.core;
18
19 /**
20 * Computes double-double floating-point operations.
21 *
22 * <p>This class contains extension methods to supplement the functionality in {@link DD}.
23 * The methods are tested in {@link DDTest}. These include:
24 * <ul>
25 * <li>Arithmetic operations that have 105+ bits of precision and are typically 2-3 bits more
26 * accurate than the versions in {@link DD}.
27 * <li>A power function based on {@link Math#pow(double, double)} with approximately 50+ bits of
28 * precision.
29 * </ul>
30 *
31 * <p><b>Note</b>
32 *
33 * <p>This class is public and has public methods to allow testing within the examples JMH module.
34 *
35 * @since 1.2
36 */
37 public final class DDExt {
38 /** Threshold for large n where the Taylor series for (1+z)^n is not applicable. */
39 private static final int LARGE_N = 100000000;
40 /** Threshold for (x, xx)^n where x=0.5 and low-part will not be sub-normal.
41 * Note x has an exponent of -1; xx of -54 (if normalized); the min normal exponent is -1022;
42 * add 10-bits headroom in case xx is below epsilon * x: 1022 - 54 - 10. 0.5^958 = 4.1e-289. */
43 private static final int SAFE_EXPONENT_F = 958;
44 /** Threshold for (x, xx)^n where n ~ 2^31 and low-part will not be sub-normal.
45 * x ~ exp(log(2^-958) / 2^31).
46 * Note: floor(-958 * ln(2) / ln(nextDown(SAFE_F))) < 2^31. */
47 private static final double SAFE_F = 0.9999996907846553;
48 /** Threshold for (x, xx)^n where x=2 and high-part is finite.
49 * For consistency we use 10-bits headroom down from max exponent 1023. 0.5^1013 = 8.78e304. */
50 private static final int SAFE_EXPONENT_2F = 1013;
51 /** Threshold for (x, xx)^n where n ~ 2^31 and high-part is finite.
52 * x ~ exp(log(2^1013) / 2^31)
53 * Note: floor(1013 * ln(2) / ln(nextUp(SAFE_2F))) < 2^31. */
54 private static final double SAFE_2F = 1.0000003269678954;
55 /** log(2) (20-digits). */
56 private static final double LN2 = 0.69314718055994530941;
57 /** sqrt(0.5) == 1 / sqrt(2). */
58 private static final double ROOT_HALF = 0.707106781186547524400;
59 /** The limit for safe multiplication of {@code x*y}, assuming values above 1.
60 * Used to maintain positive values during the power computation. */
61 private static final double SAFE_MULTIPLY = 0x1.0p500;
62 /** Used to downscale values before multiplication. Downscaling of any value
63 * strictly above SAFE_MULTIPLY will be above 1 even including a double-double
64 * roundoff that lowers the magnitude. */
65 private static final double SAFE_MULTIPLY_DOWNSCALE = 0x1.0p-500;
66
67 /**
68 * No instances.
69 */
70 private DDExt() {}
71
72 /**
73 * Compute the sum of {@code x} and {@code y}.
74 *
75 * <p>This computes the same result as
76 * {@link #add(DD, DD) add(x, DD.of(y))}.
77 *
78 * <p>The performance is approximately 1.5-fold slower than {@link DD#add(double)}.
79 *
80 * @param x x.
81 * @param y y.
82 * @return the sum
83 */
84 public static DD add(DD x, double y) {
85 return DD.accurateAdd(x.hi(), x.lo(), y);
86 }
87
88 /**
89 * Compute the sum of {@code x} and {@code y}.
90 *
91 * <p>The high-part of the result is within 1 ulp of the true sum {@code e}.
92 * The low-part of the result is within 1 ulp of the result of the high-part
93 * subtracted from the true sum {@code e - hi}.
94 *
95 * <p>The performance is approximately 2-fold slower than {@link DD#add(DD)}.
96 *
97 * @param x x.
98 * @param y y.
99 * @return the sum
100 */
101 public static DD add(DD x, DD y) {
102 return DD.accurateAdd(x.hi(), x.lo(), y.hi(), y.lo());
103 }
104
105 /**
106 * Compute the subtraction of {@code y} from {@code x}.
107 *
108 * <p>This computes the same result as
109 * {@link #add(DD, double) add(x, -y)}.
110 *
111 * @param x x.
112 * @param y y.
113 * @return the difference
114 */
115 public static DD subtract(DD x, double y) {
116 return DD.accurateAdd(x.hi(), x.lo(), -y);
117 }
118
119 /**
120 * Compute the subtraction of {@code y} from {@code x}.
121 *
122 * <p>This computes the same result as
123 * {@link #add(DD, DD) add(x, y.negate())}.
124 *
125 * @param x x.
126 * @param y y.
127 * @return the difference
128 */
129 public static DD subtract(DD x, DD y) {
130 return DD.accurateAdd(x.hi(), x.lo(), -y.hi(), -y.lo());
131 }
132
133 /**
134 * Compute the multiplication product of {@code x} and {@code y}.
135 *
136 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
137 *
138 * <p>The performance is approximately 2.5-fold slower than {@link DD#multiply(double)}.
139 *
140 * @param x x.
141 * @param y y.
142 * @return the product
143 */
144 public static DD multiply(DD x, double y) {
145 return accurateMultiply(x.hi(), x.lo(), y);
146 }
147
148 /**
149 * Compute the multiplication product of {@code (x, xx)} and {@code y}.
150 *
151 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
152 *
153 * <p>The performance is approximately 2.5-fold slower than {@link DD#multiply(double)}.
154 *
155 * @param x High part of x.
156 * @param xx Low part of x.
157 * @param y y.
158 * @return the product
159 */
160 private static DD accurateMultiply(double x, double xx, double y) {
161 // For working see accurateMultiply(double x, double xx, double y, double yy)
162
163 final double xh = DD.highPart(x);
164 final double xl = x - xh;
165 final double xxh = DD.highPart(xx);
166 final double xxl = xx - xxh;
167 final double yh = DD.highPart(y);
168 final double yl = y - yh;
169
170 final double p00 = x * y;
171 final double q00 = DD.twoProductLow(xh, xl, yh, yl, p00);
172 final double p10 = xx * y;
173 final double q10 = DD.twoProductLow(xxh, xxl, yh, yl, p10);
174
175 // The code below collates the O(eps) terms with a round-off
176 // so O(eps^2) terms can be added to it.
177
178 final double s0 = p00;
179 // Sum (p10, q00) -> (s1, r2) Order(eps)
180 final double s1 = p10 + q00;
181 final double r2 = DD.twoSumLow(p10, q00, s1);
182
183 // Collect (s0, s1, r2 + q10)
184 final double u = s0 + s1;
185 final double v = DD.fastTwoSumLow(s0, s1, u);
186 return DD.fastTwoSum(u, r2 + q10 + v);
187 }
188
189 /**
190 * Compute the multiplication product of {@code x} and {@code y}.
191 *
192 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
193 *
194 * <p>The performance is approximately 4.5-fold slower than {@link DD#multiply(DD)}.
195 *
196 * @param x x.
197 * @param y y.
198 * @return the product
199 */
200 public static DD multiply(DD x, DD y) {
201 return accurateMultiply(x.hi(), x.lo(), y.hi(), y.lo());
202 }
203
204 /**
205 * Compute the multiplication product of {@code (x, xx)} and {@code (y, yy)}.
206 *
207 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
208 *
209 * <p>The performance is approximately 4.5-fold slower than {@link DD#multiply(DD)}.
210 *
211 * @param x High part of x.
212 * @param xx Low part of x.
213 * @param y High part of y.
214 * @param yy Low part of y.
215 * @return the product
216 */
217 private static DD accurateMultiply(double x, double xx, double y, double yy) {
218 // double-double multiplication:
219 // (a0, a1) * (b0, b1)
220 // a x b ~ a0b0 O(1) term
221 // + a0b1 + a1b0 O(eps) terms
222 // + a1b1 O(eps^2) term
223 // Higher terms require two-prod if the round-off is <= O(eps^2).
224 // (pij,qij) = two-prod(ai, bj); pij = O(eps^i+j); qij = O(eps^i+j+1)
225 // p00 O(1)
226 // p01, p10, q00 O(eps)
227 // p11, q01, q10 O(eps^2)
228 // q11 O(eps^3) (not required for the first 106 bits)
229 // Sum terms of the same order. Carry round-off to lower order:
230 // s0 = p00 Order(1)
231 // Sum (p01, p10, q00) -> (s1, r2) Order(eps)
232 // Sum (p11, q01, q10, r2) -> s2 Order(eps^2)
233
234 final double xh = DD.highPart(x);
235 final double xl = x - xh;
236 final double xxh = DD.highPart(xx);
237 final double xxl = xx - xxh;
238 final double yh = DD.highPart(y);
239 final double yl = y - yh;
240 final double yyh = DD.highPart(yy);
241 final double yyl = yy - yyh;
242
243 final double p00 = x * y;
244 final double q00 = DD.twoProductLow(xh, xl, yh, yl, p00);
245 final double p01 = x * yy;
246 final double q01 = DD.twoProductLow(xh, xl, yyh, yyl, p01);
247 final double p10 = xx * y;
248 final double q10 = DD.twoProductLow(xxh, xxl, yh, yl, p10);
249 final double p11 = xx * yy;
250
251 // Note: Directly adding same order terms (error = 2 eps^2):
252 // DD.fastTwoSum(p00, (p11 + q01 + q10) + (p01 + p10 + q00))
253
254 // The code below collates the O(eps) terms with a round-off
255 // so O(eps^2) terms can be added to it.
256
257 final double s0 = p00;
258 // Sum (p01, p10, q00) -> (s1, r2) Order(eps)
259 double u = p01 + p10;
260 double v = DD.twoSumLow(p01, p10, u);
261 final double s1 = q00 + u;
262 final double w = DD.twoSumLow(q00, u, s1);
263 final double r2 = v + w;
264
265 // Collect (s0, s1, r2 + p11 + q01 + q10)
266 u = s0 + s1;
267 v = DD.fastTwoSumLow(s0, s1, u);
268 return DD.fastTwoSum(u, r2 + p11 + q01 + q10 + v);
269 }
270
271 /**
272 * Compute the square of {@code x}.
273 *
274 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
275 *
276 * <p>The performance is approximately 4.5-fold slower than {@link DD#square()}.
277 *
278 * @param x x.
279 * @return the square
280 */
281 public static DD square(DD x) {
282 return accurateSquare(x.hi(), x.lo());
283 }
284
285 /**
286 * Compute the square of {@code (x, xx)}.
287 *
288 * <p>The computed result is within 0.5 eps of the exact result where eps is 2<sup>-106</sup>.
289 *
290 * <p>The performance is approximately 4.5-fold slower than {@link DD#square()}.
291 *
292 * @param x High part of x.
293 * @param xx Low part of x.
294 * @return the square
295 */
296 private static DD accurateSquare(double x, double xx) {
297 // For working see accurateMultiply(double x, double xx, double y, double yy)
298
299 final double xh = DD.highPart(x);
300 final double xl = x - xh;
301 final double xxh = DD.highPart(xx);
302 final double xxl = xx - xxh;
303
304 final double p00 = x * x;
305 final double q00 = DD.twoSquareLow(xh, xl, p00);
306 final double p01 = x * xx;
307 final double q01 = DD.twoProductLow(xh, xl, xxh, xxl, p01);
308 final double p11 = xx * xx;
309
310 // The code below collates the O(eps) terms with a round-off
311 // so O(eps^2) terms can be added to it.
312
313 final double s0 = p00;
314 // Sum (p01, p10, q00) -> (s1, r2) Order(eps)
315 final double s1 = q00 + 2 * p01;
316 final double r2 = DD.twoSumLow(q00, 2 * p01, s1);
317
318 // Collect (s0, s1, r2 + p11 + q01 + q10)
319 final double u = s0 + s1;
320 final double v = DD.fastTwoSumLow(s0, s1, u);
321 return DD.fastTwoSum(u, r2 + p11 + 2 * q01 + v);
322 }
323
324 /**
325 * Compute the division of {@code x} by {@code y}.
326 * If {@code y = 0} the result is undefined.
327 *
328 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
329 *
330 * <p>The performance is approximately 1.25-fold slower than {@link DD#divide(DD)}.
331 * Note that division is an order of magnitude slower than multiplication and the
332 * absolute performance difference is significant.
333 *
334 * @param x x.
335 * @param y y.
336 * @return the quotient
337 */
338 public static DD divide(DD x, DD y) {
339 return accurateDivide(x.hi(), x.lo(), y.hi(), y.lo());
340 }
341
342 /**
343 * Compute the division of {@code (x, xx)} by {@code y}.
344 * If {@code y = 0} the result is undefined.
345 *
346 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
347 *
348 * <p>The performance is approximately 1.25-fold slower than {@link DD#divide(DD)}.
349 * Note that division is an order of magnitude slower than multiplication and the
350 * absolute performance difference is significant.
351 *
352 * @param x High part of x.
353 * @param xx Low part of x.
354 * @param y High part of y.
355 * @param yy Low part of y.
356 * @return the quotient
357 */
358 private static DD accurateDivide(double x, double xx, double y, double yy) {
359 // Long division
360 // quotient q0 = x / y
361 final double q0 = x / y;
362 // remainder r0 = x - q0 * y
363 DD p = accurateMultiply(y, yy, q0);
364 DD r = DD.accurateAdd(x, xx, -p.hi(), -p.lo());
365 // next quotient q1 = r0 / y
366 final double q1 = r.hi() / y;
367 // remainder r1 = r0 - q1 * y
368 p = accurateMultiply(y, yy, q1);
369 r = DD.accurateAdd(r.hi(), r.lo(), -p.hi(), -p.lo());
370 // next quotient q2 = r1 / y
371 final double q2 = r.hi() / y;
372 // Collect (q0, q1, q2)
373 final DD q = DD.fastTwoSum(q0, q1);
374 return DD.twoSum(q.hi(), q.lo() + q2);
375 }
376
377 /**
378 * Compute the inverse of {@code y}.
379 * If {@code y = 0} the result is undefined.
380 *
381 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
382 *
383 * <p>The performance is approximately 2-fold slower than {@link DD#reciprocal()}.
384 *
385 * @param y y.
386 * @return the inverse
387 */
388 public static DD reciprocal(DD y) {
389 return accurateReciprocal(y.hi(), y.lo());
390 }
391
392 /**
393 * Compute the inverse of {@code (y, yy)}.
394 * If {@code y = 0} the result is undefined.
395 *
396 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
397 *
398 * <p>The performance is approximately 2-fold slower than {@link DD#reciprocal()}.
399 *
400 * @param y High part of y.
401 * @param yy Low part of y.
402 * @return the inverse
403 */
404 private static DD accurateReciprocal(double y, double yy) {
405 // As per divide using (x, xx) = (1, 0)
406 // quotient q0 = x / y
407 final double q0 = 1 / y;
408 // remainder r0 = x - q0 * y
409 DD p = accurateMultiply(y, yy, q0);
410 // High accuracy add required
411 // This add saves 2 twoSum and 3 fastTwoSum (24 FLOPS) by ignoring the zero low part
412 DD r = DD.accurateAdd(-p.hi(), -p.lo(), 1);
413 // next quotient q1 = r0 / y
414 final double q1 = r.hi() / y;
415 // remainder r1 = r0 - q1 * y
416 p = accurateMultiply(y, yy, q1);
417 // accurateAdd not used as we do not need r1.xx()
418 r = DD.accurateAdd(r.hi(), r.lo(), -p.hi(), -p.lo());
419 // next quotient q2 = r1 / y
420 final double q2 = r.hi() / y;
421 // Collect (q0, q1, q2)
422 final DD q = DD.fastTwoSum(q0, q1);
423 return DD.twoSum(q.hi(), q.lo() + q2);
424 }
425
426 /**
427 * Compute the square root of {@code x}.
428 *
429 * <p>Uses the result {@code Math.sqrt(x)}
430 * if that result is not a finite normalized {@code double}.
431 *
432 * <p>Special cases:
433 * <ul>
434 * <li>If {@code x} is NaN or less than zero, then the result is {@code (NaN, 0)}.
435 * <li>If {@code x} is positive infinity, then the result is {@code (+infinity, 0)}.
436 * <li>If {@code x} is positive zero or negative zero, then the result is {@code (x, 0)}.
437 * </ul>
438 *
439 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
440 *
441 * <p>The performance is approximately 5.5-fold slower than {@link DD#sqrt()}.
442 *
443 * @param x x.
444 * @return {@code sqrt(x)}
445 * @see Math#sqrt(double)
446 * @see Double#MIN_NORMAL
447 */
448 public static DD sqrt(DD x) {
449 // Standard sqrt
450 final DD c = x.sqrt();
451
452 // Here we support {negative, +infinity, nan and zero} edge cases.
453 // (This is the same condition as in DD.sqrt)
454 if (DD.isNotNormal(c.hi())) {
455 return c;
456 }
457
458 // Repeat Dekker's iteration from DD.sqrt with an accurate DD square.
459 // Using an accurate sum for cc does not improve accuracy.
460 final DD u = square(c);
461 final double cc = (x.hi() - u.hi() - u.lo() + x.lo()) * 0.5 / c.hi();
462 return DD.fastTwoSum(c.hi(), c.lo() + cc);
463 }
464
465 /**
466 * Compute the number {@code x} raised to the power {@code n}.
467 *
468 * <p>This uses the powDSimple algorithm of van Mulbregt [1] which applies a Taylor series
469 * adjustment to the result of {@code x^n}:
470 * <pre>
471 * (x+xx)^n = x^n * (1 + xx/x)^n
472 * = x^n + x^n * (exp(n log(1 + xx/x)) - 1)
473 * </pre>
474 *
475 * <ol>
476 * <li>
477 * van Mulbregt, P. (2018).
478 * <a href="https://doi.org/10.48550/arxiv.1802.06966">Computing the Cumulative Distribution
479 * Function and Quantiles of the One-sided Kolmogorov-Smirnov Statistic</a>
480 * arxiv:1802.06966.
481 * </ol>
482 *
483 * @param x High part of x.
484 * @param xx Low part of x.
485 * @param n Power.
486 * @return the result
487 * @see Math#pow(double, double)
488 */
489 public static DD simplePow(double x, double xx, int n) {
490 // Edge cases. These ignore (x, xx) = (+/-1, 0). The result is Math.pow(x, n).
491 if (n == 0) {
492 return DD.ONE;
493 }
494 // IEEE result for non-finite or zero
495 if (!Double.isFinite(x) || x == 0) {
496 return DD.of(Math.pow(x, n));
497 }
498 // Here the number is non-zero finite
499 if (n < 0) {
500 DD r = computeSimplePow(x, xx, -1L * n);
501 // Safe inversion of small/large values. Reuse the existing multiply scaling factors.
502 // 1 / x = b * 1 / bx
503 if (Math.abs(r.hi()) < SAFE_MULTIPLY_DOWNSCALE) {
504 r = DD.of(r.hi() * SAFE_MULTIPLY, r.lo() * SAFE_MULTIPLY).reciprocal();
505 final double hi = r.hi() * SAFE_MULTIPLY;
506 // Return signed zero by multiplication for infinite
507 final double lo = r.lo() * (Double.isInfinite(hi) ? 0 : SAFE_MULTIPLY);
508 return DD.of(hi, lo);
509 }
510 if (Math.abs(r.hi()) > SAFE_MULTIPLY) {
511 r = DD.of(r.hi() * SAFE_MULTIPLY_DOWNSCALE, r.lo() * SAFE_MULTIPLY_DOWNSCALE).reciprocal();
512 final double hi = r.hi() * SAFE_MULTIPLY_DOWNSCALE;
513 final double lo = r.lo() * SAFE_MULTIPLY_DOWNSCALE;
514 return DD.of(hi, lo);
515 }
516 return r.reciprocal();
517 }
518 return computeSimplePow(x, xx, n);
519 }
520
521 /**
522 * Compute the number {@code x} raised to the power {@code n} (must be strictly positive).
523 *
524 * <p>This method exists to allow negation of the power when it is {@link Integer#MIN_VALUE}
525 * by casting to a long. It is called directly by simplePow and computeSimplePowScaled
526 * when the arguments have already been validated.
527 *
528 * @param x High part of x.
529 * @param xx Low part of x.
530 * @param n Power (must be positive).
531 * @return the result
532 */
533 private static DD computeSimplePow(double x, double xx, long n) {
534 final double y = Math.pow(x, n);
535 final double z = xx / x;
536 // Taylor series: (1 + z)^n = n*z * (1 + ((n-1)*z/2))
537 // Applicable when n*z is small.
538 // Assume xx < epsilon * x.
539 // n > 1e8 => n * xx/x > 1e8 * xx/x == n*z > 1e8 * 1e-16 > 1e-8
540 double w;
541 if (n > LARGE_N) {
542 w = Math.expm1(n * Math.log1p(z));
543 } else {
544 w = n * z * (1 + (n - 1) * z * 0.5);
545 }
546 // w ~ (1+z)^n : z ~ 2^-53
547 // Math.pow(1 + 2 * Math.ulp(1.0), 2^31) ~ 1.0000000000000129
548 // Math.pow(1 - 2 * Math.ulp(1.0), 2^31) ~ 0.9999999999999871
549 // If (x, xx) is normalized a fast-two-sum can be used.
550 // fast-two-sum propagates sign changes for input of (+/-1.0, +/-0.0) (two-sum does not).
551 return DD.fastTwoSum(y, y * w);
552 }
553
554 /**
555 * Compute the number {@code x} raised to the power {@code n}.
556 *
557 * <p>The value is returned as fractional {@code f} and integral
558 * {@code 2^exp} components.
559 * <pre>
560 * (x+xx)^n = (f+ff) * 2^exp
561 * </pre>
562 *
563 * <p>The combined fractional part (f, ff) is in the range {@code [0.5, 1)}.
564 *
565 * <p>Special cases:
566 *
567 * <ul>
568 * <li>If {@code (x, xx)} is zero the high part of the fractional part is
569 * computed using {@link Math#pow(double, double) Math.pow(x, n)} and the exponent is 0.
570 * <li>If {@code n = 0} the fractional part is 0.5 and the exponent is 1.
571 * <li>If {@code (x, xx)} is an exact power of 2 the fractional part is 0.5 and the exponent
572 * is the power of 2 minus 1.
573 * <li>If the result high-part is an exact power of 2 and the low-part has an opposite
574 * signed non-zero magnitude then the fraction high-part {@code f} will be {@code +/-1} such that
575 * the double-double number is in the range {@code [0.5, 1)}.
576 * <li>If the argument is not finite then a fractional representation is not possible.
577 * In this case the fraction and the scale factor is undefined.
578 * </ul>
579 *
580 * @param x High part of x.
581 * @param xx Low part of x.
582 * @param n Power.
583 * @param exp Power of two scale factor (integral exponent).
584 * @return Fraction part.
585 * @see #simplePow(double, double, int)
586 * @see DD#frexp(int[])
587 */
588 public static DD simplePowScaled(double x, double xx, int n, long[] exp) {
589 // Edge cases.
590 if (n == 0) {
591 exp[0] = 1;
592 return DD.of(0.5);
593 }
594 // IEEE result for non-finite or zero
595 if (!Double.isFinite(x) || x == 0) {
596 exp[0] = 0;
597 return DD.of(Math.pow(x, n));
598 }
599 // Here the number is non-zero finite
600 final int[] ie = {0};
601 DD f = DD.of(x, xx).frexp(ie);
602 final long b = ie[0];
603 if (n < 0) {
604 f = computeSimplePowScaled(b, f.hi(), f.lo(), -1L * n, exp);
605 // Result is a non-zero fraction part so inversion is safe
606 f = f.reciprocal();
607 // Rescale to [0.5, 1.0)
608 f = f.frexp(ie);
609 exp[0] = ie[0] - exp[0];
610 return f;
611 }
612 return computeSimplePowScaled(b, f.hi(), f.lo(), n, exp);
613 }
614
615 /**
616 * Compute the number {@code x} (non-zero finite) raised to the power {@code n} (must be strictly positive).
617 *
618 * <p>This method exists to allow negation of the power when it is {@link Integer#MIN_VALUE}
619 * by casting to a long. By using a fractional representation for the argument
620 * the recursive calls avoid a step to normalise the input.
621 *
622 * @param bx Integral component 2^bx of x.
623 * @param x Fractional high part of x.
624 * @param xx Fractional low part of x.
625 * @param n Power (in [1, 2^31]).
626 * @param exp Power of two scale factor (integral exponent).
627 * @return Fraction part.
628 */
629 private static DD computeSimplePowScaled(long bx, double x, double xx, long n, long[] exp) {
630 // By normalising x we can break apart the power to avoid over/underflow:
631 // x^n = (f * 2^b)^n = 2^bn * f^n
632 long b = bx;
633 double f0 = x;
634 double f1 = xx;
635
636 // Minimise the amount we have to decompose the power. This is done
637 // using either f (<=1) or 2f (>=1) as the fractional representation,
638 // based on which can use a larger exponent without over/underflow.
639 // We approximate the power as 2^b and require a result with the
640 // smallest absolute b. An additional consideration is the low-part ff
641 // which sets a more conservative underflow limit:
642 // f^n = 2^(-b+53) => b = -n log2(f) - 53
643 // (2f)^n = 2^n*f^n = 2^b => b = n log2(f) + n
644 // Switch-over point for f is at:
645 // -n log2(f) - 53 = n log2(f) + n
646 // 2n log2(f) = -53 - n
647 // f = 2^(-53/2n) * 2^(-1/2)
648 // Avoid a power computation to find the threshold by dropping the first term:
649 // f = 2^(-1/2) = 1/sqrt(2) = sqrt(0.5) = 0.707
650 // This will bias towards choosing f even when (2f)^n would not overflow.
651 // It allows the same safe exponent to be used for both cases.
652
653 // Safe maximum for exponentiation.
654 long m;
655 double af = Math.abs(f0);
656 if (af < ROOT_HALF) {
657 // Choose 2f.
658 // This case will handle (x, xx) = (1, 0) in a single power operation
659 f0 *= 2;
660 f1 *= 2;
661 af *= 2;
662 b -= 1;
663 if (n <= SAFE_EXPONENT_2F || af <= SAFE_2F) {
664 m = n;
665 } else {
666 // f^m < 2^1013
667 // m ~ 1013 / log2(f)
668 m = Math.max(SAFE_EXPONENT_2F, (long) (SAFE_EXPONENT_2F * LN2 / Math.log(af)));
669 }
670 } else {
671 // Choose f
672 if (n <= SAFE_EXPONENT_F || af >= SAFE_F) {
673 m = n;
674 } else {
675 // f^m > 2^-958
676 // m ~ -958 / log2(f)
677 m = Math.max(SAFE_EXPONENT_F, (long) (-SAFE_EXPONENT_F * LN2 / Math.log(af)));
678 }
679 }
680
681 DD f;
682 final int[] expi = {0};
683
684 if (n <= m) {
685 f = computeSimplePow(f0, f1, n);
686 f = f.frexp(expi);
687 exp[0] = b * n + expi[0];
688 return f;
689 }
690
691 // Decompose the power function.
692 // quotient q = n / m
693 // remainder r = n % m
694 // f^n = (f^m)^(n/m) * f^(n%m)
695
696 final long q = n / m;
697 final long r = n % m;
698 // (f^m)
699 // m is safe and > 1
700 f = computeSimplePow(f0, f1, m);
701 f = f.frexp(expi);
702 long qb = expi[0];
703 // (f^m)^(n/m)
704 // q is non-zero but may be 1
705 if (q > 1) {
706 // full simple-pow to ensure safe exponentiation
707 f = computeSimplePowScaled(qb, f.hi(), f.lo(), q, exp);
708 qb = exp[0];
709 }
710 // f^(n%m)
711 // r may be zero or one which do not require another power
712 if (r == 0) {
713 f = f.frexp(expi);
714 exp[0] = b * n + qb + expi[0];
715 return f;
716 }
717 if (r == 1) {
718 f = f.multiply(DD.of(f0, f1));
719 f = f.frexp(expi);
720 exp[0] = b * n + qb + expi[0];
721 return f;
722 }
723 // Here r is safe
724 final DD t = f;
725 f = computeSimplePow(f0, f1, r);
726 f = f.frexp(expi);
727 final long rb = expi[0];
728 // (f^m)^(n/m) * f^(n%m)
729 f = f.multiply(t);
730 // 2^bn * (f^m)^(n/m) * f^(n%m)
731 f = f.frexp(expi);
732 exp[0] = b * n + qb + rb + expi[0];
733 return f;
734 }
735 }