1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.numbers.core;
18
19 import java.io.Serializable;
20 import java.math.BigDecimal;
21 import java.util.function.DoubleUnaryOperator;
22
23 /**
24 * Computes double-double floating-point operations.
25 *
26 * <p>A double-double is an unevaluated sum of two IEEE double precision numbers capable of
27 * representing at least 106 bits of significand. A normalized double-double number {@code (x, xx)}
28 * satisfies the condition that the parts are non-overlapping in magnitude such that:
29 * <pre>
30 * |x| > |xx|
31 * x == x + xx
32 * </pre>
33 *
34 * <p>This implementation assumes a normalized representation during operations on a {@code DD}
35 * number and computes results as a normalized representation. Any double-double number
36 * can be normalized by summation of the parts (see {@link #ofSum(double, double) ofSum}).
37 * Note that the number {@code (x, xx)} may also be referred to using the labels high and low
38 * to indicate the magnitude of the parts as
39 * {@code (x}<sub>hi</sub>{@code , x}<sub>lo</sub>{@code )}, or using a numerical suffix for the
40 * parts as {@code (x}<sub>0</sub>{@code , x}<sub>1</sub>{@code )}. The numerical suffix is
41 * typically used when the number has an arbitrary number of parts.
42 *
43 * <p>The double-double class is immutable.
44 *
45 * <p><b>Construction</b>
46 *
47 * <p>Factory methods to create a {@code DD} that are exact use the prefix {@code of}. Methods
48 * that create the closest possible representation use the prefix {@code from}. These methods
49 * may suffer a possible loss of precision during conversion.
50 *
51 * <p>Primitive values of type {@code double}, {@code int} and {@code long} are
52 * converted exactly to a {@code DD}.
53 *
54 * <p>The {@code DD} class can also be created as the result of an arithmetic operation on a pair
55 * of {@code double} operands. The resulting {@code DD} has the IEEE754 {@code double} result
56 * of the operation in the first part, and the second part contains the round-off lost from the
57 * operation due to rounding. Construction using add ({@code +}), subtract ({@code -}) and
58 * multiply ({@code *}) operators are exact. Construction using division ({@code /}) may be
59 * inexact if the quotient is not representable.
60 *
61 * <p>Note that it is more efficient to create a {@code DD} from a {@code double} operation than
62 * to create two {@code DD} values and combine them with the same operation. The result will be
63 * the same for add, subtract and multiply but may lose precision for divide.
64 * <pre>{@code
65 * // Inefficient
66 * DD a = DD.of(1.23).add(DD.of(4.56));
67 * // Optimal
68 * DD b = DD.ofSum(1.23, 4.56);
69 *
70 * // Inefficient and may lose precision
71 * DD c = DD.of(1.23).divide(DD.of(4.56));
72 * // Optimal
73 * DD d = DD.fromQuotient(1.23, 4.56);
74 * }</pre>
75 *
76 * <p>It is not possible to directly specify the two parts of the number.
77 * The two parts must be added using {@link #ofSum(double, double) ofSum}.
78 * If the two parts already represent a number {@code (x, xx)} such that {@code x == x + xx}
79 * then the magnitudes of the parts will be unchanged; any signed zeros may be subject to a sign
80 * change.
81 *
82 * <p><b>Primitive operands</b>
83 *
84 * <p>Operations are provided using a {@code DD} operand or a {@code double} operand.
85 * Implicit type conversion allows methods with a {@code double} operand to be used
86 * with other primitives such as {@code int} or {@code long}. Note that casting of a {@code long}
87 * to a {@code double} may result in loss of precision.
88 * To maintain the full precision of a {@code long} first convert the value to a {@code DD} using
89 * {@link #of(long)} and use the same arithmetic operation using the {@code DD} operand.
90 *
91 * <p><b>Accuracy</b>
92 *
93 * <p>Add and multiply operations using two {@code double} values operands are computed to an
94 * exact {@code DD} result (see {@link #ofSum(double, double) ofSum} and
95 * {@link #ofProduct(double, double) ofProduct}). Operations involving a {@code DD} and another
96 * operand, either {@code double} or {@code DD}, are not exact.
97 *
98 * <p>This class is not intended to perform exact arithmetic. Arbitrary precision arithmetic is
99 * available using {@link BigDecimal}. Single operations will compute the {@code DD} result within
100 * a tolerance of the 106-bit exact result. This far exceeds the accuracy of {@code double}
101 * arithmetic. The reduced accuracy is a compromise to deliver increased performance.
102 * The class is intended to reduce error in equivalent {@code double} arithmetic operations where
103 * the {@code double} valued result is required to high accuracy. Although it
104 * is possible to reduce error to 2<sup>-106</sup> for all operations, the additional computation
105 * would impact performance and would require multiple chained operations to potentially
106 * observe a different result when the final {@code DD} is converted to a {@code double}.
107 *
108 * <p><b>Canonical representation</b>
109 *
110 * <p>The double-double number is the sum of its parts. The canonical representation of the
111 * number is the explicit value of the parts. The {@link #toString()} method is provided to
112 * convert to a String representation of the parts formatted as a tuple.
113 *
114 * <p>The class implements {@link #equals(Object)} and {@link #hashCode()} and allows usage as
115 * a key in a Set or Map. Equality requires <em>binary</em> equivalence of the parts. Note that
116 * representations of zero using different combinations of +/- 0.0 are not considered equal.
117 * Also note that many non-normalized double-double numbers can represent the same number.
118 * Double-double numbers can be normalized before operations that involve {@link #equals(Object)}
119 * by {@link #ofSum(double, double) adding} the parts; this is exact for a finite sum
120 * and provides equality support for non-zero numbers. Alternatively exact numerical equality
121 * and comparisons are supported by conversion to a {@link #bigDecimalValue() BigDecimal}
122 * representation. Note that {@link BigDecimal} does not support non-finite values.
123 *
124 * <p><b>Overflow, underflow and non-finite support</b>
125 *
126 * <p>A double-double number is limited to the same finite range as a {@code double}
127 * ({@value Double#MIN_VALUE} to {@value Double#MAX_VALUE}). This class is intended for use when
128 * the ultimate result is finite and intermediate values do not approach infinity or zero.
129 *
130 * <p>This implementation does not support IEEE standards for handling infinite and NaN when used
131 * in arithmetic operations. Computations may split a 64-bit double into two parts and/or use
132 * subtraction of intermediate terms to compute round-off parts. These operations may generate
133 * infinite values due to overflow which then propagate through further operations to NaN,
134 * for example computing the round-off using {@code Inf - Inf = NaN}.
135 *
136 * <p>Operations that involve splitting a double (multiply, divide) are safe
137 * when the base 2 exponent is below 996. This puts an upper limit of approximately +/-6.7e299 on
138 * any values to be split; in practice the arguments to multiply and divide operations are further
139 * constrained by the expected finite value of the product or quotient.
140 *
141 * <p>Likewise the smallest value that can be represented is {@link Double#MIN_VALUE}. The full
142 * 106-bit accuracy will be lost when intermediates are within 2<sup>53</sup> of
143 * {@link Double#MIN_NORMAL}.
144 *
145 * <p>The {@code DD} result can be verified by checking it is a {@link #isFinite() finite}
146 * evaluated sum. Computations expecting to approach over or underflow must use scaling of
147 * intermediate terms (see {@link #frexp(int[]) frexp} and {@link #scalb(int) scalb}) and
148 * appropriate management of the current base 2 scale.
149 *
150 * <p>References:
151 * <ol>
152 * <li>
153 * Dekker, T.J. (1971)
154 * <a href="https://doi.org/10.1007/BF01397083">
155 * A floating-point technique for extending the available precision</a>
156 * Numerische Mathematik, 18:224–242.
157 * <li>
158 * Shewchuk, J.R. (1997)
159 * <a href="https://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
160 * Arbitrary Precision Floating-Point Arithmetic</a>.
161 * <li>
162 * Hide, Y, Li, X.S. and Bailey, D.H. (2008)
163 * <a href="https://www.davidhbailey.com/dhbpapers/qd.pdf">
164 * Library for Double-Double and Quad-Double Arithmetic</a>.
165 * </ol>
166 *
167 * @since 1.2
168 */
169 public final class DD
170 extends Number
171 implements NativeOperators<DD>,
172 Serializable {
173 // Caveat:
174 //
175 // The code below uses many additions/subtractions that may
176 // appear redundant. However, they should NOT be simplified, as they
177 // do use IEEE754 floating point arithmetic rounding properties.
178 //
179 // Algorithms are based on computing the product or sum of two values x and y in
180 // extended precision. The standard result is stored using a double (high part z) and
181 // the round-off error (or low part zz) is stored in a second double, e.g:
182 // x * y = (z, zz); z + zz = x * y
183 // x + y = (z, zz); z + zz = x + y
184 //
185 // The building blocks for double-double arithmetic are:
186 //
187 // Fast-Two-Sum: Addition of two doubles (ordered |x| > |y|) to a double-double
188 // Two-Sum: Addition of two doubles (unordered) to a double-double
189 // Two-Prod: Multiplication of two doubles to a double-double
190 //
191 // These are used to create functions operating on double and double-double numbers.
192 //
193 // To sum multiple (z, zz) results ideally the parts are sorted in order of
194 // non-decreasing magnitude and summed. This is exact if each number's most significant
195 // bit is below the least significant bit of the next (i.e. does not
196 // overlap). Creating non-overlapping parts requires a rebalancing
197 // of adjacent pairs using a summation z + zz = (z1, zz1) iteratively through the parts
198 // (see Shewchuk (1997) Grow-Expansion and Expansion-Sum [2]).
199 //
200 // Accurate summation of an expansion (more than one double value) to a double-double
201 // performs a two-sum through the expansion e (length m).
202 // The single pass with two-sum ensures that the final term e_m is a good approximation
203 // for e: |e - e_m| < ulp(e_m); and the sum of the parts to
204 // e_(m-1) is within 1 ULP of the round-off ulp(|e - e_m|).
205 // These final two terms create the double-double result using two-sum.
206 //
207 // Maintenance of 1 ULP precision in the round-off component for all double-double
208 // operations is a performance burden. This class avoids this requirement to provide
209 // a compromise between accuracy and performance.
210
211 /**
212 * A double-double number representing one.
213 */
214 public static final DD ONE = new DD(1, 0);
215 /**
216 * A double-double number representing zero.
217 */
218 public static final DD ZERO = new DD(0, 0);
219
220 /**
221 * The multiplier used to split the double value into high and low parts. From
222 * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1,
223 * where p is the number of binary digits in the mantissa". Here p is 53
224 * and the multiplier is {@code 2^27 + 1}.
225 */
226 private static final double MULTIPLIER = 1.0 + 0x1.0p27;
227 /** The mask to extract the raw 11-bit exponent.
228 * The value must be shifted 52-bits to remove the mantissa bits. */
229 private static final int EXP_MASK = 0x7ff;
230 /** The value 2046 converted for use if using {@link Integer#compareUnsigned(int, int)}.
231 * This requires adding {@link Integer#MIN_VALUE} to 2046. */
232 private static final int CMP_UNSIGNED_2046 = Integer.MIN_VALUE + 2046;
233 /** The value -1 converted for use if using {@link Integer#compareUnsigned(int, int)}.
234 * This requires adding {@link Integer#MIN_VALUE} to -1. */
235 private static final int CMP_UNSIGNED_MINUS_1 = Integer.MIN_VALUE - 1;
236 /** The value 1022 converted for use if using {@link Integer#compareUnsigned(int, int)}.
237 * This requires adding {@link Integer#MIN_VALUE} to 1022. */
238 private static final int CMP_UNSIGNED_1022 = Integer.MIN_VALUE + 1022;
239 /** 2^512. */
240 private static final double TWO_POW_512 = 0x1.0p512;
241 /** 2^-512. */
242 private static final double TWO_POW_M512 = 0x1.0p-512;
243 /** 2^53. Any double with a magnitude above this is an even integer. */
244 private static final double TWO_POW_53 = 0x1.0p53;
245 /** Mask to extract the high 32-bits from a long. */
246 private static final long HIGH32_MASK = 0xffff_ffff_0000_0000L;
247 /** Mask to remove the sign bit from a long. */
248 private static final long UNSIGN_MASK = 0x7fff_ffff_ffff_ffffL;
249 /** Mask to extract the 52-bit mantissa from a long representation of a double. */
250 private static final long MANTISSA_MASK = 0x000f_ffff_ffff_ffffL;
251 /** Exponent offset in IEEE754 representation. */
252 private static final int EXPONENT_OFFSET = 1023;
253 /** 0.5. */
254 private static final double HALF = 0.5;
255 /** The limit for safe multiplication of {@code x*y}, assuming values above 1.
256 * Used to maintain positive values during the power computation. */
257 private static final double SAFE_MULTIPLY = 0x1.0p500;
258
259 /**
260 * The size of the buffer for {@link #toString()}.
261 *
262 * <p>The longest double will require a sign, a maximum of 17 digits, the decimal place
263 * and the exponent, e.g. for max value this is 24 chars: -1.7976931348623157e+308.
264 * Set the buffer size to twice this and round up to a power of 2 thus
265 * allowing for formatting characters. The size is 64.
266 */
267 private static final int TO_STRING_SIZE = 64;
268 /** {@link #toString() String representation}. */
269 private static final char FORMAT_START = '(';
270 /** {@link #toString() String representation}. */
271 private static final char FORMAT_END = ')';
272 /** {@link #toString() String representation}. */
273 private static final char FORMAT_SEP = ',';
274
275 /** Serializable version identifier. */
276 private static final long serialVersionUID = 20230701L;
277
278 /** The high part of the double-double number. */
279 private final double x;
280 /** The low part of the double-double number. */
281 private final double xx;
282
283 /**
284 * Create a double-double number {@code (x, xx)}.
285 *
286 * @param x High part.
287 * @param xx Low part.
288 */
289 private DD(double x, double xx) {
290 this.x = x;
291 this.xx = xx;
292 }
293
294 // Conversion constructors
295
296 /**
297 * Creates the double-double number as the value {@code (x, 0)}.
298 *
299 * @param x Value.
300 * @return the double-double
301 */
302 public static DD of(double x) {
303 return new DD(x, 0);
304 }
305
306 /**
307 * Creates the double-double number as the value {@code (x, xx)}.
308 *
309 * <p><strong>Warning</strong>
310 *
311 * <p>The arguments are used directly. No checks are made that they represent
312 * a normalized double-double number: {@code x == x + xx}.
313 *
314 * <p>This method is exposed for testing.
315 *
316 * @param x High part.
317 * @param xx Low part.
318 * @return the double-double
319 * @see #twoSum(double, double)
320 */
321 static DD of(double x, double xx) {
322 return new DD(x, xx);
323 }
324
325 /**
326 * Creates the double-double number as the value {@code (x, 0)}.
327 *
328 * <p>Note this method exists to avoid using {@link #of(long)} for {@code integer}
329 * arguments; the {@code long} variation is slower as it preserves all 64-bits
330 * of information.
331 *
332 * @param x Value.
333 * @return the double-double
334 * @see #of(long)
335 */
336 public static DD of(int x) {
337 return new DD(x, 0);
338 }
339
340 /**
341 * Creates the double-double number with the high part equal to {@code (double) x}
342 * and the low part equal to any remaining bits.
343 *
344 * <p>Note this method preserves all 64-bits of precision. Faster construction can be
345 * achieved using up to 53-bits of precision using {@code of((double) x)}.
346 *
347 * @param x Value.
348 * @return the double-double
349 * @see #of(double)
350 */
351 public static DD of(long x) {
352 // Note: Casting the long to a double can lose bits due to rounding.
353 // These are not recoverable using lo = x - (long)((double) x)
354 // if the double is rounded outside the range of a long (i.e. 2^53).
355 // Split the long into two 32-bit numbers that are exactly representable
356 // and add them.
357 final long a = x & HIGH32_MASK;
358 final long b = x - a;
359 // When x is positive: a > b or a == 0
360 // When x is negative: |a| > |b|
361 return fastTwoSum(a, b);
362 }
363
364 /**
365 * Creates the double-double number {@code (z, zz)} using the {@code double} representation
366 * of the argument {@code x}; the low part is the {@code double} representation of the
367 * round-off error.
368 * <pre>
369 * double z = x.doubleValue();
370 * double zz = x.subtract(new BigDecimal(z)).doubleValue();
371 * </pre>
372 * <p>If the value cannot be represented as a finite value the result will have an
373 * infinite high part and the low part is undefined.
374 *
375 * <p>Note: This conversion can lose information about the precision of the BigDecimal value.
376 * The result is the closest double-double representation to the value.
377 *
378 * @param x Value.
379 * @return the double-double
380 */
381 public static DD from(BigDecimal x) {
382 final double z = x.doubleValue();
383 // Guard against an infinite throwing a exception
384 final double zz = Double.isInfinite(z) ? 0 : x.subtract(new BigDecimal(z)).doubleValue();
385 // No normalisation here
386 return new DD(z, zz);
387 }
388
389 // Arithmetic constructors:
390
391 /**
392 * Returns a {@code DD} whose value is {@code (x + y)}.
393 * The values are not required to be ordered by magnitude,
394 * i.e. the result is commutative: {@code x + y == y + x}.
395 *
396 * <p>This method ignores special handling of non-normal numbers and
397 * overflow within the extended precision computation.
398 * This creates the following special cases:
399 *
400 * <ul>
401 * <li>If {@code x + y} is infinite then the low part is NaN.
402 * <li>If {@code x} or {@code y} is infinite or NaN then the low part is NaN.
403 * <li>If {@code x + y} is sub-normal or zero then the low part is +/-0.0.
404 * </ul>
405 *
406 * <p>An invalid result can be identified using {@link #isFinite()}.
407 *
408 * <p>The result is the exact double-double representation of the sum.
409 *
410 * @param x Addend.
411 * @param y Addend.
412 * @return the sum {@code x + y}.
413 * @see #ofDifference(double, double)
414 */
415 public static DD ofSum(double x, double y) {
416 return twoSum(x, y);
417 }
418
419 /**
420 * Returns a {@code DD} whose value is {@code (x - y)}.
421 * The values are not required to be ordered by magnitude,
422 * i.e. the result matches a negation and addition: {@code x - y == -y + x}.
423 *
424 * <p>Computes the same results as {@link #ofSum(double, double) ofSum(a, -b)}.
425 * See that method for details of special cases.
426 *
427 * <p>An invalid result can be identified using {@link #isFinite()}.
428 *
429 * <p>The result is the exact double-double representation of the difference.
430 *
431 * @param x Minuend.
432 * @param y Subtrahend.
433 * @return {@code x - y}.
434 * @see #ofSum(double, double)
435 */
436 public static DD ofDifference(double x, double y) {
437 return twoDiff(x, y);
438 }
439
440 /**
441 * Returns a {@code DD} whose value is {@code (x * y)}.
442 *
443 * <p>This method ignores special handling of non-normal numbers and intermediate
444 * overflow within the extended precision computation.
445 * This creates the following special cases:
446 *
447 * <ul>
448 * <li>If either {@code |x|} or {@code |y|} multiplied by {@code 1 + 2^27}
449 * is infinite (intermediate overflow) then the low part is NaN.
450 * <li>If {@code x * y} is infinite then the low part is NaN.
451 * <li>If {@code x} or {@code y} is infinite or NaN then the low part is NaN.
452 * <li>If {@code x * y} is sub-normal or zero then the low part is +/-0.0.
453 * </ul>
454 *
455 * <p>An invalid result can be identified using {@link #isFinite()}.
456 *
457 * <p>Note: Ignoring special cases is a design choice for performance. The
458 * method is therefore not a drop-in replacement for
459 * {@code roundOff = Math.fma(x, y, -x * y)}.
460 *
461 * <p>The result is the exact double-double representation of the product.
462 *
463 * @param x Factor.
464 * @param y Factor.
465 * @return the product {@code x * y}.
466 */
467 public static DD ofProduct(double x, double y) {
468 return twoProd(x, y);
469 }
470
471 /**
472 * Returns a {@code DD} whose value is {@code (x * x)}.
473 *
474 * <p>This method is an optimisation of {@link #ofProduct(double, double) multiply(x, x)}.
475 * See that method for details of special cases.
476 *
477 * <p>An invalid result can be identified using {@link #isFinite()}.
478 *
479 * <p>The result is the exact double-double representation of the square.
480 *
481 * @param x Factor.
482 * @return the square {@code x * x}.
483 * @see #ofProduct(double, double)
484 */
485 public static DD ofSquare(double x) {
486 return twoSquare(x);
487 }
488
489 /**
490 * Returns a {@code DD} whose value is {@code (x / y)}.
491 * If {@code y = 0} the result is undefined.
492 *
493 * <p>This method ignores special handling of non-normal numbers and intermediate
494 * overflow within the extended precision computation.
495 * This creates the following special cases:
496 *
497 * <ul>
498 * <li>If either {@code |x / y|} or {@code |y|} multiplied by {@code 1 + 2^27}
499 * is infinite (intermediate overflow) then the low part is NaN.
500 * <li>If {@code x / y} is infinite then the low part is NaN.
501 * <li>If {@code x} or {@code y} is infinite or NaN then the low part is NaN.
502 * <li>If {@code x / y} is sub-normal or zero, excluding the previous cases,
503 * then the low part is +/-0.0.
504 * </ul>
505 *
506 * <p>An invalid result can be identified using {@link #isFinite()}.
507 *
508 * <p>The result is the closest double-double representation to the quotient.
509 *
510 * @param x Dividend.
511 * @param y Divisor.
512 * @return the quotient {@code x / y}.
513 */
514 public static DD fromQuotient(double x, double y) {
515 // Long division
516 // quotient q0 = x / y
517 final double q0 = x / y;
518 // remainder r = x - q0 * y
519 final double p0 = q0 * y;
520 final double p1 = twoProductLow(q0, y, p0);
521 final double r0 = x - p0;
522 final double r1 = twoDiffLow(x, p0, r0) - p1;
523 // correction term q1 = r0 / y
524 final double q1 = (r0 + r1) / y;
525 return new DD(q0, q1);
526 }
527
528 // Properties
529
530 /**
531 * Gets the first part {@code x} of the double-double number {@code (x, xx)}.
532 * In a normalized double-double number this part will have the greatest magnitude.
533 *
534 * <p>This is equivalent to returning the high-part {@code x}<sub>hi</sub> for the number
535 * {@code (x}<sub>hi</sub>{@code , x}<sub>lo</sub>{@code )}.
536 *
537 * @return the first part
538 */
539 public double hi() {
540 return x;
541 }
542
543 /**
544 * Gets the second part {@code xx} of the double-double number {@code (x, xx)}.
545 * In a normalized double-double number this part will have the smallest magnitude.
546 *
547 * <p>This is equivalent to returning the low part {@code x}<sub>lo</sub> for the number
548 * {@code (x}<sub>hi</sub>{@code , x}<sub>lo</sub>{@code )}.
549 *
550 * @return the second part
551 */
552 public double lo() {
553 return xx;
554 }
555
556 /**
557 * Returns {@code true} if the evaluated sum of the parts is finite.
558 *
559 * <p>This method is provided as a utility to check the result of a {@code DD} computation.
560 * Note that for performance the {@code DD} class does not follow IEEE754 arithmetic
561 * for infinite and NaN, and does not protect from overflow of intermediate values in
562 * multiply and divide operations. If this method returns {@code false} following
563 * {@code DD} arithmetic then the computation is not supported to extended precision.
564 *
565 * <p>Note: Any number that returns {@code true} may be converted to the exact
566 * {@link BigDecimal} value.
567 *
568 * @return {@code true} if this instance represents a finite {@code double} value.
569 * @see Double#isFinite(double)
570 * @see #bigDecimalValue()
571 */
572 public boolean isFinite() {
573 return Double.isFinite(x + xx);
574 }
575
576 // Number conversions
577
578 /**
579 * Get the value as a {@code double}. This is the evaluated sum of the parts.
580 *
581 * <p>Note that even when the return value is finite, this conversion can lose
582 * information about the precision of the {@code DD} value.
583 *
584 * <p>Conversion of a finite {@code DD} can also be performed using the
585 * {@link #bigDecimalValue() BigDecimal} representation.
586 *
587 * @return the value converted to a {@code double}
588 * @see #bigDecimalValue()
589 */
590 @Override
591 public double doubleValue() {
592 return x + xx;
593 }
594
595 /**
596 * Get the value as a {@code float}. This is the narrowing primitive conversion of the
597 * {@link #doubleValue()}. This conversion can lose range, resulting in a
598 * {@code float} zero from a nonzero {@code double} and a {@code float} infinity from
599 * a finite {@code double}. A {@code double} NaN is converted to a {@code float} NaN
600 * and a {@code double} infinity is converted to the same-signed {@code float}
601 * infinity.
602 *
603 * <p>Note that even when the return value is finite, this conversion can lose
604 * information about the precision of the {@code DD} value.
605 *
606 * <p>Conversion of a finite {@code DD} can also be performed using the
607 * {@link #bigDecimalValue() BigDecimal} representation.
608 *
609 * @return the value converted to a {@code float}
610 * @see #bigDecimalValue()
611 */
612 @Override
613 public float floatValue() {
614 return (float) doubleValue();
615 }
616
617 /**
618 * Get the value as an {@code int}. This conversion discards the fractional part of the
619 * number and effectively rounds the value to the closest whole number in the direction
620 * of zero. This is the equivalent of a cast of a floating-point number to an integer, for
621 * example {@code (int) -2.75 => -2}.
622 *
623 * <p>Note that this conversion can lose information about the precision of the
624 * {@code DD} value.
625 *
626 * <p>Special cases:
627 * <ul>
628 * <li>If the {@code DD} value is infinite the result is {@link Integer#MAX_VALUE}.
629 * <li>If the {@code DD} value is -infinite the result is {@link Integer#MIN_VALUE}.
630 * <li>If the {@code DD} value is NaN the result is 0.
631 * </ul>
632 *
633 * <p>Conversion of a finite {@code DD} can also be performed using the
634 * {@link #bigDecimalValue() BigDecimal} representation. Note that {@link BigDecimal}
635 * conversion rounds to the {@link java.math.BigInteger BigInteger} whole number
636 * representation and returns the low-order 32-bits. Numbers too large for an {@code int}
637 * may change sign. This method ensures the sign is correct by directly rounding to
638 * an {@code int} and returning the respective upper or lower limit for numbers too
639 * large for an {@code int}.
640 *
641 * @return the value converted to an {@code int}
642 * @see #bigDecimalValue()
643 */
644 @Override
645 public int intValue() {
646 // Clip the long value
647 return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, longValue()));
648 }
649
650 /**
651 * Get the value as a {@code long}. This conversion discards the fractional part of the
652 * number and effectively rounds the value to the closest whole number in the direction
653 * of zero. This is the equivalent of a cast of a floating-point number to an integer, for
654 * example {@code (long) -2.75 => -2}.
655 *
656 * <p>Note that this conversion can lose information about the precision of the
657 * {@code DD} value.
658 *
659 * <p>Special cases:
660 * <ul>
661 * <li>If the {@code DD} value is infinite the result is {@link Long#MAX_VALUE}.
662 * <li>If the {@code DD} value is -infinite the result is {@link Long#MIN_VALUE}.
663 * <li>If the {@code DD} value is NaN the result is 0.
664 * </ul>
665 *
666 * <p>Conversion of a finite {@code DD} can also be performed using the
667 * {@link #bigDecimalValue() BigDecimal} representation. Note that {@link BigDecimal}
668 * conversion rounds to the {@link java.math.BigInteger BigInteger} whole number
669 * representation and returns the low-order 64-bits. Numbers too large for a {@code long}
670 * may change sign. This method ensures the sign is correct by directly rounding to
671 * a {@code long} and returning the respective upper or lower limit for numbers too
672 * large for a {@code long}.
673 *
674 * @return the value converted to an {@code int}
675 * @see #bigDecimalValue()
676 */
677 @Override
678 public long longValue() {
679 // Assume |hi| > |lo|, i.e. the low part is the round-off
680 final long a = (long) x;
681 // The cast will truncate the value to the range [Long.MIN_VALUE, Long.MAX_VALUE].
682 // If the long converted back to a double is the same value then the high part
683 // was a representable integer and we must use the low part.
684 // Note: The floating-point comparison is intentional.
685 if (a == x) {
686 // Edge case: Any double value above 2^53 is even. To workaround representation
687 // of 2^63 as Long.MAX_VALUE (which is 2^63-1) we can split a into two parts.
688 long a1;
689 long a2;
690 if (Math.abs(x) > TWO_POW_53) {
691 a1 = (long) (x * 0.5);
692 a2 = a1;
693 } else {
694 a1 = a;
695 a2 = 0;
696 }
697
698 // To truncate the fractional part of the double-double towards zero we
699 // convert the low part to a whole number. This must be rounded towards zero
700 // with respect to the sign of the high part.
701 final long b = (long) (a < 0 ? Math.ceil(xx) : Math.floor(xx));
702
703 final long sum = a1 + b + a2;
704 // Avoid overflow. If the sum has changed sign then an overflow occurred.
705 // This happens when high == 2^63 and the low part is additional magnitude.
706 // The xor operation creates a negative if the signs are different.
707 if ((sum ^ a) >= 0) {
708 return sum;
709 }
710 }
711 // Here the high part had a fractional part, was non-finite or was 2^63.
712 // Ignore the low part.
713 return a;
714 }
715
716 /**
717 * Get the value as a {@code BigDecimal}. This is the evaluated sum of the parts;
718 * the conversion is exact.
719 *
720 * <p>The conversion will raise a {@link NumberFormatException} if the number
721 * is non-finite.
722 *
723 * @return the double-double as a {@code BigDecimal}.
724 * @throws NumberFormatException if any part of the number is {@code infinite} or {@code NaN}
725 * @see BigDecimal
726 */
727 public BigDecimal bigDecimalValue() {
728 return new BigDecimal(x).add(new BigDecimal(xx));
729 }
730
731 // Static extended precision methods for computing the round-off component
732 // for double addition and multiplication
733
734 /**
735 * Compute the sum of two numbers {@code a} and {@code b} using
736 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude:
737 * {@code |a| >= |b|}.
738 *
739 * <p>If {@code a} is zero and {@code b} is non-zero the returned value is {@code (b, 0)}.
740 *
741 * @param a First part of sum.
742 * @param b Second part of sum.
743 * @return the sum
744 * @see #fastTwoDiff(double, double)
745 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
746 * Shewchuk (1997) Theorum 6</a>
747 */
748 static DD fastTwoSum(double a, double b) {
749 final double x = a + b;
750 return new DD(x, fastTwoSumLow(a, b, x));
751 }
752
753 /**
754 * Compute the round-off of the sum of two numbers {@code a} and {@code b} using
755 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude:
756 * {@code |a| >= |b|}.
757 *
758 * <p>If {@code a} is zero and {@code b} is non-zero the returned value is zero.
759 *
760 * @param a First part of sum.
761 * @param b Second part of sum.
762 * @param x Sum.
763 * @return the sum round-off
764 * @see #fastTwoSum(double, double)
765 */
766 static double fastTwoSumLow(double a, double b, double x) {
767 // (x, xx) = a + b
768 // bVirtual = x - a
769 // xx = b - bVirtual
770 return b - (x - a);
771 }
772
773 /**
774 * Compute the difference of two numbers {@code a} and {@code b} using
775 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude:
776 * {@code |a| >= |b|}.
777 *
778 * <p>Computes the same results as {@link #fastTwoSum(double, double) fastTwoSum(a, -b)}.
779 *
780 * @param a Minuend.
781 * @param b Subtrahend.
782 * @return the difference
783 * @see #fastTwoSum(double, double)
784 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
785 * Shewchuk (1997) Theorum 6</a>
786 */
787 static DD fastTwoDiff(double a, double b) {
788 final double x = a - b;
789 return new DD(x, fastTwoDiffLow(a, b, x));
790 }
791
792 /**
793 * Compute the round-off of the difference of two numbers {@code a} and {@code b} using
794 * Dekker's two-sum algorithm. The values are required to be ordered by magnitude:
795 * {@code |a| >= |b|}.
796 *
797 * @param a Minuend.
798 * @param b Subtrahend.
799 * @param x Difference.
800 * @return the difference round-off
801 * @see #fastTwoDiff(double, double)
802 */
803 private static double fastTwoDiffLow(double a, double b, double x) {
804 // (x, xx) = a - b
805 // bVirtual = a - x
806 // xx = bVirtual - b
807 return (a - x) - b;
808 }
809
810 /**
811 * Compute the sum of two numbers {@code a} and {@code b} using
812 * Knuth's two-sum algorithm. The values are not required to be ordered by magnitude,
813 * i.e. the result is commutative {@code s = a + b == b + a}.
814 *
815 * @param a First part of sum.
816 * @param b Second part of sum.
817 * @return the sum
818 * @see #twoDiff(double, double)
819 * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps">
820 * Shewchuk (1997) Theorum 7</a>
821 */
822 static DD twoSum(double a, double b) {
823 final double x = a + b;
824 return new DD(x, twoSumLow(a, b, x));
825 }
826
827 /**
828 * Compute the round-off of the sum of two numbers {@code a} and {@code b} using
829 * Knuth two-sum algorithm. The values are not required to be ordered by magnitude,
830 * i.e. the result is commutative {@code s = a + b == b + a}.
831 *
832 * @param a First part of sum.
833 * @param b Second part of sum.
834 * @param x Sum.
835 * @return the sum round-off
836 * @see #twoSum(double, double)
837 */
838 static double twoSumLow(double a, double b, double x) {
839 // (x, xx) = a + b
840 // bVirtual = x - a
841 // aVirtual = x - bVirtual
842 // bRoundoff = b - bVirtual
843 // aRoundoff = a - aVirtual
844 // xx = aRoundoff + bRoundoff
845 final double bVirtual = x - a;
846 return (a - (x - bVirtual)) + (b - bVirtual);
847 }
848
849 /**
850 * Compute the difference of two numbers {@code a} and {@code b} using
851 * Knuth's two-sum algorithm. The values are not required to be ordered by magnitude.
852 *
853 * <p>Computes the same results as {@link #twoSum(double, double) twoSum(a, -b)}.
854 *
855 * @param a Minuend.
856 * @param b Subtrahend.
857 * @return the difference
858 * @see #twoSum(double, double)
859 */
860 static DD twoDiff(double a, double b) {
861 final double x = a - b;
862 return new DD(x, twoDiffLow(a, b, x));
863 }
864
865 /**
866 * Compute the round-off of the difference of two numbers {@code a} and {@code b} using
867 * Knuth two-sum algorithm. The values are not required to be ordered by magnitude,
868 *
869 * @param a Minuend.
870 * @param b Subtrahend.
871 * @param x Difference.
872 * @return the difference round-off
873 * @see #twoDiff(double, double)
874 */
875 private static double twoDiffLow(double a, double b, double x) {
876 // (x, xx) = a - b
877 // bVirtual = a - x
878 // aVirtual = x + bVirtual
879 // bRoundoff = b - bVirtual
880 // aRoundoff = a - aVirtual
881 // xx = aRoundoff - bRoundoff
882 final double bVirtual = a - x;
883 return (a - (x + bVirtual)) - (b - bVirtual);
884 }
885
886 /**
887 * Compute the double-double number {@code (z,zz)} for the exact
888 * product of {@code x} and {@code y}.
889 *
890 * <p>The high part of the number is equal to the product {@code z = x * y}.
891 * The low part is set to the round-off of the {@code double} product.
892 *
893 * <p>This method ignores special handling of non-normal numbers and intermediate
894 * overflow within the extended precision computation.
895 * This creates the following special cases:
896 *
897 * <ul>
898 * <li>If {@code x * y} is sub-normal or zero then the low part is +/-0.0.
899 * <li>If {@code x * y} is infinite then the low part is NaN.
900 * <li>If {@code x} or {@code y} is infinite or NaN then the low part is NaN.
901 * <li>If either {@code |x|} or {@code |y|} multiplied by {@code 1 + 2^27}
902 * is infinite (intermediate overflow) then the low part is NaN.
903 * </ul>
904 *
905 * <p>Note: Ignoring special cases is a design choice for performance. The
906 * method is therefore not a drop-in replacement for
907 * {@code round_off = Math.fma(x, y, -x * y)}.
908 *
909 * @param x First factor.
910 * @param y Second factor.
911 * @return the product
912 */
913 static DD twoProd(double x, double y) {
914 final double xy = x * y;
915 // No checks for non-normal xy, or overflow during the split of the arguments
916 return new DD(xy, twoProductLow(x, y, xy));
917 }
918
919 /**
920 * Compute the low part of the double length number {@code (z,zz)} for the exact
921 * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard
922 * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y}
923 * are split into high and low parts using Dekker's algorithm.
924 *
925 * <p>Warning: This method does not perform scaling in Dekker's split and large
926 * finite numbers can create NaN results.
927 *
928 * @param x First factor.
929 * @param y Second factor.
930 * @param xy Product of the factors (x * y).
931 * @return the low part of the product double length number
932 * @see #highPart(double)
933 */
934 static double twoProductLow(double x, double y, double xy) {
935 // Split the numbers using Dekker's algorithm without scaling
936 final double hx = highPart(x);
937 final double lx = x - hx;
938 final double hy = highPart(y);
939 final double ly = y - hy;
940 return twoProductLow(hx, lx, hy, ly, xy);
941 }
942
943 /**
944 * Compute the low part of the double length number {@code (z,zz)} for the exact
945 * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard
946 * precision product {@code x*y}, and the high and low parts of the factors must be
947 * provided.
948 *
949 * @param hx High-part of first factor.
950 * @param lx Low-part of first factor.
951 * @param hy High-part of second factor.
952 * @param ly Low-part of second factor.
953 * @param xy Product of the factors (x * y).
954 * @return the low part of the product double length number
955 */
956 static double twoProductLow(double hx, double lx, double hy, double ly, double xy) {
957 // Compute the multiply low part:
958 // err1 = xy - hx * hy
959 // err2 = err1 - lx * hy
960 // err3 = err2 - hx * ly
961 // low = lx * ly - err3
962 return lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly);
963 }
964
965 /**
966 * Compute the double-double number {@code (z,zz)} for the exact
967 * square of {@code x}.
968 *
969 * <p>The high part of the number is equal to the square {@code z = x * x}.
970 * The low part is set to the round-off of the {@code double} square.
971 *
972 * <p>This method is an optimisation of {@link #twoProd(double, double) twoProd(x, x)}.
973 * See that method for details of special cases.
974 *
975 * @param x Factor.
976 * @return the square
977 * @see #twoProd(double, double)
978 */
979 static DD twoSquare(double x) {
980 final double xx = x * x;
981 // No checks for non-normal xy, or overflow during the split of the arguments
982 return new DD(xx, twoSquareLow(x, xx));
983 }
984
985 /**
986 * Compute the low part of the double length number {@code (z,zz)} for the exact
987 * square of {@code x} using Dekker's mult12 algorithm. The standard
988 * precision square {@code x*x} must be provided. The number {@code x}
989 * is split into high and low parts using Dekker's algorithm.
990 *
991 * <p>Warning: This method does not perform scaling in Dekker's split and large
992 * finite numbers can create NaN results.
993 *
994 * @param x Factor.
995 * @param x2 Square of the factor (x * x).
996 * @return the low part of the square double length number
997 * @see #highPart(double)
998 * @see #twoProductLow(double, double, double)
999 */
1000 static double twoSquareLow(double x, double x2) {
1001 // See productLowUnscaled
1002 final double hx = highPart(x);
1003 final double lx = x - hx;
1004 return twoSquareLow(hx, lx, x2);
1005 }
1006
1007 /**
1008 * Compute the low part of the double length number {@code (z,zz)} for the exact
1009 * square of {@code x} using Dekker's mult12 algorithm. The standard
1010 * precision square {@code x*x}, and the high and low parts of the factors must be
1011 * provided.
1012 *
1013 * @param hx High-part of factor.
1014 * @param lx Low-part of factor.
1015 * @param x2 Square of the factor (x * x).
1016 * @return the low part of the square double length number
1017 */
1018 static double twoSquareLow(double hx, double lx, double x2) {
1019 return lx * lx - ((x2 - hx * hx) - 2 * lx * hx);
1020 }
1021
1022 /**
1023 * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates
1024 * a big value from which to derive the two split parts.
1025 * <pre>
1026 * c = (2^s + 1) * a
1027 * a_big = c - a
1028 * a_hi = c - a_big
1029 * a_lo = a - a_hi
1030 * a = a_hi + a_lo
1031 * </pre>
1032 *
1033 * <p>The multiplicand allows a p-bit value to be split into
1034 * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}.
1035 * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo}
1036 * contains a bit of information. The constant is chosen so that s is ceil(p/2) where
1037 * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be
1038 * 1 for a non sub-normal number) and s is 27.
1039 *
1040 * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow
1041 * may occur when the exponent of the input value is above 996.
1042 *
1043 * <p>Splitting a NaN or infinite value will return NaN.
1044 *
1045 * @param value Value.
1046 * @return the high part of the value.
1047 * @see Math#getExponent(double)
1048 */
1049 static double highPart(double value) {
1050 final double c = MULTIPLIER * value;
1051 return c - (c - value);
1052 }
1053
1054 // Public API operations
1055
1056 /**
1057 * Returns a {@code DD} whose value is the negation of both parts of double-double number.
1058 *
1059 * @return the negation
1060 */
1061 @Override
1062 public DD negate() {
1063 return new DD(-x, -xx);
1064 }
1065
1066 /**
1067 * Returns a {@code DD} whose value is the absolute value of the number {@code (x, xx)}
1068 * This method assumes that the low part {@code xx} is the smaller magnitude.
1069 *
1070 * <p>Cases:
1071 * <ul>
1072 * <li>If the {@code x} value is negative the result is {@code (-x, -xx)}.
1073 * <li>If the {@code x} value is +/- 0.0 the result is {@code (0.0, 0.0)}; this
1074 * will remove sign information from the round-off component assumed to be zero.
1075 * <li>Otherwise the result is {@code this}.
1076 * </ul>
1077 *
1078 * @return the absolute value
1079 * @see #negate()
1080 * @see #ZERO
1081 */
1082 public DD abs() {
1083 // Assume |hi| > |lo|, i.e. the low part is the round-off
1084 if (x < 0) {
1085 return negate();
1086 }
1087 // NaN, positive or zero
1088 // return a canonical absolute of zero
1089 return x == 0 ? ZERO : this;
1090 }
1091
1092 /**
1093 * Returns the largest (closest to positive infinity) {@code DD} value that is less
1094 * than or equal to {@code this} number {@code (x, xx)} and is equal to a mathematical integer.
1095 *
1096 * <p>This method may change the representation of zero and non-finite values; the
1097 * result is equivalent to {@code Math.floor(x)} and the {@code xx} part is ignored.
1098 *
1099 * <p>Cases:
1100 * <ul>
1101 * <li>If {@code x} is NaN, then the result is {@code (NaN, 0)}.
1102 * <li>If {@code x} is infinite, then the result is {@code (x, 0)}.
1103 * <li>If {@code x} is +/-0.0, then the result is {@code (x, 0)}.
1104 * <li>If {@code x != Math.floor(x)}, then the result is {@code (Math.floor(x), 0)}.
1105 * <li>Otherwise the result is the {@code DD} value equal to the sum
1106 * {@code Math.floor(x) + Math.floor(xx)}.
1107 * </ul>
1108 *
1109 * <p>The result may generate a high part smaller (closer to negative infinity) than
1110 * {@code Math.floor(x)} if {@code x} is a representable integer and the {@code xx} value
1111 * is negative.
1112 *
1113 * @return the largest (closest to positive infinity) value that is less than or equal
1114 * to {@code this} and is equal to a mathematical integer
1115 * @see Math#floor(double)
1116 * @see #isFinite()
1117 */
1118 public DD floor() {
1119 return floorOrCeil(x, xx, Math::floor);
1120 }
1121
1122 /**
1123 * Returns the smallest (closest to negative infinity) {@code DD} value that is greater
1124 * than or equal to {@code this} number {@code (x, xx)} and is equal to a mathematical integer.
1125 *
1126 * <p>This method may change the representation of zero and non-finite values; the
1127 * result is equivalent to {@code Math.ceil(x)} and the {@code xx} part is ignored.
1128 *
1129 * <p>Cases:
1130 * <ul>
1131 * <li>If {@code x} is NaN, then the result is {@code (NaN, 0)}.
1132 * <li>If {@code x} is infinite, then the result is {@code (x, 0)}.
1133 * <li>If {@code x} is +/-0.0, then the result is {@code (x, 0)}.
1134 * <li>If {@code x != Math.ceil(x)}, then the result is {@code (Math.ceil(x), 0)}.
1135 * <li>Otherwise the result is the {@code DD} value equal to the sum
1136 * {@code Math.ceil(x) + Math.ceil(xx)}.
1137 * </ul>
1138 *
1139 * <p>The result may generate a high part larger (closer to positive infinity) than
1140 * {@code Math.ceil(x)} if {@code x} is a representable integer and the {@code xx} value
1141 * is positive.
1142 *
1143 * @return the smallest (closest to negative infinity) value that is greater than or equal
1144 * to {@code this} and is equal to a mathematical integer
1145 * @see Math#ceil(double)
1146 * @see #isFinite()
1147 */
1148 public DD ceil() {
1149 return floorOrCeil(x, xx, Math::ceil);
1150 }
1151
1152 /**
1153 * Implementation of the floor and ceiling functions.
1154 *
1155 * <p>Cases:
1156 * <ul>
1157 * <li>If {@code x} is non-finite or zero, then the result is {@code (x, 0)}.
1158 * <li>If {@code x} is rounded by the operator to a new value {@code y}, then the
1159 * result is {@code (y, 0)}.
1160 * <li>Otherwise the result is the {@code DD} value equal to the sum {@code op(x) + op(xx)}.
1161 * </ul>
1162 *
1163 * @param x High part of x.
1164 * @param xx Low part of x.
1165 * @param op Floor or ceiling operator.
1166 * @return the result
1167 */
1168 private static DD floorOrCeil(double x, double xx, DoubleUnaryOperator op) {
1169 // Assume |hi| > |lo|, i.e. the low part is the round-off
1170 final double y = op.applyAsDouble(x);
1171 // Note: The floating-point comparison is intentional
1172 if (y == x) {
1173 // Handle non-finite and zero by ignoring the low part
1174 if (isNotNormal(y)) {
1175 return new DD(y, 0);
1176 }
1177 // High part is an integer, use the low part.
1178 // Any rounding must propagate to the high part.
1179 // Note: add 0.0 to convert -0.0 to 0.0. This is required to ensure
1180 // the round-off component of the fastTwoSum result is always 0.0
1181 // when yy == 0. This only applies in the ceiling operator when
1182 // xx is in (-1, 0] and will be converted to -0.0.
1183 final double yy = op.applyAsDouble(xx) + 0;
1184 return fastTwoSum(y, yy);
1185 }
1186 // NaN or already rounded.
1187 // xx has no effect on the rounding.
1188 return new DD(y, 0);
1189 }
1190
1191 /**
1192 * Returns a {@code DD} whose value is {@code (this + y)}.
1193 *
1194 * <p>This computes the same result as
1195 * {@link #add(DD) add(DD.of(y))}.
1196 *
1197 * <p>The computed result is within 2 eps of the exact result where eps is 2<sup>-106</sup>.
1198 *
1199 * @param y Value to be added to this number.
1200 * @return {@code this + y}.
1201 * @see #add(DD)
1202 */
1203 public DD add(double y) {
1204 // (s0, s1) = x + y
1205 final double s0 = x + y;
1206 final double s1 = twoSumLow(x, y, s0);
1207 // Note: if x + y cancel to a non-zero result then s0 is >= 1 ulp of x.
1208 // This is larger than xx so fast-two-sum can be used.
1209 return fastTwoSum(s0, s1 + xx);
1210 }
1211
1212 /**
1213 * Returns a {@code DD} whose value is {@code (this + y)}.
1214 *
1215 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1216 *
1217 * @param y Value to be added to this number.
1218 * @return {@code this + y}.
1219 */
1220 @Override
1221 public DD add(DD y) {
1222 return add(x, xx, y.x, y.xx);
1223 }
1224
1225 /**
1226 * Compute the sum of {@code (x, xx)} and {@code (y, yy)}.
1227 *
1228 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1229 *
1230 * @param x High part of x.
1231 * @param xx Low part of x.
1232 * @param y High part of y.
1233 * @param yy Low part of y.
1234 * @return the sum
1235 * @see #accurateAdd(double, double, double, double)
1236 */
1237 static DD add(double x, double xx, double y, double yy) {
1238 // Sum parts and save
1239 // (s0, s1) = x + y
1240 final double s0 = x + y;
1241 final double s1 = twoSumLow(x, y, s0);
1242 // (t0, t1) = xx + yy
1243 final double t0 = xx + yy;
1244 final double t1 = twoSumLow(xx, yy, t0);
1245 // result = s + t
1246 // |s1| is >= 1 ulp of max(|x|, |y|)
1247 // |t0| is >= 1 ulp of max(|xx|, |yy|)
1248 final DD zz = fastTwoSum(s0, s1 + t0);
1249 return fastTwoSum(zz.x, zz.xx + t1);
1250 }
1251
1252 /**
1253 * Compute the sum of {@code (x, xx)} and {@code y}.
1254 *
1255 * <p>This computes the same result as
1256 * {@link #accurateAdd(double, double, double, double) accurateAdd(x, xx, y, 0)}.
1257 *
1258 * <p>Note: This is an internal helper method used when accuracy is required.
1259 * The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
1260 * The performance is approximately 1.5-fold slower than {@link #add(double)}.
1261 *
1262 * @param x High part of x.
1263 * @param xx Low part of x.
1264 * @param y y.
1265 * @return the sum
1266 */
1267 static DD accurateAdd(double x, double xx, double y) {
1268 // Grow expansion (Schewchuk): (x, xx) + y -> (s0, s1, s2)
1269 DD s = twoSum(xx, y);
1270 double s2 = s.xx;
1271 s = twoSum(x, s.x);
1272 final double s0 = s.x;
1273 final double s1 = s.xx;
1274 // Compress (Schewchuk Fig. 15): (s0, s1, s2) -> (s0, s1)
1275 s = fastTwoSum(s1, s2);
1276 s2 = s.xx;
1277 s = fastTwoSum(s0, s.x);
1278 // Here (s0, s1) = s
1279 // e = exact 159-bit result
1280 // |e - s0| <= ulp(s0)
1281 // |s1 + s2| <= ulp(e - s0)
1282 return fastTwoSum(s.x, s2 + s.xx);
1283 }
1284
1285 /**
1286 * Compute the sum of {@code (x, xx)} and {@code (y, yy)}.
1287 *
1288 * <p>The high-part of the result is within 1 ulp of the true sum {@code e}.
1289 * The low-part of the result is within 1 ulp of the result of the high-part
1290 * subtracted from the true sum {@code e - hi}.
1291 *
1292 * <p>Note: This is an internal helper method used when accuracy is required.
1293 * The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
1294 * The performance is approximately 2-fold slower than {@link #add(DD)}.
1295 *
1296 * @param x High part of x.
1297 * @param xx Low part of x.
1298 * @param y High part of y.
1299 * @param yy Low part of y.
1300 * @return the sum
1301 */
1302 static DD accurateAdd(double x, double xx, double y, double yy) {
1303 // Expansion sum (Schewchuk Fig 7): (x, xx) + (x, yy) -> (s0, s1, s2, s3)
1304 DD s = twoSum(xx, yy);
1305 double s3 = s.xx;
1306 s = twoSum(x, s.x);
1307 // (s0, s1, s2) == (s.x, s.xx, s3)
1308 double s0 = s.x;
1309 s = twoSum(s.xx, y);
1310 double s2 = s.xx;
1311 s = twoSum(s0, s.x);
1312 // s1 = s.xx
1313 s0 = s.x;
1314 // Compress (Schewchuk Fig. 15) (s0, s1, s2, s3) -> (s0, s1)
1315 s = fastTwoSum(s.xx, s2);
1316 final double s1 = s.x;
1317 s = fastTwoSum(s.xx, s3);
1318 // s2 = s.x
1319 s3 = s.xx;
1320 s = fastTwoSum(s1, s.x);
1321 s2 = s.xx;
1322 s = fastTwoSum(s0, s.x);
1323 // Here (s0, s1) = s
1324 // e = exact 212-bit result
1325 // |e - s0| <= ulp(s0)
1326 // |s1 + s2 + s3| <= ulp(e - s0) (Sum magnitudes small to high)
1327 return fastTwoSum(s.x, s3 + s2 + s.xx);
1328 }
1329
1330 /**
1331 * Returns a {@code DD} whose value is {@code (this - y)}.
1332 *
1333 * <p>This computes the same result as {@link #add(DD) add(-y)}.
1334 *
1335 * <p>The computed result is within 2 eps of the exact result where eps is 2<sup>-106</sup>.
1336 *
1337 * @param y Value to be subtracted from this number.
1338 * @return {@code this - y}.
1339 * @see #subtract(DD)
1340 */
1341 public DD subtract(double y) {
1342 return add(-y);
1343 }
1344
1345 /**
1346 * Returns a {@code DD} whose value is {@code (this - y)}.
1347 *
1348 * <p>This computes the same result as {@link #add(DD) add(y.negate())}.
1349 *
1350 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1351 *
1352 * @param y Value to be subtracted from this number.
1353 * @return {@code this - y}.
1354 */
1355 @Override
1356 public DD subtract(DD y) {
1357 return add(x, xx, -y.x, -y.xx);
1358 }
1359
1360 /**
1361 * Returns a {@code DD} whose value is {@code this * y}.
1362 *
1363 * <p>This computes the same result as
1364 * {@link #multiply(DD) multiply(DD.of(y))}.
1365 *
1366 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1367 *
1368 * @param y Factor.
1369 * @return {@code this * y}.
1370 * @see #multiply(DD)
1371 */
1372 public DD multiply(double y) {
1373 return multiply(x, xx, y);
1374 }
1375
1376 /**
1377 * Compute the multiplication product of {@code (x, xx)} and {@code y}.
1378 *
1379 * <p>This computes the same result as
1380 * {@link #multiply(double, double, double, double) multiply(x, xx, y, 0)}.
1381 *
1382 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1383 *
1384 * @param x High part of x.
1385 * @param xx Low part of x.
1386 * @param y High part of y.
1387 * @return the product
1388 * @see #multiply(double, double, double, double)
1389 */
1390 private static DD multiply(double x, double xx, double y) {
1391 // Dekker mul2 with yy=0
1392 // (Alternative: Scale expansion (Schewchuk Fig 13))
1393 final double hi = x * y;
1394 final double lo = twoProductLow(x, y, hi);
1395 // Save 2 FLOPS compared to multiply(x, xx, y, 0).
1396 // This is reused in divide to save more FLOPS so worth the optimisation.
1397 return fastTwoSum(hi, lo + xx * y);
1398 }
1399
1400 /**
1401 * Returns a {@code DD} whose value is {@code this * y}.
1402 *
1403 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1404 *
1405 * @param y Factor.
1406 * @return {@code this * y}.
1407 */
1408 @Override
1409 public DD multiply(DD y) {
1410 return multiply(x, xx, y.x, y.xx);
1411 }
1412
1413 /**
1414 * Compute the multiplication product of {@code (x, xx)} and {@code (y, yy)}.
1415 *
1416 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1417 *
1418 * @param x High part of x.
1419 * @param xx Low part of x.
1420 * @param y High part of y.
1421 * @param yy Low part of y.
1422 * @return the product
1423 */
1424 private static DD multiply(double x, double xx, double y, double yy) {
1425 // Dekker mul2
1426 // (Alternative: Scale expansion (Schewchuk Fig 13))
1427 final double hi = x * y;
1428 final double lo = twoProductLow(x, y, hi);
1429 return fastTwoSum(hi, lo + (x * yy + xx * y));
1430 }
1431
1432 /**
1433 * Returns a {@code DD} whose value is {@code this * this}.
1434 *
1435 * <p>This method is an optimisation of {@link #multiply(DD) multiply(this)}.
1436 *
1437 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1438 *
1439 * @return {@code this}<sup>2</sup>
1440 * @see #multiply(DD)
1441 */
1442 public DD square() {
1443 return square(x, xx);
1444 }
1445
1446 /**
1447 * Compute the square of {@code (x, xx)}.
1448 *
1449 * @param x High part of x.
1450 * @param xx Low part of x.
1451 * @return the square
1452 */
1453 private static DD square(double x, double xx) {
1454 // Dekker mul2
1455 final double hi = x * x;
1456 final double lo = twoSquareLow(x, hi);
1457 return fastTwoSum(hi, lo + (2 * x * xx));
1458 }
1459
1460 /**
1461 * Returns a {@code DD} whose value is {@code (this / y)}.
1462 * If {@code y = 0} the result is undefined.
1463 *
1464 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
1465 *
1466 * @param y Divisor.
1467 * @return {@code this / y}.
1468 */
1469 public DD divide(double y) {
1470 return divide(x, xx, y);
1471 }
1472
1473 /**
1474 * Compute the division of {@code (x, xx)} by {@code y}.
1475 * If {@code y = 0} the result is undefined.
1476 *
1477 * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
1478 *
1479 * @param x High part of x.
1480 * @param xx Low part of x.
1481 * @param y High part of y.
1482 * @return the quotient
1483 */
1484 private static DD divide(double x, double xx, double y) {
1485 // Long division
1486 // quotient q0 = x / y
1487 final double q0 = x / y;
1488 // remainder r0 = x - q0 * y
1489 DD p = twoProd(y, q0);
1490 // High accuracy add required
1491 DD r = accurateAdd(x, xx, -p.x, -p.xx);
1492 // next quotient q1 = r0 / y
1493 final double q1 = r.x / y;
1494 // remainder r1 = r0 - q1 * y
1495 p = twoProd(y, q1);
1496 // accurateAdd not used as we do not need r1.xx
1497 r = add(r.x, r.xx, -p.x, -p.xx);
1498 // next quotient q2 = r1 / y
1499 final double q2 = r.x / y;
1500 // Collect (q0, q1, q2)
1501 final DD q = fastTwoSum(q0, q1);
1502 return twoSum(q.x, q.xx + q2);
1503 }
1504
1505 /**
1506 * Returns a {@code DD} whose value is {@code (this / y)}.
1507 * If {@code y = 0} the result is undefined.
1508 *
1509 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1510 *
1511 * @param y Divisor.
1512 * @return {@code this / y}.
1513 */
1514 @Override
1515 public DD divide(DD y) {
1516 return divide(x, xx, y.x, y.xx);
1517 }
1518
1519 /**
1520 * Compute the division of {@code (x, xx)} by {@code (y, yy)}.
1521 * If {@code y = 0} the result is undefined.
1522 *
1523 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1524 *
1525 * @param x High part of x.
1526 * @param xx Low part of x.
1527 * @param y High part of y.
1528 * @param yy Low part of y.
1529 * @return the quotient
1530 */
1531 private static DD divide(double x, double xx, double y, double yy) {
1532 // Long division
1533 // quotient q0 = x / y
1534 final double q0 = x / y;
1535 // remainder r0 = x - q0 * y
1536 DD p = multiply(y, yy, q0);
1537 // High accuracy add required
1538 DD r = accurateAdd(x, xx, -p.x, -p.xx);
1539 // next quotient q1 = r0 / y
1540 final double q1 = r.x / y;
1541 // remainder r1 = r0 - q1 * y
1542 p = multiply(y, yy, q1);
1543 // accurateAdd not used as we do not need r1.xx
1544 r = add(r.x, r.xx, -p.x, -p.xx);
1545 // next quotient q2 = r1 / y
1546 final double q2 = r.x / y;
1547 // Collect (q0, q1, q2)
1548 final DD q = fastTwoSum(q0, q1);
1549 return twoSum(q.x, q.xx + q2);
1550 }
1551
1552 /**
1553 * Compute the reciprocal of {@code this}.
1554 * If {@code this} value is zero the result is undefined.
1555 *
1556 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1557 *
1558 * @return {@code this}<sup>-1</sup>
1559 */
1560 @Override
1561 public DD reciprocal() {
1562 return reciprocal(x, xx);
1563 }
1564
1565 /**
1566 * Compute the inverse of {@code (y, yy)}.
1567 * If {@code y = 0} the result is undefined.
1568 *
1569 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1570 *
1571 * @param y High part of y.
1572 * @param yy Low part of y.
1573 * @return the inverse
1574 */
1575 private static DD reciprocal(double y, double yy) {
1576 // As per divide using (x, xx) = (1, 0)
1577 // quotient q0 = x / y
1578 final double q0 = 1 / y;
1579 // remainder r0 = x - q0 * y
1580 DD p = multiply(y, yy, q0);
1581 // High accuracy add required
1582 // This add saves 2 twoSum and 3 fastTwoSum (24 FLOPS) by ignoring the zero low part
1583 DD r = accurateAdd(-p.x, -p.xx, 1);
1584 // next quotient q1 = r0 / y
1585 final double q1 = r.x / y;
1586 // remainder r1 = r0 - q1 * y
1587 p = multiply(y, yy, q1);
1588 // accurateAdd not used as we do not need r1.xx
1589 r = add(r.x, r.xx, -p.x, -p.xx);
1590 // next quotient q2 = r1 / y
1591 final double q2 = r.x / y;
1592 // Collect (q0, q1, q2)
1593 final DD q = fastTwoSum(q0, q1);
1594 return twoSum(q.x, q.xx + q2);
1595 }
1596
1597 /**
1598 * Compute the square root of {@code this} number {@code (x, xx)}.
1599 *
1600 * <p>Uses the result {@code Math.sqrt(x)}
1601 * if that result is not a finite normalized {@code double}.
1602 *
1603 * <p>Special cases:
1604 * <ul>
1605 * <li>If {@code x} is NaN or less than zero, then the result is {@code (NaN, 0)}.
1606 * <li>If {@code x} is positive infinity, then the result is {@code (+infinity, 0)}.
1607 * <li>If {@code x} is positive zero or negative zero, then the result is {@code (x, 0)}.
1608 * </ul>
1609 *
1610 * <p>The computed result is within 4 eps of the exact result where eps is 2<sup>-106</sup>.
1611 *
1612 * @return {@code sqrt(this)}
1613 * @see Math#sqrt(double)
1614 * @see Double#MIN_NORMAL
1615 */
1616 public DD sqrt() {
1617 // Standard sqrt
1618 final double c = Math.sqrt(x);
1619
1620 // Here we support {negative, +infinity, nan and zero} edge cases.
1621 // This is required to avoid a divide by zero in the following
1622 // computation, otherwise (0, 0).sqrt() = (NaN, NaN).
1623 if (isNotNormal(c)) {
1624 return new DD(c, 0);
1625 }
1626
1627 // Here hi is positive, non-zero and finite; assume lo is also finite
1628
1629 // Dekker's double precision sqrt2 algorithm.
1630 // See Dekker, 1971, pp 242.
1631 final double hc = highPart(c);
1632 final double lc = c - hc;
1633 final double u = c * c;
1634 final double uu = twoSquareLow(hc, lc, u);
1635 final double cc = (x - u - uu + xx) * 0.5 / c;
1636
1637 // Extended precision result:
1638 // y = c + cc
1639 // yy = c - y + cc
1640 return fastTwoSum(c, cc);
1641 }
1642
1643 /**
1644 * Checks if the number is not normal. This is functionally equivalent to:
1645 * <pre>{@code
1646 * final double abs = Math.abs(a);
1647 * return (abs <= Double.MIN_NORMAL || !(abs <= Double.MAX_VALUE));
1648 * }</pre>
1649 *
1650 * @param a The value.
1651 * @return true if the value is not normal
1652 */
1653 static boolean isNotNormal(double a) {
1654 // Sub-normal numbers have a biased exponent of 0.
1655 // Inf/NaN numbers have a biased exponent of 2047.
1656 // Catch both cases by extracting the raw exponent, subtracting 1
1657 // and compare unsigned (so 0 underflows to a unsigned large value).
1658 final int baisedExponent = ((int) (Double.doubleToRawLongBits(a) >>> 52)) & EXP_MASK;
1659 // Pre-compute the additions used by Integer.compareUnsigned
1660 return baisedExponent + CMP_UNSIGNED_MINUS_1 >= CMP_UNSIGNED_2046;
1661 }
1662
1663 /**
1664 * Multiply {@code this} number {@code (x, xx)} by an integral power of two.
1665 * <pre>
1666 * (y, yy) = (x, xx) * 2^exp
1667 * </pre>
1668 *
1669 * <p>The result is rounded as if performed by a single correctly rounded floating-point
1670 * multiply. This performs the same result as:
1671 * <pre>
1672 * y = Math.scalb(x, exp);
1673 * yy = Math.scalb(xx, exp);
1674 * </pre>
1675 *
1676 * <p>The implementation computes using a single multiplication if {@code exp}
1677 * is in {@code [-1022, 1023]}. Otherwise the parts {@code (x, xx)} are scaled by
1678 * repeated multiplication by power-of-two factors. The result is exact unless the scaling
1679 * generates sub-normal parts; in this case precision may be lost by a single rounding.
1680 *
1681 * @param exp Power of two scale factor.
1682 * @return the result
1683 * @see Math#scalb(double, int)
1684 * @see #frexp(int[])
1685 */
1686 public DD scalb(int exp) {
1687 // Handle scaling when 2^n can be represented with a single normal number
1688 // n >= -1022 && n <= 1023
1689 // Using unsigned compare => n + 1022 <= 1023 + 1022
1690 if (exp + CMP_UNSIGNED_1022 < CMP_UNSIGNED_2046) {
1691 final double s = twoPow(exp);
1692 return new DD(x * s, xx * s);
1693 }
1694
1695 // Scale by multiples of 2^512 (largest representable power of 2).
1696 // Scaling requires max 5 multiplications to under/overflow any normal value.
1697 // Break this down into e.g.: 2^512^(exp / 512) * 2^(exp % 512)
1698 // Number of multiples n = exp / 512 : exp >>> 9
1699 // Remainder m = exp % 512 : exp & 511 (exp must be positive)
1700 int n;
1701 int m;
1702 double p;
1703 if (exp < 0) {
1704 // Downscaling
1705 // (Note: Using an unsigned shift handles negation of min value: -2^31)
1706 n = -exp >>> 9;
1707 // m = exp % 512
1708 m = -(-exp & 511);
1709 p = TWO_POW_M512;
1710 } else {
1711 // Upscaling
1712 n = exp >>> 9;
1713 m = exp & 511;
1714 p = TWO_POW_512;
1715 }
1716
1717 // Multiply by the remainder scaling factor first. The remaining multiplications
1718 // are either 2^512 or 2^-512.
1719 // Down-scaling to sub-normal will use the final multiplication into a sub-normal result.
1720 // Note here that n >= 1 as the n in [-1022, 1023] case has been handled.
1721
1722 double z0;
1723 double z1;
1724
1725 // Handle n : 1, 2, 3, 4, 5
1726 if (n >= 5) {
1727 // n >= 5 will be over/underflow. Use an extreme scale factor.
1728 // Do not use +/- infinity as this creates NaN if x = 0.
1729 // p -> 2^1023 or 2^-1025
1730 p *= p * 0.5;
1731 z0 = x * p * p * p;
1732 z1 = xx * p * p * p;
1733 return new DD(z0, z1);
1734 }
1735
1736 final double s = twoPow(m);
1737 if (n == 4) {
1738 z0 = x * s * p * p * p * p;
1739 z1 = xx * s * p * p * p * p;
1740 } else if (n == 3) {
1741 z0 = x * s * p * p * p;
1742 z1 = xx * s * p * p * p;
1743 } else if (n == 2) {
1744 z0 = x * s * p * p;
1745 z1 = xx * s * p * p;
1746 } else {
1747 // n = 1. Occurs only if exp = -1023.
1748 z0 = x * s * p;
1749 z1 = xx * s * p;
1750 }
1751 return new DD(z0, z1);
1752 }
1753
1754 /**
1755 * Create a normalized double with the value {@code 2^n}.
1756 *
1757 * <p>Warning: Do not call with {@code n = -1023}. This will create zero.
1758 *
1759 * @param n Exponent (in the range [-1022, 1023]).
1760 * @return the double
1761 */
1762 static double twoPow(int n) {
1763 return Double.longBitsToDouble(((long) (n + 1023)) << 52);
1764 }
1765
1766 /**
1767 * Convert {@code this} number {@code x} to fractional {@code f} and integral
1768 * {@code 2^exp} components.
1769 * <pre>
1770 * x = f * 2^exp
1771 * </pre>
1772 *
1773 * <p>The combined fractional part (f, ff) is in the range {@code [0.5, 1)}.
1774 *
1775 * <p>Special cases:
1776 * <ul>
1777 * <li>If {@code x} is zero, then the normalized fraction is zero and the exponent is zero.
1778 * <li>If {@code x} is NaN, then the normalized fraction is NaN and the exponent is unspecified.
1779 * <li>If {@code x} is infinite, then the normalized fraction is infinite and the exponent is unspecified.
1780 * <li>If high-part {@code x} is an exact power of 2 and the low-part {@code xx} has an opposite
1781 * signed non-zero magnitude then fraction high-part {@code f} will be {@code +/-1} such that
1782 * the double-double number is in the range {@code [0.5, 1)}.
1783 * </ul>
1784 *
1785 * <p>This is named using the equivalent function in the standard C math.h library.
1786 *
1787 * @param exp Power of two scale factor (integral exponent).
1788 * @return Fraction part.
1789 * @see Math#getExponent(double)
1790 * @see #scalb(int)
1791 * @see <a href="https://www.cplusplus.com/reference/cmath/frexp/">C math.h frexp</a>
1792 */
1793 public DD frexp(int[] exp) {
1794 exp[0] = getScale(x);
1795 // Handle non-scalable numbers
1796 if (exp[0] == Double.MAX_EXPONENT + 1) {
1797 // Returns +/-0.0, inf or nan
1798 // Maintain the fractional part unchanged.
1799 // Do not change the fractional part of inf/nan, and assume
1800 // |xx| < |x| thus if x == 0 then xx == 0 (otherwise the double-double is invalid)
1801 // Unspecified for NaN/inf so just return zero exponent.
1802 exp[0] = 0;
1803 return this;
1804 }
1805 // The scale will create the fraction in [1, 2) so increase by 1 for [0.5, 1)
1806 exp[0] += 1;
1807 DD f = scalb(-exp[0]);
1808 // Return |(hi, lo)| = (1, -eps) if required.
1809 // f.x * f.xx < 0 detects sign change unless the product underflows.
1810 // Handle extreme case of |f.xx| being min value by doubling f.x to 1.
1811 if (Math.abs(f.x) == HALF && 2 * f.x * f.xx < 0) {
1812 f = new DD(f.x * 2, f.xx * 2);
1813 exp[0] -= 1;
1814 }
1815 return f;
1816 }
1817
1818 /**
1819 * Returns a scale suitable for use with {@link Math#scalb(double, int)} to normalise
1820 * the number to the interval {@code [1, 2)}.
1821 *
1822 * <p>In contrast to {@link Math#getExponent(double)} this handles
1823 * sub-normal numbers by computing the number of leading zeros in the mantissa
1824 * and shifting the unbiased exponent. The result is that for all finite, non-zero,
1825 * numbers, the magnitude of {@code scalb(x, -getScale(x))} is
1826 * always in the range {@code [1, 2)}.
1827 *
1828 * <p>This method is a functional equivalent of the c function ilogb(double).
1829 *
1830 * <p>The result is to be used to scale a number using {@link Math#scalb(double, int)}.
1831 * Hence the special case of a zero argument is handled using the return value for NaN
1832 * as zero cannot be scaled. This is different from {@link Math#getExponent(double)}.
1833 *
1834 * <p>Special cases:
1835 * <ul>
1836 * <li>If the argument is NaN or infinite, then the result is {@link Double#MAX_EXPONENT} + 1.
1837 * <li>If the argument is zero, then the result is {@link Double#MAX_EXPONENT} + 1.
1838 * </ul>
1839 *
1840 * @param a Value.
1841 * @return The unbiased exponent of the value to be used for scaling, or 1024 for 0, NaN or Inf
1842 * @see Math#getExponent(double)
1843 * @see Math#scalb(double, int)
1844 * @see <a href="https://www.cplusplus.com/reference/cmath/ilogb/">ilogb</a>
1845 */
1846 private static int getScale(double a) {
1847 // Only interested in the exponent and mantissa so remove the sign bit
1848 final long bits = Double.doubleToRawLongBits(a) & UNSIGN_MASK;
1849 // Get the unbiased exponent
1850 int exp = ((int) (bits >>> 52)) - EXPONENT_OFFSET;
1851
1852 // No case to distinguish nan/inf (exp == 1024).
1853 // Handle sub-normal numbers
1854 if (exp == Double.MIN_EXPONENT - 1) {
1855 // Special case for zero, return as nan/inf to indicate scaling is not possible
1856 if (bits == 0) {
1857 return Double.MAX_EXPONENT + 1;
1858 }
1859 // A sub-normal number has an exponent below -1022. The amount below
1860 // is defined by the number of shifts of the most significant bit in
1861 // the mantissa that is required to get a 1 at position 53 (i.e. as
1862 // if it were a normal number with assumed leading bit)
1863 final long mantissa = bits & MANTISSA_MASK;
1864 exp -= Long.numberOfLeadingZeros(mantissa << 12);
1865 }
1866 return exp;
1867 }
1868
1869 /**
1870 * Compute {@code this} number {@code (x, xx)} raised to the power {@code n}.
1871 *
1872 * <p>Special cases:
1873 * <ul>
1874 * <li>If {@code x} is not a finite normalized {@code double}, the low part {@code xx}
1875 * is ignored and the result is {@link Math#pow(double, double) Math.pow(x, n)}.
1876 * <li>If {@code n = 0} the result is {@code (1, 0)}.
1877 * <li>If {@code n = 1} the result is {@code (x, xx)}.
1878 * <li>If {@code n = -1} the result is the {@link #reciprocal() reciprocal}.
1879 * <li>If the computation overflows the result is undefined.
1880 * </ul>
1881 *
1882 * <p>Computation uses multiplication by factors generated by repeat squaring of the value.
1883 * These multiplications have no special case handling for overflow; in the event of overflow
1884 * the result is undefined. The {@link #pow(int, long[])} method can be used to
1885 * generate a scaled fraction result for any finite {@code DD} number and exponent.
1886 *
1887 * <p>The computed result is approximately {@code 16 * (n - 1) * eps} of the exact result
1888 * where eps is 2<sup>-106</sup>.
1889 *
1890 * @param n Exponent.
1891 * @return {@code this}<sup>n</sup>
1892 * @see Math#pow(double, double)
1893 * @see #pow(int, long[])
1894 * @see #isFinite()
1895 */
1896 @Override
1897 public DD pow(int n) {
1898 // Edge cases.
1899 if (n == 1) {
1900 return this;
1901 }
1902 if (n == 0) {
1903 return ONE;
1904 }
1905
1906 // Handles {infinity, nan and zero} cases
1907 if (isNotNormal(x)) {
1908 // Assume the high part has the greatest magnitude
1909 // so here the low part is irrelevant
1910 return new DD(Math.pow(x, n), 0);
1911 }
1912
1913 // Here hi is finite; assume lo is also finite
1914 if (n == -1) {
1915 return reciprocal();
1916 }
1917
1918 // Extended precision computation is required.
1919 // No checks for overflow.
1920 if (n < 0) {
1921 // Note: Correctly handles negating -2^31
1922 return computePow(x, xx, -n).reciprocal();
1923 }
1924 return computePow(x, xx, n);
1925 }
1926
1927 /**
1928 * Compute the number {@code x} (non-zero finite) raised to the power {@code n}.
1929 *
1930 * <p>The input power is treated as an unsigned integer. Thus the negative value
1931 * {@link Integer#MIN_VALUE} is 2^31.
1932 *
1933 * @param x Fractional high part of x.
1934 * @param xx Fractional low part of x.
1935 * @param n Power (in [2, 2^31]).
1936 * @return x^n.
1937 */
1938 private static DD computePow(double x, double xx, int n) {
1939 // Compute the power by multiplication (keeping track of the scale):
1940 // 13 = 1101
1941 // x^13 = x^8 * x^4 * x^1
1942 // = ((x^2 * x)^2)^2 * x
1943 // 21 = 10101
1944 // x^21 = x^16 * x^4 * x^1
1945 // = (((x^2)^2 * x)^2)^2 * x
1946 // 1. Find highest set bit in n (assume n != 0)
1947 // 2. Initialise result as x
1948 // 3. For remaining bits (0 or 1) below the highest set bit:
1949 // - square the current result
1950 // - if the current bit is 1 then multiply by x
1951 // In this scheme the factors to multiply by x can be pre-computed.
1952
1953 // Split b
1954 final double xh = highPart(x);
1955 final double xl = x - xh;
1956
1957 // Initialise the result as x^1
1958 double f0 = x;
1959 double f1 = xx;
1960
1961 double u;
1962 double v;
1963 double w;
1964
1965 // Shift the highest set bit off the top.
1966 // Any remaining bits are detected in the sign bit.
1967 final int shift = Integer.numberOfLeadingZeros(n) + 1;
1968 int bits = n << shift;
1969
1970 // Multiplication is done without object allocation of DD intermediates.
1971 // The square can be optimised.
1972 // Process remaining bits below highest set bit.
1973 for (int i = 32 - shift; i != 0; i--, bits <<= 1) {
1974 // Square the result
1975 // Inline multiply(f0, f1, f0, f1), adapted for squaring
1976 u = f0 * f0;
1977 v = twoSquareLow(f0, u);
1978 // Inline (f0, f1) = fastTwoSum(hi, lo + (2 * f0 * f1))
1979 w = v + (2 * f0 * f1);
1980 f0 = u + w;
1981 f1 = fastTwoSumLow(u, w, f0);
1982 if (bits < 0) {
1983 // Inline multiply(f0, f1, x, xx)
1984 u = highPart(f0);
1985 v = f0 - u;
1986 w = f0 * x;
1987 v = twoProductLow(u, v, xh, xl, w);
1988 // Inline (f0, f1) = fastTwoSum(w, v + (f0 * xx + f1 * x))
1989 u = v + (f0 * xx + f1 * x);
1990 f0 = w + u;
1991 f1 = fastTwoSumLow(w, u, f0);
1992 }
1993 }
1994
1995 return new DD(f0, f1);
1996 }
1997
1998 /**
1999 * Compute {@code this} number {@code x} raised to the power {@code n}.
2000 *
2001 * <p>The value is returned as fractional {@code f} and integral
2002 * {@code 2^exp} components.
2003 * <pre>
2004 * (x+xx)^n = (f+ff) * 2^exp
2005 * </pre>
2006 *
2007 * <p>The combined fractional part (f, ff) is in the range {@code [0.5, 1)}.
2008 *
2009 * <p>Special cases:
2010 * <ul>
2011 * <li>If {@code (x, xx)} is zero the high part of the fractional part is
2012 * computed using {@link Math#pow(double, double) Math.pow(x, n)} and the exponent is 0.
2013 * <li>If {@code n = 0} the fractional part is 0.5 and the exponent is 1.
2014 * <li>If {@code (x, xx)} is an exact power of 2 the fractional part is 0.5 and the exponent
2015 * is the power of 2 minus 1.
2016 * <li>If the result high-part is an exact power of 2 and the low-part has an opposite
2017 * signed non-zero magnitude then the fraction high-part {@code f} will be {@code +/-1} such that
2018 * the double-double number is in the range {@code [0.5, 1)}.
2019 * <li>If the argument is not finite then a fractional representation is not possible.
2020 * In this case the fraction and the scale factor is undefined.
2021 * </ul>
2022 *
2023 * <p>The computed result is approximately {@code 16 * (n - 1) * eps} of the exact result
2024 * where eps is 2<sup>-106</sup>.
2025 *
2026 * @param n Power.
2027 * @param exp Result power of two scale factor (integral exponent).
2028 * @return Fraction part.
2029 * @see #frexp(int[])
2030 */
2031 public DD pow(int n, long[] exp) {
2032 // Edge cases.
2033 if (n == 0) {
2034 exp[0] = 1;
2035 return new DD(0.5, 0);
2036 }
2037 // IEEE result for non-finite or zero
2038 if (!Double.isFinite(x) || x == 0) {
2039 exp[0] = 0;
2040 return new DD(Math.pow(x, n), 0);
2041 }
2042 // Here the number is non-zero finite
2043 final int[] ie = {0};
2044 DD f = frexp(ie);
2045 final long b = ie[0];
2046 // Handle exact powers of 2
2047 if (Math.abs(f.x) == HALF && f.xx == 0) {
2048 // (f * 2^b)^n = (2f)^n * 2^(b-1)^n
2049 // Use Math.pow to create the sign.
2050 // Note the result must be scaled to the fractional representation
2051 // by multiplication by 0.5 and addition of 1 to the exponent.
2052 final double y0 = 0.5 * Math.pow(2 * f.x, n);
2053 // Propagate sign change (y0*f.x) to the original zero (this.xx)
2054 final double y1 = Math.copySign(0.0, y0 * f.x * this.xx);
2055 exp[0] = 1 + (b - 1) * n;
2056 return new DD(y0, y1);
2057 }
2058 if (n < 0) {
2059 f = computePowScaled(b, f.x, f.xx, -n, exp);
2060 // Result is a non-zero fraction part so inversion is safe
2061 f = reciprocal(f.x, f.xx);
2062 // Rescale to [0.5, 1.0)
2063 f = f.frexp(ie);
2064 exp[0] = ie[0] - exp[0];
2065 return f;
2066 }
2067 return computePowScaled(b, f.x, f.xx, n, exp);
2068 }
2069
2070 /**
2071 * Compute the number {@code x} (non-zero finite) raised to the power {@code n}.
2072 *
2073 * <p>The input power is treated as an unsigned integer. Thus the negative value
2074 * {@link Integer#MIN_VALUE} is 2^31.
2075 *
2076 * @param b Integral component 2^exp of x.
2077 * @param x Fractional high part of x.
2078 * @param xx Fractional low part of x.
2079 * @param n Power (in [2, 2^31]).
2080 * @param exp Result power of two scale factor (integral exponent).
2081 * @return Fraction part.
2082 */
2083 private static DD computePowScaled(long b, double x, double xx, int n, long[] exp) {
2084 // Compute the power by multiplication (keeping track of the scale):
2085 // 13 = 1101
2086 // x^13 = x^8 * x^4 * x^1
2087 // = ((x^2 * x)^2)^2 * x
2088 // 21 = 10101
2089 // x^21 = x^16 * x^4 * x^1
2090 // = (((x^2)^2 * x)^2)^2 * x
2091 // 1. Find highest set bit in n (assume n != 0)
2092 // 2. Initialise result as x
2093 // 3. For remaining bits (0 or 1) below the highest set bit:
2094 // - square the current result
2095 // - if the current bit is 1 then multiply by x
2096 // In this scheme the factors to multiply by x can be pre-computed.
2097
2098 // Scale the input in [0.5, 1) to be above 1. Represented as 2^be * b.
2099 final long be = b - 1;
2100 final double b0 = x * 2;
2101 final double b1 = xx * 2;
2102 // Split b
2103 final double b0h = highPart(b0);
2104 final double b0l = b0 - b0h;
2105
2106 // Initialise the result as x^1. Represented as 2^fe * f.
2107 long fe = be;
2108 double f0 = b0;
2109 double f1 = b1;
2110
2111 double u;
2112 double v;
2113 double w;
2114
2115 // Shift the highest set bit off the top.
2116 // Any remaining bits are detected in the sign bit.
2117 final int shift = Integer.numberOfLeadingZeros(n) + 1;
2118 int bits = n << shift;
2119
2120 // Multiplication is done without using DD.multiply as the arguments
2121 // are always finite and the product will not overflow. The square can be optimised.
2122 // Process remaining bits below highest set bit.
2123 for (int i = 32 - shift; i != 0; i--, bits <<= 1) {
2124 // Square the result
2125 // Inline multiply(f0, f1, f0, f1, f), adapted for squaring
2126 fe <<= 1;
2127 u = f0 * f0;
2128 v = twoSquareLow(f0, u);
2129 // Inline fastTwoSum(hi, lo + (2 * f0 * f1), f)
2130 w = v + (2 * f0 * f1);
2131 f0 = u + w;
2132 f1 = fastTwoSumLow(u, w, f0);
2133 // Rescale
2134 if (Math.abs(f0) > SAFE_MULTIPLY) {
2135 // Scale back to the [1, 2) range. As safe multiply is 2^500
2136 // the exponent should be < 1001 so the twoPow scaling factor is supported.
2137 final int e = Math.getExponent(f0);
2138 final double s = twoPow(-e);
2139 fe += e;
2140 f0 *= s;
2141 f1 *= s;
2142 }
2143 if (bits < 0) {
2144 // Multiply by b
2145 fe += be;
2146 // Inline multiply(f0, f1, b0, b1, f)
2147 u = highPart(f0);
2148 v = f0 - u;
2149 w = f0 * b0;
2150 v = twoProductLow(u, v, b0h, b0l, w);
2151 // Inline fastTwoSum(w, v + (f0 * b1 + f1 * b0), f)
2152 u = v + (f0 * b1 + f1 * b0);
2153 f0 = w + u;
2154 f1 = fastTwoSumLow(w, u, f0);
2155 // Avoid rescale as x2 is in [1, 2)
2156 }
2157 }
2158
2159 final int[] e = {0};
2160 final DD f = new DD(f0, f1).frexp(e);
2161 exp[0] = fe + e[0];
2162 return f;
2163 }
2164
2165 /**
2166 * Test for equality with another object. If the other object is a {@code DD} then a
2167 * comparison is made of the parts; otherwise {@code false} is returned.
2168 *
2169 * <p>If both parts of two double-double numbers
2170 * are numerically equivalent the two {@code DD} objects are considered to be equal.
2171 * For this purpose, two {@code double} values are considered to be
2172 * the same if and only if the method call
2173 * {@link Double#doubleToLongBits(double) Double.doubleToLongBits(value + 0.0)}
2174 * returns the identical {@code long} when applied to each value. This provides
2175 * numeric equality of different representations of zero as per {@code -0.0 == 0.0},
2176 * and equality of {@code NaN} values.
2177 *
2178 * <p>Note that in most cases, for two instances of class
2179 * {@code DD}, {@code x} and {@code y}, the
2180 * value of {@code x.equals(y)} is {@code true} if and only if
2181 *
2182 * <pre>
2183 * {@code x.hi() == y.hi() && x.lo() == y.lo()}</pre>
2184 *
2185 * <p>also has the value {@code true}. However, there are exceptions:
2186 *
2187 * <ul>
2188 * <li>Instances that contain {@code NaN} values in the same part
2189 * are considered to be equal for that part, even though {@code Double.NaN == Double.NaN}
2190 * has the value {@code false}.
2191 * <li>Instances that share a {@code NaN} value in one part
2192 * but have different values in the other part are <em>not</em> considered equal.
2193 * </ul>
2194 *
2195 * <p>The behavior is the same as if the components of the two double-double numbers were passed
2196 * to {@link java.util.Arrays#equals(double[], double[]) Arrays.equals(double[], double[])}:
2197 *
2198 * <pre>
2199 * Arrays.equals(new double[]{x.hi() + 0.0, x.lo() + 0.0},
2200 * new double[]{y.hi() + 0.0, y.lo() + 0.0}); </pre>
2201 *
2202 * <p>Note: Addition of {@code 0.0} converts signed representations of zero values
2203 * {@code -0.0} and {@code 0.0} to a canonical {@code 0.0}.
2204 *
2205 * @param other Object to test for equality with this instance.
2206 * @return {@code true} if the objects are equal, {@code false} if object
2207 * is {@code null}, not an instance of {@code DD}, or not equal to
2208 * this instance.
2209 * @see Double#doubleToLongBits(double)
2210 * @see java.util.Arrays#equals(double[], double[])
2211 */
2212 @Override
2213 public boolean equals(Object other) {
2214 if (this == other) {
2215 return true;
2216 }
2217 if (other instanceof DD) {
2218 final DD c = (DD) other;
2219 return equals(x, c.x) && equals(xx, c.xx);
2220 }
2221 return false;
2222 }
2223
2224 /**
2225 * Gets a hash code for the double-double number.
2226 *
2227 * <p>The behavior is the same as if the parts of the double-double number were passed
2228 * to {@link java.util.Arrays#hashCode(double[]) Arrays.hashCode(double[])}:
2229 *
2230 * <pre>
2231 * {@code Arrays.hashCode(new double[] {hi() + 0.0, lo() + 0.0})}</pre>
2232 *
2233 * <p>Note: Addition of {@code 0.0} provides the same hash code for different
2234 * signed representations of zero values {@code -0.0} and {@code 0.0}.
2235 *
2236 * @return A hash code value for this object.
2237 * @see java.util.Arrays#hashCode(double[]) Arrays.hashCode(double[])
2238 */
2239 @Override
2240 public int hashCode() {
2241 return 31 * (31 + Double.hashCode(x + 0.0)) + Double.hashCode(xx + 0.0);
2242 }
2243
2244 /**
2245 * Returns {@code true} if the values are numerically equal.
2246 *
2247 * <p>Two {@code double} values are considered to be
2248 * the same if and only if the method call
2249 * {@link Double#doubleToLongBits(double) Double.doubleToLongBits(value + 0.0)}
2250 * returns the identical {@code long} when applied to each value. This provides
2251 * numeric equality of different representations of zero as per {@code -0.0 == 0.0},
2252 * and equality of {@code NaN} values.
2253 *
2254 * @param x Value
2255 * @param y Value
2256 * @return {@code true} if the values are numerically equal
2257 */
2258 private static boolean equals(double x, double y) {
2259 return Double.doubleToLongBits(x + 0.0) == Double.doubleToLongBits(y + 0.0);
2260 }
2261
2262 /**
2263 * Returns a string representation of the double-double number.
2264 *
2265 * <p>The string will represent the numeric values of the parts.
2266 * The values are split by a separator and surrounded by parentheses.
2267 *
2268 * <p>The format for a double-double number is {@code "(x,xx)"}, with {@code x} and
2269 * {@code xx} converted as if using {@link Double#toString(double)}.
2270 *
2271 * <p>Note: A numerical string representation of a finite double-double number can be
2272 * generated by conversion to a {@link BigDecimal} before formatting.
2273 *
2274 * @return A string representation of the double-double number.
2275 * @see Double#toString(double)
2276 * @see #bigDecimalValue()
2277 */
2278 @Override
2279 public String toString() {
2280 return new StringBuilder(TO_STRING_SIZE)
2281 .append(FORMAT_START)
2282 .append(x).append(FORMAT_SEP)
2283 .append(xx)
2284 .append(FORMAT_END)
2285 .toString();
2286 }
2287
2288 /**
2289 * {@inheritDoc}
2290 *
2291 * <p>Note: Addition of this value with any element {@code a} may not create an
2292 * element equal to {@code a} if the element contains sign zeros. In this case the
2293 * magnitude of the result will be identical.
2294 */
2295 @Override
2296 public DD zero() {
2297 return ZERO;
2298 }
2299
2300 /** {@inheritDoc} */
2301 @Override
2302 public boolean isZero() {
2303 // we keep |x| > |xx| and Java provides 0.0 == -0.0
2304 return x == 0.0;
2305 }
2306
2307 /**
2308 * {@inheritDoc}
2309 *
2310 * <p>Note: Multiplication of this value with any element {@code a} may not create an
2311 * element equal to {@code a} if the element contains sign zeros. In this case the
2312 * magnitude of the result will be identical.
2313 */
2314 @Override
2315 public DD one() {
2316 return ONE;
2317 }
2318
2319 /** {@inheritDoc} */
2320 @Override
2321 public boolean isOne() {
2322 return x == 1.0 && xx == 0.0;
2323 }
2324
2325 /**
2326 * {@inheritDoc}
2327 *
2328 * <p>This computes the same result as {@link #multiply(double) multiply((double) y)}.
2329 *
2330 * @see #multiply(double)
2331 */
2332 @Override
2333 public DD multiply(int n) {
2334 // Note: This method exists to support the NativeOperators interface
2335 return multiply(x, xx, n);
2336 }
2337 }