ExtendedPrecision.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.numbers.core;
/**
* Computes extended precision floating-point operations.
*
* <p>Extended precision computation is delegated to the {@link DD} class. The methods here
* are extensions to prevent overflow or underflow in intermediate computations.
*/
final class ExtendedPrecision {
/**
* The upper limit above which a number may overflow during the split into a high part.
* Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe
* limit is a value with an exponent of (1023 - 27) = 2^996.
* 996 is the value obtained from {@code Math.getExponent(Double.MAX_VALUE / MULTIPLIER)}.
*/
private static final double SAFE_UPPER = 0x1.0p996;
/**
* The lower limit for a product {@code x * y} below which the round-off component may be
* sub-normal. This is set as 2^-1022 * 2^54.
*/
private static final double SAFE_LOWER = 0x1.0p-968;
/** The scale to use when down-scaling during a split into a high part.
* This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */
private static final double DOWN_SCALE = 0x1.0p-30;
/** The scale to use when up-scaling during a split into a high part.
* This is the inverse of {@link #DOWN_SCALE}. */
private static final double UP_SCALE = 0x1.0p30;
/** The upscale factor squared. */
private static final double UP_SCALE2 = 0x1.0p60;
/** The downscale factor squared. */
private static final double DOWN_SCALE2 = 0x1.0p-60;
/** Private constructor. */
private ExtendedPrecision() {
// intentionally empty.
}
/**
* Compute the low part of the double length number {@code (z,zz)} for the exact
* product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
* containing the magnitude of the rounding error when converting the exact 106-bit
* significand of the multiplication result to a 53-bit significand.
*
* <p>The method is written to be functionally similar to using a fused multiply add (FMA)
* operation to compute the low part, for example JDK 9's Math.fma function (note the sign
* change in the input argument for the product):
* <pre>
* double x = ...;
* double y = ...;
* double xy = x * y;
* double low1 = Math.fma(x, y, -xy);
* double low2 = productLow(x, y, xy);
* </pre>
*
* <p>Special cases:
*
* <ul>
* <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
* <li>If {@code x * y} is infinite or NaN then the result is NaN.
* </ul>
*
* <p>This method delegates to {@link DD#twoProductLow(double, double, double)} but uses
* scaling to avoid intermediate overflow.
*
* @param x First factor.
* @param y Second factor.
* @param xy Product of the factors (x * y).
* @return the low part of the product double length number
* @see DD#twoProductLow(double, double, double)
*/
static double productLow(double x, double y, double xy) {
// Verify the input. This must be NaN safe.
//assert Double.compare(x * y, xy) == 0
// If the number is sub-normal, inf or nan there is no round-off.
if (DD.isNotNormal(xy)) {
// Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
return xy - xy;
}
// The result xy is finite and normal.
// Use Dekker's mul12 algorithm that splits the values into high and low parts.
// Dekker's split using multiplication will overflow if the value is within 2^27
// of double max value. It can also produce 26-bit approximations that are larger
// than the input numbers for the high part causing overflow in hx * hy when
// x * y does not overflow. So we must scale down big numbers.
// We only have to scale the largest number as we know the product does not overflow
// (if one is too big then the other cannot be).
// We also scale if the product is close to overflow to avoid intermediate overflow.
// This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
// but is included here to have a single low probability branch condition.
// Add the absolute inputs for a single comparison. The sum will not be more than
// 3-fold higher than any component.
final double a = Math.abs(x);
final double b = Math.abs(y);
final double ab = Math.abs(xy);
if (a + b + ab >= SAFE_UPPER) {
// Only required to scale the largest number as x*y does not overflow.
if (a > b) {
return DD.twoProductLow(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
}
return DD.twoProductLow(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
}
// The result is computed using a product of the low parts.
// To avoid underflow in the low parts we note that these are approximately a factor
// of 2^27 smaller than the original inputs so their product will be ~2^54 smaller
// than the product xy. Ensure the product is at least 2^54 above a sub-normal.
if (ab <= SAFE_LOWER) {
// Scaling up here is safe: the largest magnitude cannot be above SAFE_LOWER / MIN_VALUE.
return DD.twoProductLow(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
}
// No scaling required
return DD.twoProductLow(x, y, xy);
}
}