ExtendedPrecision.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.numbers.core;

/**
 * Computes extended precision floating-point operations.
 *
 * <p>Extended precision computation is delegated to the {@link DD} class. The methods here
 * are extensions to prevent overflow or underflow in intermediate computations.
 */
final class ExtendedPrecision {
    /**
     * The upper limit above which a number may overflow during the split into a high part.
     * Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe
     * limit is a value with an exponent of (1023 - 27) = 2^996.
     * 996 is the value obtained from {@code Math.getExponent(Double.MAX_VALUE / MULTIPLIER)}.
     */
    private static final double SAFE_UPPER = 0x1.0p996;
    /**
     * The lower limit for a product {@code x * y} below which the round-off component may be
     * sub-normal. This is set as 2^-1022 * 2^54.
     */
    private static final double SAFE_LOWER = 0x1.0p-968;

    /** The scale to use when down-scaling during a split into a high part.
     * This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */
    private static final double DOWN_SCALE = 0x1.0p-30;
    /** The scale to use when up-scaling during a split into a high part.
     * This is the inverse of {@link #DOWN_SCALE}. */
    private static final double UP_SCALE = 0x1.0p30;

    /** The upscale factor squared. */
    private static final double UP_SCALE2 = 0x1.0p60;
    /** The downscale factor squared. */
    private static final double DOWN_SCALE2 = 0x1.0p-60;

    /** Private constructor. */
    private ExtendedPrecision() {
        // intentionally empty.
    }

    /**
     * Compute the low part of the double length number {@code (z,zz)} for the exact
     * product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
     * containing the magnitude of the rounding error when converting the exact 106-bit
     * significand of the multiplication result to a 53-bit significand.
     *
     * <p>The method is written to be functionally similar to using a fused multiply add (FMA)
     * operation to compute the low part, for example JDK 9's Math.fma function (note the sign
     * change in the input argument for the product):
     * <pre>
     *  double x = ...;
     *  double y = ...;
     *  double xy = x * y;
     *  double low1 = Math.fma(x, y, -xy);
     *  double low2 = productLow(x, y, xy);
     * </pre>
     *
     * <p>Special cases:
     *
     * <ul>
     *  <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
     *  <li>If {@code x * y} is infinite or NaN then the result is NaN.
     * </ul>
     *
     * <p>This method delegates to {@link DD#twoProductLow(double, double, double)} but uses
     * scaling to avoid intermediate overflow.
     *
     * @param x First factor.
     * @param y Second factor.
     * @param xy Product of the factors (x * y).
     * @return the low part of the product double length number
     * @see DD#twoProductLow(double, double, double)
     */
    static double productLow(double x, double y, double xy) {
        // Verify the input. This must be NaN safe.
        //assert Double.compare(x * y, xy) == 0

        // If the number is sub-normal, inf or nan there is no round-off.
        if (DD.isNotNormal(xy)) {
            // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
            return xy - xy;
        }

        // The result xy is finite and normal.
        // Use Dekker's mul12 algorithm that splits the values into high and low parts.
        // Dekker's split using multiplication will overflow if the value is within 2^27
        // of double max value. It can also produce 26-bit approximations that are larger
        // than the input numbers for the high part causing overflow in hx * hy when
        // x * y does not overflow. So we must scale down big numbers.
        // We only have to scale the largest number as we know the product does not overflow
        // (if one is too big then the other cannot be).
        // We also scale if the product is close to overflow to avoid intermediate overflow.
        // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
        // but is included here to have a single low probability branch condition.

        // Add the absolute inputs for a single comparison. The sum will not be more than
        // 3-fold higher than any component.
        final double a = Math.abs(x);
        final double b = Math.abs(y);
        final double ab = Math.abs(xy);
        if (a + b + ab >= SAFE_UPPER) {
            // Only required to scale the largest number as x*y does not overflow.
            if (a > b) {
                return DD.twoProductLow(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
            }
            return DD.twoProductLow(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
        }

        // The result is computed using a product of the low parts.
        // To avoid underflow in the low parts we note that these are approximately a factor
        // of 2^27 smaller than the original inputs so their product will be ~2^54 smaller
        // than the product xy. Ensure the product is at least 2^54 above a sub-normal.
        if (ab <= SAFE_LOWER) {
            // Scaling up here is safe: the largest magnitude cannot be above SAFE_LOWER / MIN_VALUE.
            return DD.twoProductLow(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
        }

        // No scaling required
        return DD.twoProductLow(x, y, xy);
    }
}