ExtendedPrecision.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.numbers.core;

  18. /**
  19.  * Computes extended precision floating-point operations.
  20.  *
  21.  * <p>Extended precision computation is delegated to the {@link DD} class. The methods here
  22.  * are extensions to prevent overflow or underflow in intermediate computations.
  23.  */
  24. final class ExtendedPrecision {
  25.     /**
  26.      * The upper limit above which a number may overflow during the split into a high part.
  27.      * Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe
  28.      * limit is a value with an exponent of (1023 - 27) = 2^996.
  29.      * 996 is the value obtained from {@code Math.getExponent(Double.MAX_VALUE / MULTIPLIER)}.
  30.      */
  31.     private static final double SAFE_UPPER = 0x1.0p996;
  32.     /**
  33.      * The lower limit for a product {@code x * y} below which the round-off component may be
  34.      * sub-normal. This is set as 2^-1022 * 2^54.
  35.      */
  36.     private static final double SAFE_LOWER = 0x1.0p-968;

  37.     /** The scale to use when down-scaling during a split into a high part.
  38.      * This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */
  39.     private static final double DOWN_SCALE = 0x1.0p-30;
  40.     /** The scale to use when up-scaling during a split into a high part.
  41.      * This is the inverse of {@link #DOWN_SCALE}. */
  42.     private static final double UP_SCALE = 0x1.0p30;

  43.     /** The upscale factor squared. */
  44.     private static final double UP_SCALE2 = 0x1.0p60;
  45.     /** The downscale factor squared. */
  46.     private static final double DOWN_SCALE2 = 0x1.0p-60;

  47.     /** Private constructor. */
  48.     private ExtendedPrecision() {
  49.         // intentionally empty.
  50.     }

  51.     /**
  52.      * Compute the low part of the double length number {@code (z,zz)} for the exact
  53.      * product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
  54.      * containing the magnitude of the rounding error when converting the exact 106-bit
  55.      * significand of the multiplication result to a 53-bit significand.
  56.      *
  57.      * <p>The method is written to be functionally similar to using a fused multiply add (FMA)
  58.      * operation to compute the low part, for example JDK 9's Math.fma function (note the sign
  59.      * change in the input argument for the product):
  60.      * <pre>
  61.      *  double x = ...;
  62.      *  double y = ...;
  63.      *  double xy = x * y;
  64.      *  double low1 = Math.fma(x, y, -xy);
  65.      *  double low2 = productLow(x, y, xy);
  66.      * </pre>
  67.      *
  68.      * <p>Special cases:
  69.      *
  70.      * <ul>
  71.      *  <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
  72.      *  <li>If {@code x * y} is infinite or NaN then the result is NaN.
  73.      * </ul>
  74.      *
  75.      * <p>This method delegates to {@link DD#twoProductLow(double, double, double)} but uses
  76.      * scaling to avoid intermediate overflow.
  77.      *
  78.      * @param x First factor.
  79.      * @param y Second factor.
  80.      * @param xy Product of the factors (x * y).
  81.      * @return the low part of the product double length number
  82.      * @see DD#twoProductLow(double, double, double)
  83.      */
  84.     static double productLow(double x, double y, double xy) {
  85.         // Verify the input. This must be NaN safe.
  86.         //assert Double.compare(x * y, xy) == 0

  87.         // If the number is sub-normal, inf or nan there is no round-off.
  88.         if (DD.isNotNormal(xy)) {
  89.             // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
  90.             return xy - xy;
  91.         }

  92.         // The result xy is finite and normal.
  93.         // Use Dekker's mul12 algorithm that splits the values into high and low parts.
  94.         // Dekker's split using multiplication will overflow if the value is within 2^27
  95.         // of double max value. It can also produce 26-bit approximations that are larger
  96.         // than the input numbers for the high part causing overflow in hx * hy when
  97.         // x * y does not overflow. So we must scale down big numbers.
  98.         // We only have to scale the largest number as we know the product does not overflow
  99.         // (if one is too big then the other cannot be).
  100.         // We also scale if the product is close to overflow to avoid intermediate overflow.
  101.         // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
  102.         // but is included here to have a single low probability branch condition.

  103.         // Add the absolute inputs for a single comparison. The sum will not be more than
  104.         // 3-fold higher than any component.
  105.         final double a = Math.abs(x);
  106.         final double b = Math.abs(y);
  107.         final double ab = Math.abs(xy);
  108.         if (a + b + ab >= SAFE_UPPER) {
  109.             // Only required to scale the largest number as x*y does not overflow.
  110.             if (a > b) {
  111.                 return DD.twoProductLow(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
  112.             }
  113.             return DD.twoProductLow(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
  114.         }

  115.         // The result is computed using a product of the low parts.
  116.         // To avoid underflow in the low parts we note that these are approximately a factor
  117.         // of 2^27 smaller than the original inputs so their product will be ~2^54 smaller
  118.         // than the product xy. Ensure the product is at least 2^54 above a sub-normal.
  119.         if (ab <= SAFE_LOWER) {
  120.             // Scaling up here is safe: the largest magnitude cannot be above SAFE_LOWER / MIN_VALUE.
  121.             return DD.twoProductLow(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
  122.         }

  123.         // No scaling required
  124.         return DD.twoProductLow(x, y, xy);
  125.     }
  126. }