DDMath.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.numbers.core;

  18. /**
  19.  * Computes extended precision floating-point operations.
  20.  *
  21.  * <p>This class supplements the arithmetic operations in the {@link DD} class providing
  22.  * greater accuracy at the cost of performance.
  23.  *
  24.  * @since 1.2
  25.  */
  26. public final class DDMath {
  27.     /** 0.5. */
  28.     private static final double HALF = 0.5;
  29.     /** The limit for safe multiplication of {@code x*y}, assuming values above 1.
  30.      * Used to maintain positive values during the power computation. */
  31.     private static final double SAFE_MULTIPLY = 0x1.0p500;

  32.     /**
  33.      * Mutable double-double number used for working.
  34.      * This structure is used for the output argument during triple-double computations.
  35.      */
  36.     private static final class MDD {
  37.         /** The high part of the double-double number. */
  38.         private double x;
  39.         /** The low part of the double-double number. */
  40.         private double xx;

  41.         /** Package-private constructor. */
  42.         MDD() {}
  43.     }

  44.     /** No instances. */
  45.     private DDMath() {}

  46.     /**
  47.      * Compute the number {@code x} raised to the power {@code n}.
  48.      *
  49.      * <p>The value is returned as fractional {@code f} and integral
  50.      * {@code 2^exp} components.
  51.      * <pre>
  52.      * (x+xx)^n = (f+ff) * 2^exp
  53.      * </pre>
  54.      *
  55.      * <p>The combined fractional part (f, ff) is in the range {@code [0.5, 1)}.
  56.      *
  57.      * <p>Special cases:
  58.      * <ul>
  59.      *  <li>If {@code (x, xx)} is zero the high part of the fractional part is
  60.      *      computed using {@link Math#pow(double, double) Math.pow(x, n)} and the exponent is 0.
  61.      *  <li>If {@code n = 0} the fractional part is 0.5 and the exponent is 1.
  62.      *  <li>If {@code (x, xx)} is an exact power of 2 the fractional part is 0.5 and the exponent
  63.      *      is the power of 2 minus 1.
  64.      *  <li>If the result high-part is an exact power of 2 and the low-part has an opposite
  65.      *      signed non-zero magnitude then the fraction high-part {@code f} will be {@code +/-1} such that
  66.      *      the double-double number is in the range {@code [0.5, 1)}.
  67.      *  <li>If the argument is not finite then a fractional representation is not possible.
  68.      *      In this case the fraction and the scale factor is undefined.
  69.      * </ul>
  70.      *
  71.      * <p>The computed result is within 1 eps of the exact result where eps is 2<sup>-106</sup>.
  72.      *
  73.      * <p>The performance is approximately 4-fold slower than {@link DD#pow(int, long[])}.
  74.      *
  75.      * @param x Number.
  76.      * @param n Power.
  77.      * @param exp Result power of two scale factor (integral exponent).
  78.      * @return Fraction part.
  79.      * @see DD#frexp(int[])
  80.      */
  81.     public static DD pow(DD x, int n, long[] exp) {
  82.         // Edge cases.
  83.         if (n == 0) {
  84.             exp[0] = 1;
  85.             return DD.of(0.5);
  86.         }
  87.         // IEEE result for non-finite or zero
  88.         if (!Double.isFinite(x.hi()) || x.hi() == 0) {
  89.             exp[0] = 0;
  90.             return DD.of(Math.pow(x.hi(), n));
  91.         }
  92.         // Here the number is non-zero finite
  93.         final int[] ie = {0};
  94.         final DD f = x.frexp(ie);
  95.         final long b = ie[0];
  96.         // Handle exact powers of 2
  97.         if (Math.abs(f.hi()) == HALF && f.lo() == 0) {
  98.             // (f * 2^b)^n = (2f)^n * 2^(b-1)^n
  99.             // Use Math.pow to create the sign.
  100.             // Note the result must be scaled to the fractional representation
  101.             // by multiplication by 0.5 and addition of 1 to the exponent.
  102.             final double y0 = 0.5 * Math.pow(2 * f.hi(), n);
  103.             // Propagate sign change (y0*f.x) to the original zero (this.xx)
  104.             final double y1 = Math.copySign(0.0, y0 * f.hi() * x.lo());
  105.             exp[0] = 1 + (b - 1) * n;
  106.             return DD.of(y0, y1);
  107.         }
  108.         return computePowScaled(b, f.hi(), f.lo(), n, exp);
  109.     }

  110.     /**
  111.      * Compute the number {@code x} (non-zero finite) raised to the power {@code n}.
  112.      *
  113.      * <p>Performs the computation using triple-double precision. If the input power is
  114.      * negative the result is computed using the absolute value of {@code n} and then
  115.      * inverted by dividing into 1.
  116.      *
  117.      * @param b Integral component 2^b of x.
  118.      * @param x Fractional high part of x.
  119.      * @param xx Fractional low part of x.
  120.      * @param n Power (in [2, 2^31]).
  121.      * @param exp Result power of two scale factor (integral exponent).
  122.      * @return Fraction part.
  123.      */
  124.     private static DD computePowScaled(long b, double x, double xx, int n, long[] exp) {
  125.         // Same as DD.computePowScaled using a triple-double intermediate.

  126.         // triple-double multiplication:
  127.         // (a0, a1, a2) * (b0, b1, b2)
  128.         // a x b ~ a0b0                 O(1) term
  129.         //       + a0b1 + a1b0          O(eps) terms
  130.         //       + a0b2 + a1b1 + a2b0   O(eps^2) terms
  131.         //       + a1b2 + a2b1          O(eps^3) terms
  132.         //       + a2b2                 O(eps^4) term  (not required for the first 159 bits)
  133.         // Higher terms require two-prod if the round-off is <= O(eps^3).
  134.         // (pij,qij) = two-prod(ai, bj); pij = O(eps^i+j); qij = O(eps^i+j+1)
  135.         // p00                      O(1)
  136.         // p01, p10, q00            O(eps)
  137.         // p02, p11, p20, q01, q10  O(eps^2)
  138.         // p12, p21, q02, q11, q20  O(eps^3)
  139.         // Sum terms of the same order. Carry round-off to lower order:
  140.         // s0 = p00                                        Order(1)
  141.         // Sum (p01, p10, q00) -> (s1, r2, r3a)            Order(eps)
  142.         // Sum (p02, p11, p20, q01, q10, r2) -> (s2, r3b)  Order(eps^2)
  143.         // Sum (p12, p21, q02, q11, q20, r3a, r3b) -> s3   Order(eps^3)
  144.         //
  145.         // Simplifies for (b0, b1):
  146.         // Sum (p01, p10, q00) -> (s1, r2, r3a)            Order(eps)
  147.         // Sum (p11, p20, q01, q10, r2) -> (s2, r3b)       Order(eps^2)
  148.         // Sum (p21, q11, q20, r3a, r3b) -> s3             Order(eps^3)
  149.         //
  150.         // Simplifies for the square:
  151.         // Sum (2 * p01, q00) -> (s1, r2)                  Order(eps)
  152.         // Sum (2 * p02, 2 * q01, p11, r2) -> (s2, r3b)    Order(eps^2)
  153.         // Sum (2 * p12, 2 * q02, q11, r3b) -> s3          Order(eps^3)

  154.         // Scale the input in [0.5, 1) to be above 1. Represented as 2^be * b.
  155.         final long be = b - 1;
  156.         final double b0 = x * 2;
  157.         final double b1 = xx * 2;
  158.         // Split b
  159.         final double b0h = DD.highPart(b0);
  160.         final double b0l = b0 - b0h;
  161.         final double b1h = DD.highPart(b1);
  162.         final double b1l = b1 - b1h;

  163.         // Initialise the result as x^1. Represented as 2^fe * f.
  164.         long fe = be;
  165.         double f0 = b0;
  166.         double f1 = b1;
  167.         double f2 = 0;

  168.         // Shift the highest set bit off the top.
  169.         // Any remaining bits are detected in the sign bit.
  170.         final int an = Math.abs(n);
  171.         final int shift = Integer.numberOfLeadingZeros(an) + 1;
  172.         int bits = an << shift;
  173.         DD t;
  174.         final MDD m = new MDD();

  175.         // Multiplication is done inline with some triple precision helper routines.
  176.         // Process remaining bits below highest set bit.
  177.         for (int i = 32 - shift; i != 0; i--, bits <<= 1) {
  178.             // Square the result
  179.             fe <<= 1;
  180.             double a0h = DD.highPart(f0);
  181.             double a0l = f0 - a0h;
  182.             double a1h = DD.highPart(f1);
  183.             double a1l = f1 - a1h;
  184.             double a2h = DD.highPart(f2);
  185.             double a2l = f2 - a2h;
  186.             double p00 = f0 * f0;
  187.             double q00 = DD.twoSquareLow(a0h, a0l, p00);
  188.             double p01 = f0 * f1;
  189.             double q01 = DD.twoProductLow(a0h, a0l, a1h, a1l, p01);
  190.             final double p02 = f0 * f2;
  191.             final double q02 = DD.twoProductLow(a0h, a0l, a2h, a2l, p02);
  192.             double p11 = f1 * f1;
  193.             double q11 = DD.twoSquareLow(a1h, a1l, p11);
  194.             final double p12 = f1 * f2;
  195.             double s0 = p00;
  196.             // Sum (2 * p01, q00) -> (s1, r2)                  Order(eps)
  197.             double s1 = 2 * p01 + q00;
  198.             double r2 = DD.twoSumLow(2 * p01, q00, s1);
  199.             // Sum (2 * p02, 2 * q01, p11, r2) -> (s2, r3b)    Order(eps^2)
  200.             double s2 = p02 + q01;
  201.             double r3b = DD.twoSumLow(p02, q01, s2);
  202.             double u = p11 + r2;
  203.             double v = DD.twoSumLow(p11, r2, u);
  204.             t = DD.add(2 * s2, 2 * r3b, u, v);
  205.             s2 = t.hi();
  206.             r3b = t.lo();
  207.             // Sum (2 * p12, 2 * q02, q11, r3b) -> s3          Order(eps^3)
  208.             double s3 = 2 * (p12 + q02) + q11 + r3b;
  209.             f0 = norm3(s0, s1, s2, s3, m);
  210.             f1 = m.x;
  211.             f2 = m.xx;

  212.             // Rescale
  213.             if (Math.abs(f0) > SAFE_MULTIPLY) {
  214.                 // Scale back to the [1, 2) range. As safe multiply is 2^500
  215.                 // the exponent should be < 1001 so the twoPow scaling factor is supported.
  216.                 final int e = Math.getExponent(f0);
  217.                 final double s = DD.twoPow(-e);
  218.                 fe += e;
  219.                 f0 *= s;
  220.                 f1 *= s;
  221.                 f2 *= s;
  222.             }

  223.             if (bits < 0) {
  224.                 // Multiply by b
  225.                 fe += be;
  226.                 a0h = DD.highPart(f0);
  227.                 a0l = f0 - a0h;
  228.                 a1h = DD.highPart(f1);
  229.                 a1l = f1 - a1h;
  230.                 a2h = DD.highPart(f2);
  231.                 a2l = f2 - a2h;
  232.                 p00 = f0 * b0;
  233.                 q00 = DD.twoProductLow(a0h, a0l, b0h, b0l, p00);
  234.                 p01 = f0 * b1;
  235.                 q01 = DD.twoProductLow(a0h, a0l, b1h, b1l, p01);
  236.                 final double p10 = f1 * b0;
  237.                 final double q10 = DD.twoProductLow(a1h, a1l, b0h, b0l, p10);
  238.                 p11 = f1 * b1;
  239.                 q11 = DD.twoProductLow(a1h, a1l, b1h, b1l, p11);
  240.                 final double p20 = f2 * b0;
  241.                 final double q20 = DD.twoProductLow(a2h, a2l, b0h, b0l, p20);
  242.                 final double p21 = f2 * b1;
  243.                 s0 = p00;
  244.                 // Sum (p01, p10, q00) -> (s1, r2, r3a)            Order(eps)
  245.                 u = p01 + p10;
  246.                 v = DD.twoSumLow(p01, p10, u);
  247.                 s1 = q00 + u;
  248.                 final double w = DD.twoSumLow(q00, u, s1);
  249.                 r2 = v + w;
  250.                 final double r3a = DD.twoSumLow(v, w, r2);
  251.                 // Sum (p11, p20, q01, q10, r2) -> (s2, r3b)       Order(eps^2)
  252.                 s2 = p11 + p20;
  253.                 r3b = DD.twoSumLow(p11, p20, s2);
  254.                 u = q01 + q10;
  255.                 v = DD.twoSumLow(q01, q10, u);
  256.                 t = DD.add(s2, r3b, u, v);
  257.                 s2 = t.hi() + r2;
  258.                 r3b = DD.twoSumLow(t.hi(), r2, s2);
  259.                 // Sum (p21, q11, q20, r3a, r3b) -> s3             Order(eps^3)
  260.                 s3 = p21 + q11 + q20 + r3a + r3b;
  261.                 f0 = norm3(s0, s1, s2, s3, m);
  262.                 f1 = m.x;
  263.                 f2 = m.xx;
  264.                 // Avoid rescale as x2 is in [1, 2)
  265.             }
  266.         }

  267.         // Ensure (f0, f1) are 1 ulp exact
  268.         final double u = f1 + f2;
  269.         t = DD.fastTwoSum(f0, u);
  270.         final int[] e = {0};

  271.         // If the power is negative, invert in triple precision
  272.         if (n < 0) {
  273.             // Require the round-off
  274.             final double v = DD.fastTwoSumLow(f1, f2, u);
  275.             // Result is in approximately [1, 2^501] so inversion is safe.
  276.             t = inverse3(t.hi(), t.lo(), v);
  277.             // Rescale to [0.5, 1.0]
  278.             t = t.frexp(e);
  279.             exp[0] = e[0] - fe;
  280.             return t;
  281.         }

  282.         t = t.frexp(e);
  283.         exp[0] = fe + e[0];
  284.         return t;
  285.     }

  286.     /**
  287.      * Normalize (s0, s1, s2, s3) to (s0, s1, s2).
  288.      *
  289.      * @param s0 High part of s.
  290.      * @param s1 Second part of s.
  291.      * @param s2 Third part of s.
  292.      * @param s3 Fourth part of s.
  293.      * @param s12 Output parts (s1, s2)
  294.      * @return s0
  295.      */
  296.     private static double norm3(double s0, double s1, double s2, double s3, MDD s12) {
  297.         double q;
  298.         // Compress (Schewchuk Fig. 15) (s0, s1, s2, s3) -> (g0, g1, g2, g3)
  299.         final double g0 = s0 + s1;
  300.         q = DD.fastTwoSumLow(s0, s1, g0);
  301.         final double g1 = q + s2;
  302.         q = DD.fastTwoSumLow(q, s2, g1);
  303.         final double g2 = q + s3;
  304.         final double g3 = DD.fastTwoSumLow(q, s3, g2);
  305.         // (g0, g1, g2, g3) -> (h0, h1, h2, h3), returned as (h0, h1, h2 + h3)
  306.         q = g1 + g2;
  307.         s12.xx = DD.fastTwoSumLow(g1, g2, q) + g3;
  308.         final double h0 = g0 + q;
  309.         s12.x = DD.fastTwoSumLow(g0, q, h0);
  310.         return h0;
  311.     }

  312.     /**
  313.      * Compute the inverse of {@code (y, yy, yyy)}.
  314.      * If {@code y = 0} the result is undefined.
  315.      *
  316.      * <p>This is special routine used in {@link #pow(int, long[])}
  317.      * to invert the triple precision result.
  318.      *
  319.      * @param y First part of y.
  320.      * @param yy Second part of y.
  321.      * @param yyy Third part of y.
  322.      * @return the inverse
  323.      */
  324.     private static DD inverse3(double y, double yy, double yyy) {
  325.         // Long division (1, 0, 0) / (y, yy, yyy)
  326.         double r;
  327.         double rr;
  328.         double rrr;
  329.         double t;
  330.         // quotient q0 = x / y
  331.         final double q0 = 1 / y;
  332.         // remainder r0 = x - q0 * y
  333.         final MDD q = new MDD();
  334.         t = multiply3(y, yy, yyy, q0, q);
  335.         r = add3(-t, -q.x, -q.xx, 1, q);
  336.         rr = q.x;
  337.         rrr = q.xx;
  338.         // next quotient q1 = r0 / y
  339.         final double q1 = r / y;
  340.         // remainder r1 = r0 - q1 * y
  341.         t = multiply3(y, yy, yyy, q1, q);
  342.         r = add3(-t, -q.x, -q.xx, r, rr, rrr, q);
  343.         rr = q.x;
  344.         rrr = q.xx;
  345.         // next quotient q2 = r1 / y
  346.         final double q2 = r / y;
  347.         // remainder r2 = r1 - q2 * y
  348.         t = multiply3(y, yy, yyy, q2, q);
  349.         r = add3(-t, -q.x, -q.xx, r, rr, rrr, q);
  350.         // next quotient q3 = r2 / y
  351.         final double q3 = r / y;
  352.         // Collect (q0, q1, q2, q3) to (s0, s1, s2)
  353.         t = norm3(q0, q1, q2, q3, q);
  354.         // Reduce to (s0, s1)
  355.         return DD.fastTwoSum(t, q.x + q.xx);
  356.     }

  357.     /**
  358.      * Compute the multiplication product of {@code (a0,a1,a2)} and {@code b}.
  359.      *
  360.      * @param a0 High part of a.
  361.      * @param a1 Second part of a.
  362.      * @param a2 Third part of a.
  363.      * @param b Factor.
  364.      * @param s12 Output parts (s1, s2)
  365.      * @return s0
  366.      */
  367.     private static double multiply3(double a0, double a1, double a2, double b, MDD s12) {
  368.         // Triple-Double x Double
  369.         // a x b ~ a0b                 O(1) term
  370.         //       + a1b                 O(eps) terms
  371.         //       + a2b                 O(eps^2) terms
  372.         // Higher terms require two-prod if the round-off is <= O(eps^2).
  373.         // (pij,qij) = two-prod(ai, bj); pij = O(eps^i+j); qij = O(eps^i+j+1)
  374.         // p00           O(1)
  375.         // p10, q00      O(eps)
  376.         // p20, q10      O(eps^2)
  377.         // |a2| < |eps^2 a0| => |a2 * b| < eps^2 |a0 * b| and q20 < eps^3 |a0 * b|
  378.         //
  379.         // Sum terms of the same order. Carry round-off to lower order:
  380.         // s0 = p00                              Order(1)
  381.         // Sum (p10, q00) -> (s1, r1)            Order(eps)
  382.         // Sum (p20, q10, r1) -> (s2, s3)        Order(eps^2)
  383.         final double a0h = DD.highPart(a0);
  384.         final double a0l = a0 - a0h;
  385.         final double a1h = DD.highPart(a1);
  386.         final double a1l = a1 - a1h;
  387.         final double b0h = DD.highPart(b);
  388.         final double b0l = b - b0h;
  389.         final double p00 = a0 * b;
  390.         final double q00 = DD.twoProductLow(a0h, a0l, b0h, b0l, p00);
  391.         final double p10 = a1 * b;
  392.         final double q10 = DD.twoProductLow(a1h, a1l, b0h, b0l, p10);
  393.         final double p20 = a2 * b;
  394.         // Sum (p10, q00) -> (s1, r1)            Order(eps)
  395.         final double s1 = p10 + q00;
  396.         final double r1 = DD.twoSumLow(p10, q00, s1);
  397.         // Sum (p20, q10, r1) -> (s2, s3)        Order(eps^2)
  398.         double u = p20 + q10;
  399.         final double v = DD.twoSumLow(p20, q10, u);
  400.         final double s2 = u + r1;
  401.         u = DD.twoSumLow(u, r1, s2);
  402.         return norm3(p00, s1, s2, v + u, s12);
  403.     }

  404.     /**
  405.      * Compute the sum of {@code (a0,a1,a2)} and {@code b}.
  406.      *
  407.      * @param a0 High part of a.
  408.      * @param a1 Second part of a.
  409.      * @param a2 Third part of a.
  410.      * @param b Addend.
  411.      * @param s12 Output parts (s1, s2)
  412.      * @return s0
  413.      */
  414.     private static double add3(double a0, double a1, double a2, double b, MDD s12) {
  415.         // Hide et al (2008) Fig.5: Quad-Double + Double without final a3.
  416.         double u;
  417.         double v;
  418.         final double s0 = a0 + b;
  419.         u = DD.twoSumLow(a0, b, s0);
  420.         final double s1 = a1 + u;
  421.         v = DD.twoSumLow(a1, u, s1);
  422.         final double s2 = a2 + v;
  423.         u = DD.twoSumLow(a2, v, s2);
  424.         return norm3(s0, s1, s2, u, s12);
  425.     }

  426.     /**
  427.      * Compute the sum of {@code (a0,a1,a2)} and {@code (b0,b1,b2))}.
  428.      * It is assumed the absolute magnitudes of a and b are equal and the sign
  429.      * of a and b are opposite.
  430.      *
  431.      * @param a0 High part of a.
  432.      * @param a1 Second part of a.
  433.      * @param a2 Third part of a.
  434.      * @param b0 High part of b.
  435.      * @param b1 Second part of b.
  436.      * @param b2 Third part of b.
  437.      * @param s12 Output parts (s1, s2)
  438.      * @return s0
  439.      */
  440.     private static double add3(double a0, double a1, double a2, double b0, double b1, double b2, MDD s12) {
  441.         // Hide et al (2008) Fig.6: Quad-Double + Quad-Double without final a3, b3.
  442.         double u;
  443.         double v;
  444.         // a0 + b0 -> (s0, r1)
  445.         final double s0 = a0 + b0;
  446.         final double r1 = DD.twoSumLow(a0, b0, s0);
  447.         // a1 + b1 + r1 -> (s1, r2, r3)
  448.         u = a1 + b1;
  449.         v = DD.twoSumLow(a1, b1, u);
  450.         final double s1 = r1 + u;
  451.         u = DD.twoSumLow(r1, u, s1);
  452.         final double r2 = v + u;
  453.         final double r3 = DD.twoSumLow(v, u, r2);
  454.         // (a2 + b2 + r2) + r3 -> (s2, s3)
  455.         u = a2 + b2;
  456.         v = DD.twoSumLow(a2, b2, u);
  457.         final double s2 = r2 + u;
  458.         u = DD.twoSumLow(r2, u, s2);
  459.         final double s3 = v + u + r3;
  460.         return norm3(s0, s1, s2, s3, s12);
  461.     }
  462. }