1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 // (C) Copyright John Maddock 2006.
19 // Use, modification and distribution are subject to the
20 // Boost Software License, Version 1.0. (See accompanying file
21 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
22
23 package org.apache.commons.numbers.gamma;
24
25 import org.apache.commons.numbers.core.DD;
26
27 /**
28 * Implementation of the <a href="http://mathworld.wolfram.com/Erf.html">error function</a> and
29 * its inverse.
30 *
31 * <p>This code has been adapted from the <a href="https://www.boost.org/">Boost</a>
32 * {@code c++} implementation {@code <boost/math/special_functions/erf.hpp>}.
33 * The erf/erfc functions and their inverses are copyright John Maddock 2006 and subject to
34 * the Boost Software License.
35 *
36 * <p>Additions made to support the erfcx function are original work under the Apache software
37 * license.
38 *
39 * @see
40 * <a href="https://www.boost.org/doc/libs/1_77_0/libs/math/doc/html/math_toolkit/sf_erf/error_function.html">
41 * Boost C++ Error functions</a>
42 */
43 final class BoostErf {
/** The constant {@code 1 / sqrt(pi)}. Used by the rational approximation of erfc for
 * {@code z >= 4} and by the large-x approximation of the scaled complementary error
 * function erfcx. */
private static final double ONE_OVER_ROOT_PI = 0.5641895835477562869480794515607725858;
/** Threshold for the scaled complementary error function erfcx: for {@code x} strictly
 * above this value the approximation {@code (1 / sqrt(pi)) / x} is returned directly. */
private static final double ERFCX_APPROX = 6.71e7;
/** Threshold for |x| used by the erf and erfcx implementations: below this value the
 * computation uses {@code erf(x)}; otherwise {@code erfc(x)} is computed. The final result is
 * achieved by suitable application of symmetry. */
private static final double COMPUTE_ERF = 0.5;
/** Threshold for the scaled complementary error function erfcx for negative x
 * where {@code 2 * exp(x*x)} will overflow. Value is 26.62873571375149.
 * Arguments below the negation of this value return positive infinity. */
private static final double ERFCX_NEG_X_MAX = Math.sqrt(Math.log(Double.MAX_VALUE / 2));
/** Threshold for the scaled complementary error function erfcx for x
 * where {@code exp(x*x) == 1; x <= t}. Value is (1 + 5/16) * 2^-27 = 9.778887033462524E-9.
 * <p>Note: This is used for performance. If set to 0 then the result is computed
 * using expm1(x*x) with the same final result. */
private static final double EXP_XX_1 = 0x1.5p-27;

/** Private constructor: the class only declares static members and is not instantiated. */
private BoostErf() {
    // intentionally empty.
}
66
67 // Code ported from Boost 1.77.0
68 //
69 // boost/math/special_functions/erf.hpp
70 // boost/math/special_functions/detail/erf_inv.hpp
71 //
72 // Original code comments, including measured deviations, are preserved.
73 //
74 // Changes to the Boost implementation:
75 // - Update method names to replace underscores with camel case
76 // - Explicitly inline the polynomial function evaluation
77 // using Horner's method (https://en.wikipedia.org/wiki/Horner%27s_method)
78 // - Support odd function for f(0.0) = -f(-0.0)
79 // - Support the scaled complementary error function erfcx
80 // Erf:
81 // - Change extended precision z*z to compute the square round-off
82 // using Dekker's method
83 // - Change extended precision exp(-z*z) to compute using a
84 // round-off addition to the standard exp result (see NUMBERS-177)
85 // - Change the erf threshold for z when erf(z)=1 from
86 // z > 5.8f to z > 5.930664
87 // - Change the erfc threshold for z when erfc(z)=0 from
88 // z < 28 to z < 27.3
89 // - Change rational function approximation for z > 4 to a function
90 // suitable for erfcx (see NUMBERS-177)
91 // Inverse erf:
92 // - Change inverse erf edge case detection to include NaN
93 // - Change edge case detection for integer z
94 //
95 // Note:
96 // Constants using the 'f' suffix are machine
97 // representable as a float, e.g.
98 // assert 0.0891314744949340820313f == 0.0891314744949340820313;
99 // The values are unchanged from the Boost reference.
100
101 /**
102 * Returns the complementary error function.
103 *
104 * @param x the value.
105 * @return the complementary error function.
106 */
107 static double erfc(double x) {
108 return erfImp(x, true, false);
109 }
110
111 /**
112 * Returns the error function.
113 *
114 * @param x the value.
115 * @return the error function.
116 */
117 static double erf(double x) {
118 return erfImp(x, false, false);
119 }
120
121 /**
122 * 53-bit implementation for the error function.
123 *
124 * <p>Note: The {@code scaled} flag only applies when
125 * {@code z >= 0.5} and {@code invert == true}.
126 * This functionality is used to compute erfcx(z) for positive z.
127 *
128 * @param z Point to evaluate
129 * @param invert true to invert the result (for the complementary error function)
130 * @param scaled true to compute the scaled complementary error function
131 * @return the error function result
132 */
133 private static double erfImp(double z, boolean invert, boolean scaled) {
134 if (Double.isNaN(z)) {
135 return Double.NaN;
136 }
137
138 if (z < 0) {
139 // Here the scaled flag is ignored.
140 if (!invert) {
141 return -erfImp(-z, invert, false);
142 } else if (z < -0.5) {
143 return 2 - erfImp(-z, invert, false);
144 } else {
145 return 1 + erfImp(-z, false, false);
146 }
147 }
148
149 double result;
150
151 //
152 // Big bunch of selection statements now to pick
153 // which implementation to use,
154 // try to put most likely options first:
155 //
156 if (z < COMPUTE_ERF) {
157 //
158 // We're going to calculate erf:
159 //
160 // Here the scaled flag is ignored.
161 if (z < 1e-10) {
162 if (z == 0) {
163 result = z;
164 } else {
165 final double c = 0.003379167095512573896158903121545171688;
166 result = z * 1.125f + z * c;
167 }
168 } else {
169 // Maximum Deviation Found: 1.561e-17
170 // Expected Error Term: 1.561e-17
171 // Maximum Relative Change in Control Points: 1.155e-04
172 // Max Error found at double precision = 2.961182e-17
173
174 final double Y = 1.044948577880859375f;
175 final double zz = z * z;
176 double P;
177 P = -0.000322780120964605683831;
178 P = -0.00772758345802133288487 + P * zz;
179 P = -0.0509990735146777432841 + P * zz;
180 P = -0.338165134459360935041 + P * zz;
181 P = 0.0834305892146531832907 + P * zz;
182 double Q;
183 Q = 0.000370900071787748000569;
184 Q = 0.00858571925074406212772 + Q * zz;
185 Q = 0.0875222600142252549554 + Q * zz;
186 Q = 0.455004033050794024546 + Q * zz;
187 Q = 1.0 + Q * zz;
188 result = z * (Y + P / Q);
189 }
190 // Note: Boost threshold of 5.8f has been raised to approximately 5.93 (6073 / 1024);
191 // threshold of 28 has been lowered to approximately 27.3 (6989/256) where exp(-z*z) = 0.
192 } else if (scaled || (invert ? (z < 27.300781f) : (z < 5.9306640625f))) {
193 //
194 // We'll be calculating erfc:
195 //
196 // Here the scaled flag is used.
197 invert = !invert;
198 if (z < 1.5f) {
199 // Maximum Deviation Found: 3.702e-17
200 // Expected Error Term: 3.702e-17
201 // Maximum Relative Change in Control Points: 2.845e-04
202 // Max Error found at double precision = 4.841816e-17
203 final double Y = 0.405935764312744140625f;
204 final double zm = z - 0.5;
205 double P;
206 P = 0.00180424538297014223957;
207 P = 0.0195049001251218801359 + P * zm;
208 P = 0.0888900368967884466578 + P * zm;
209 P = 0.191003695796775433986 + P * zm;
210 P = 0.178114665841120341155 + P * zm;
211 P = -0.098090592216281240205 + P * zm;
212 double Q;
213 Q = 0.337511472483094676155e-5;
214 Q = 0.0113385233577001411017 + Q * zm;
215 Q = 0.12385097467900864233 + Q * zm;
216 Q = 0.578052804889902404909 + Q * zm;
217 Q = 1.42628004845511324508 + Q * zm;
218 Q = 1.84759070983002217845 + Q * zm;
219 Q = 1.0 + Q * zm;
220 result = Y + P / Q;
221 if (scaled) {
222 result /= z;
223 } else {
224 result *= expmxx(z) / z;
225 }
226 } else if (z < 2.5f) {
227 // Max Error found at double precision = 6.599585e-18
228 // Maximum Deviation Found: 3.909e-18
229 // Expected Error Term: 3.909e-18
230 // Maximum Relative Change in Control Points: 9.886e-05
231 final double Y = 0.50672817230224609375f;
232 final double zm = z - 1.5;
233 double P;
234 P = 0.000235839115596880717416;
235 P = 0.00323962406290842133584 + P * zm;
236 P = 0.0175679436311802092299 + P * zm;
237 P = 0.04394818964209516296 + P * zm;
238 P = 0.0386540375035707201728 + P * zm;
239 P = -0.0243500476207698441272 + P * zm;
240 double Q;
241 Q = 0.00410369723978904575884;
242 Q = 0.0563921837420478160373 + Q * zm;
243 Q = 0.325732924782444448493 + Q * zm;
244 Q = 0.982403709157920235114 + Q * zm;
245 Q = 1.53991494948552447182 + Q * zm;
246 Q = 1.0 + Q * zm;
247 result = Y + P / Q;
248 if (scaled) {
249 result /= z;
250 } else {
251 result *= expmxx(z) / z;
252 }
253 // Lowered Boost threshold from 4.5 to 4.0 as this is the limit
254 // for the Cody erfc approximation
255 } else if (z < 4.0f) {
256 // Maximum Deviation Found: 1.512e-17
257 // Expected Error Term: 1.512e-17
258 // Maximum Relative Change in Control Points: 2.222e-04
259 // Max Error found at double precision = 2.062515e-17
260 final double Y = 0.5405750274658203125f;
261 final double zm = z - 3.5;
262 double P;
263 P = 0.113212406648847561139e-4;
264 P = 0.000250269961544794627958 + P * zm;
265 P = 0.00212825620914618649141 + P * zm;
266 P = 0.00840807615555585383007 + P * zm;
267 P = 0.0137384425896355332126 + P * zm;
268 P = 0.00295276716530971662634 + P * zm;
269 double Q;
270 Q = 0.000479411269521714493907;
271 Q = 0.0105982906484876531489 + Q * zm;
272 Q = 0.0958492726301061423444 + Q * zm;
273 Q = 0.442597659481563127003 + Q * zm;
274 Q = 1.04217814166938418171 + Q * zm;
275 Q = 1.0 + Q * zm;
276 result = Y + P / Q;
277 if (scaled) {
278 result /= z;
279 } else {
280 result *= expmxx(z) / z;
281 }
282 } else {
283 // Rational function approximation for erfc(x > 4.0)
284 //
285 // This approximation is not the Boost implementation.
286 // The Boost function is suitable for [4.5 < z < 28].
287 //
288 // This function is suitable for erfcx(z) as it asymptotes
289 // to (1 / sqrt(pi)) / z at large z.
290 //
291 // Taken from "Rational Chebyshev approximations for the error function"
292 // by W. J. Cody, Math. Comp., 1969, PP. 631-638.
293 //
294 // See NUMBERS-177.
295
296 final double izz = 1 / (z * z);
297 double p;
298 p = 1.63153871373020978498e-2;
299 p = 3.05326634961232344035e-1 + p * izz;
300 p = 3.60344899949804439429e-1 + p * izz;
301 p = 1.25781726111229246204e-1 + p * izz;
302 p = 1.60837851487422766278e-2 + p * izz;
303 p = 6.58749161529837803157e-4 + p * izz;
304 double q;
305 q = 1;
306 q = 2.56852019228982242072e00 + q * izz;
307 q = 1.87295284992346047209e00 + q * izz;
308 q = 5.27905102951428412248e-1 + q * izz;
309 q = 6.05183413124413191178e-2 + q * izz;
310 q = 2.33520497626869185443e-3 + q * izz;
311
312 result = izz * p / q;
313 result = (ONE_OVER_ROOT_PI - result) / z;
314
315 if (!scaled) {
316 // exp(-z*z) can be sub-normal so
317 // multiply by any sub-normal after divide by z
318 result *= expmxx(z);
319 }
320 }
321 } else {
322 //
323 // Any value of z larger than 27.3 will underflow to zero:
324 //
325 result = 0;
326 invert = !invert;
327 }
328
329 if (invert) {
330 // Note: If 0.5 <= z < 28 and the scaled flag is true then
331 // invert will have been flipped to false and the
332 // the result is unchanged as erfcx(z)
333 result = 1 - result;
334 }
335
336 return result;
337 }
338
339 /**
340 * Returns the scaled complementary error function.
341 * <pre>
342 * erfcx(x) = exp(x^2) * erfc(x)
343 * </pre>
344 *
345 * @param x the value.
346 * @return the scaled complementary error function.
347 */
348 static double erfcx(double x) {
349 if (Double.isNaN(x)) {
350 return Double.NaN;
351 }
352
353 // For |z| < 0.5 erfc is computed using erf
354 final double ax = Math.abs(x);
355 if (ax < COMPUTE_ERF) {
356 // Use the erf(x) result.
357 // (1 - erf(x)) * exp(x*x)
358
359 final double erfx = erf(x);
360 if (ax < EXP_XX_1) {
361 // No exponential required
362 return 1 - erfx;
363 }
364
365 // exp(x*x) - exp(x*x) * erf(x)
366 // Avoid use of exp(x*x) with expm1:
367 // exp(x*x) - 1 - (erf(x) * (exp(x*x) - 1)) - erf(x) + 1
368
369 // Sum small to large: |erf(x)| > expm1(x*x)
370 // -erf(x) * expm1(x*x) + expm1(x*x) - erf(x) + 1
371 // Negative x: erf(x) < 0, summed terms are positive, no cancellation occurs.
372 // Positive x: erf(x) > 0 so cancellation can occur.
373 // When terms are ordered by absolute magnitude the magnitude of the next term
374 // is above the round-off from adding the previous term to the sum. Thus
375 // cancellation is negligible compared to errors in the largest computed term (erf(x)).
376
377 final double em1 = Math.expm1(x * x);
378 return -erfx * em1 + em1 - erfx + 1;
379 }
380
381 // Handle negative arguments
382 if (x < 0) {
383 // erfcx(x) = 2*exp(x*x) - erfcx(-x)
384
385 if (x < -ERFCX_NEG_X_MAX) {
386 // Overflow
387 return Double.POSITIVE_INFINITY;
388 }
389
390 final double e = expxx(x);
391 return e - erfImp(-x, true, true) + e;
392 }
393
394 // Approximation for large positive x
395 if (x > ERFCX_APPROX) {
396 return ONE_OVER_ROOT_PI / x;
397 }
398
399 // Compute erfc scaled
400 return erfImp(x, true, true);
401 }
402
403 /**
404 * Returns the inverse complementary error function.
405 *
406 * @param z Value (in {@code [0, 2]}).
407 * @return t such that {@code z = erfc(t)}
408 */
409 static double erfcInv(double z) {
410 //
411 // Begin by testing for domain errors, and other special cases:
412 //
413 if (z < 0 || z > 2 || Double.isNaN(z)) {
414 // Argument outside range [0,2] in inverse erfc function
415 return Double.NaN;
416 }
417 // Domain bounds must be detected as the implementation computes NaN.
418 // (log(q=0) creates infinity and the rational number is
419 // infinity / infinity)
420 if (z == (int) z) {
421 // z return
422 // 2 -inf
423 // 1 0
424 // 0 inf
425 return z == 1 ? 0 : (1 - z) * Double.POSITIVE_INFINITY;
426 }
427
428 //
429 // Normalise the input, so it's in the range [0,1], we will
430 // negate the result if z is outside that range. This is a simple
431 // application of the erfc reflection formula: erfc(-z) = 2 - erfc(z)
432 //
433 final double p;
434 final double q;
435 final double s;
436 if (z > 1) {
437 q = 2 - z;
438 p = 1 - q;
439 s = -1;
440 } else {
441 p = 1 - z;
442 q = z;
443 s = 1;
444 }
445
446 //
447 // And get the result, negating where required:
448 //
449 return s * erfInvImp(p, q);
450 }
451
452 /**
453 * Returns the inverse error function.
454 *
455 * @param z Value (in {@code [-1, 1]}).
456 * @return t such that {@code z = erf(t)}
457 */
458 static double erfInv(double z) {
459 //
460 // Begin by testing for domain errors, and other special cases:
461 //
462 if (z < -1 || z > 1 || Double.isNaN(z)) {
463 // Argument outside range [-1, 1] in inverse erf function
464 return Double.NaN;
465 }
466 // Domain bounds must be detected as the implementation computes NaN.
467 // (log(q=0) creates infinity and the rational number is
468 // infinity / infinity)
469 if (z == (int) z) {
470 // z return
471 // -1 -inf
472 // -0 -0
473 // 0 0
474 // 1 inf
475 return z == 0 ? z : z * Double.POSITIVE_INFINITY;
476 }
477
478 //
479 // Normalise the input, so it's in the range [0,1], we will
480 // negate the result if z is outside that range. This is a simple
481 // application of the erf reflection formula: erf(-z) = -erf(z)
482 //
483 final double p;
484 final double q;
485 final double s;
486 if (z < 0) {
487 p = -z;
488 q = 1 - p;
489 s = -1;
490 } else {
491 p = z;
492 q = 1 - z;
493 s = 1;
494 }
495 //
496 // And get the result, negating where required:
497 //
498 return s * erfInvImp(p, q);
499 }
500
501 /**
502 * Common implementation for inverse erf and erfc functions.
503 *
504 * @param p P-value
505 * @param q Q-value (1-p)
506 * @return the inverse
507 */
508 private static double erfInvImp(double p, double q) {
509 final double result;
510
511 if (p <= 0.5) {
512 //
513 // Evaluate inverse erf using the rational approximation:
514 //
515 // x = p(p+10)(Y+R(p))
516 //
517 // Where Y is a constant, and R(p) is optimised for a low
518 // absolute error compared to |Y|.
519 //
520 // double: Max error found: 2.001849e-18
521 // long double: Max error found: 1.017064e-20
522 // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21
523 //
524 final float Y = 0.0891314744949340820313f;
525 double P;
526 P = -0.00538772965071242932965;
527 P = 0.00822687874676915743155 + P * p;
528 P = 0.0219878681111168899165 + P * p;
529 P = -0.0365637971411762664006 + P * p;
530 P = -0.0126926147662974029034 + P * p;
531 P = 0.0334806625409744615033 + P * p;
532 P = -0.00836874819741736770379 + P * p;
533 P = -0.000508781949658280665617 + P * p;
534 double Q;
535 Q = 0.000886216390456424707504;
536 Q = -0.00233393759374190016776 + Q * p;
537 Q = 0.0795283687341571680018 + Q * p;
538 Q = -0.0527396382340099713954 + Q * p;
539 Q = -0.71228902341542847553 + Q * p;
540 Q = 0.662328840472002992063 + Q * p;
541 Q = 1.56221558398423026363 + Q * p;
542 Q = -1.56574558234175846809 + Q * p;
543 Q = -0.970005043303290640362 + Q * p;
544 Q = 1.0 + Q * p;
545 final double g = p * (p + 10);
546 final double r = P / Q;
547 result = g * Y + g * r;
548 } else if (q >= 0.25) {
549 //
550 // Rational approximation for 0.5 > q >= 0.25
551 //
552 // x = sqrt(-2*log(q)) / (Y + R(q))
553 //
554 // Where Y is a constant, and R(q) is optimised for a low
555 // absolute error compared to Y.
556 //
557 // double : Max error found: 7.403372e-17
558 // long double : Max error found: 6.084616e-20
559 // Maximum Deviation Found (error term) 4.811e-20
560 //
561 final float Y = 2.249481201171875f;
562 final double xs = q - 0.25f;
563 double P;
564 P = -3.67192254707729348546;
565 P = 21.1294655448340526258 + P * xs;
566 P = 17.445385985570866523 + P * xs;
567 P = -44.6382324441786960818 + P * xs;
568 P = -18.8510648058714251895 + P * xs;
569 P = 17.6447298408374015486 + P * xs;
570 P = 8.37050328343119927838 + P * xs;
571 P = 0.105264680699391713268 + P * xs;
572 P = -0.202433508355938759655 + P * xs;
573 double Q;
574 Q = 1.72114765761200282724;
575 Q = -22.6436933413139721736 + Q * xs;
576 Q = 10.8268667355460159008 + Q * xs;
577 Q = 48.5609213108739935468 + Q * xs;
578 Q = -20.1432634680485188801 + Q * xs;
579 Q = -28.6608180499800029974 + Q * xs;
580 Q = 3.9713437953343869095 + Q * xs;
581 Q = 6.24264124854247537712 + Q * xs;
582 Q = 1.0 + Q * xs;
583 final double g = Math.sqrt(-2 * Math.log(q));
584 final double r = P / Q;
585 result = g / (Y + r);
586 } else {
587 //
588 // For q < 0.25 we have a series of rational approximations all
589 // of the general form:
590 //
591 // let: x = sqrt(-log(q))
592 //
593 // Then the result is given by:
594 //
595 // x(Y+R(x-B))
596 //
597 // where Y is a constant, B is the lowest value of x for which
598 // the approximation is valid, and R(x-B) is optimised for a low
599 // absolute error compared to Y.
600 //
601 // Note that almost all code will really go through the first
602 // or maybe second approximation. After than we're dealing with very
603 // small input values indeed.
604 //
605 // Limit for a double: Math.sqrt(-Math.log(Double.MIN_VALUE)) = 27.28...
606 // Branches for x >= 44 (supporting 80 and 128 bit long double) have been removed.
607 final double x = Math.sqrt(-Math.log(q));
608 if (x < 3) {
609 // Max error found: 1.089051e-20
610 final float Y = 0.807220458984375f;
611 final double xs = x - 1.125f;
612 double P;
613 P = -0.681149956853776992068e-9;
614 P = 0.285225331782217055858e-7 + P * xs;
615 P = -0.679465575181126350155e-6 + P * xs;
616 P = 0.00214558995388805277169 + P * xs;
617 P = 0.0290157910005329060432 + P * xs;
618 P = 0.142869534408157156766 + P * xs;
619 P = 0.337785538912035898924 + P * xs;
620 P = 0.387079738972604337464 + P * xs;
621 P = 0.117030156341995252019 + P * xs;
622 P = -0.163794047193317060787 + P * xs;
623 P = -0.131102781679951906451 + P * xs;
624 double Q;
625 Q = 0.01105924229346489121;
626 Q = 0.152264338295331783612 + Q * xs;
627 Q = 0.848854343457902036425 + Q * xs;
628 Q = 2.59301921623620271374 + Q * xs;
629 Q = 4.77846592945843778382 + Q * xs;
630 Q = 5.38168345707006855425 + Q * xs;
631 Q = 3.46625407242567245975 + Q * xs;
632 Q = 1.0 + Q * xs;
633 final double R = P / Q;
634 result = Y * x + R * x;
635 } else if (x < 6) {
636 // Max error found: 8.389174e-21
637 final float Y = 0.93995571136474609375f;
638 final double xs = x - 3;
639 double P;
640 P = 0.266339227425782031962e-11;
641 P = -0.230404776911882601748e-9 + P * xs;
642 P = 0.460469890584317994083e-5 + P * xs;
643 P = 0.000157544617424960554631 + P * xs;
644 P = 0.00187123492819559223345 + P * xs;
645 P = 0.00950804701325919603619 + P * xs;
646 P = 0.0185573306514231072324 + P * xs;
647 P = -0.00222426529213447927281 + P * xs;
648 P = -0.0350353787183177984712 + P * xs;
649 double Q;
650 Q = 0.764675292302794483503e-4;
651 Q = 0.00263861676657015992959 + Q * xs;
652 Q = 0.0341589143670947727934 + Q * xs;
653 Q = 0.220091105764131249824 + Q * xs;
654 Q = 0.762059164553623404043 + Q * xs;
655 Q = 1.3653349817554063097 + Q * xs;
656 Q = 1.0 + Q * xs;
657 final double R = P / Q;
658 result = Y * x + R * x;
659 } else if (x < 18) {
660 // Max error found: 1.481312e-19
661 final float Y = 0.98362827301025390625f;
662 final double xs = x - 6;
663 double P;
664 P = 0.99055709973310326855e-16;
665 P = -0.281128735628831791805e-13 + P * xs;
666 P = 0.462596163522878599135e-8 + P * xs;
667 P = 0.449696789927706453732e-6 + P * xs;
668 P = 0.149624783758342370182e-4 + P * xs;
669 P = 0.000209386317487588078668 + P * xs;
670 P = 0.00105628862152492910091 + P * xs;
671 P = -0.00112951438745580278863 + P * xs;
672 P = -0.0167431005076633737133 + P * xs;
673 double Q;
674 Q = 0.282243172016108031869e-6;
675 Q = 0.275335474764726041141e-4 + Q * xs;
676 Q = 0.000964011807005165528527 + Q * xs;
677 Q = 0.0160746087093676504695 + Q * xs;
678 Q = 0.138151865749083321638 + Q * xs;
679 Q = 0.591429344886417493481 + Q * xs;
680 Q = 1.0 + Q * xs;
681 final double R = P / Q;
682 result = Y * x + R * x;
683 } else {
684 // x < 44
685 // Max error found: 5.697761e-20
686 final float Y = 0.99714565277099609375f;
687 final double xs = x - 18;
688 double P;
689 P = -0.116765012397184275695e-17;
690 P = 0.145596286718675035587e-11 + P * xs;
691 P = 0.411632831190944208473e-9 + P * xs;
692 P = 0.396341011304801168516e-7 + P * xs;
693 P = 0.162397777342510920873e-5 + P * xs;
694 P = 0.254723037413027451751e-4 + P * xs;
695 P = -0.779190719229053954292e-5 + P * xs;
696 P = -0.0024978212791898131227 + P * xs;
697 double Q;
698 Q = 0.509761276599778486139e-9;
699 Q = 0.144437756628144157666e-6 + Q * xs;
700 Q = 0.145007359818232637924e-4 + Q * xs;
701 Q = 0.000690538265622684595676 + Q * xs;
702 Q = 0.0169410838120975906478 + Q * xs;
703 Q = 0.207123112214422517181 + Q * xs;
704 Q = 1.0 + Q * xs;
705 final double R = P / Q;
706 result = Y * x + R * x;
707 }
708 }
709 return result;
710 }
711
712 /**
713 * Compute {@code exp(x*x)} with high accuracy. This is performed using
714 * information in the round-off from {@code x*x}.
715 *
716 * <p>This is accurate at large x to 1 ulp.
717 *
718 * <p>At small x the accuracy cannot be improved over using exp(x*x).
719 * This occurs at {@code x <= 1}.
720 *
721 * <p>Warning: This has no checks for overflow. The method is never called
722 * when {@code x*x > log(MAX_VALUE/2)}.
723 *
724 * @param x Value
725 * @return exp(x*x)
726 */
727 static double expxx(double x) {
728 // Note: If exp(a) overflows this can create NaN if the
729 // round-off b is negative or zero:
730 // exp(a) * exp1m(b) + exp(a)
731 // inf * 0 + inf or inf * -b + inf
732 final DD x2 = DD.ofSquare(x);
733 return expxx(x2.hi(), x2.lo());
734 }
735
736 /**
737 * Compute {@code exp(-x*x)} with high accuracy. This is performed using
738 * information in the round-off from {@code x*x}.
739 *
740 * <p>This is accurate at large x to 1 ulp until exp(-x*x) is close to
741 * sub-normal. For very small exp(-x*x) the adjustment is sub-normal and
742 * bits can be lost in the adjustment for a max observed error of {@code < 2} ulp.
743 *
744 * <p>At small x the accuracy cannot be improved over using exp(-x*x).
745 * This occurs at {@code x <= 1}.
746 *
747 * @param x Value
748 * @return exp(-x*x)
749 */
750 static double expmxx(double x) {
751 final DD x2 = DD.ofSquare(x);
752 return expxx(-x2.hi(), -x2.lo());
753 }
754
755 /**
756 * Compute {@code exp(a+b)} with high accuracy assuming {@code a+b = a}.
757 *
758 * <p>This is accurate at large positive a to 1 ulp. If a is negative and exp(a) is
759 * close to sub-normal a bit of precision may be lost when adjusting result
760 * as the adjustment is sub-normal (max observed error {@code < 2} ulp).
761 * For the use case of multiplication of a number less than 1 by exp(-x*x), a = -x*x,
762 * the result will be sub-normal and the rounding error is lost.
763 *
764 * <p>At small |a| the accuracy cannot be improved over using exp(a) as the
765 * round-off is too small to create terms that can adjust the standard result by
766 * more than 0.5 ulp. This occurs at {@code |a| <= 1}.
767 *
768 * @param a High bits of a split number
769 * @param b Low bits of a split number
770 * @return exp(a+b)
771 */
772 private static double expxx(double a, double b) {
773 // exp(a+b) = exp(a) * exp(b)
774 // = exp(a) * (exp(b) - 1) + exp(a)
775 // Assuming:
776 // 1. -746 < a < 710 for no under/overflow of exp(a)
777 // 2. a+b = a
778 // As b -> 0 then exp(b) -> 1; expm1(b) -> b
779 // The round-off b is limited to ~ 0.5 * ulp(746) ~ 5.68e-14
780 // and we can use an approximation for expm1 (x/1! + x^2/2! + ...)
781 // The second term is required for the expm1 result but the
782 // bits are not significant to change the following sum with exp(a)
783
784 final double ea = Math.exp(a);
785 // b ~ expm1(b)
786 return ea * b + ea;
787 }
788 }