1*71db0c75SAndroid Build Coastguard Worker //===-- Implementation of cbrt function -----------------------------------===//
2*71db0c75SAndroid Build Coastguard Worker //
3*71db0c75SAndroid Build Coastguard Worker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*71db0c75SAndroid Build Coastguard Worker // See https://llvm.org/LICENSE.txt for license information.
5*71db0c75SAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*71db0c75SAndroid Build Coastguard Worker //
7*71db0c75SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
8*71db0c75SAndroid Build Coastguard Worker
9*71db0c75SAndroid Build Coastguard Worker #include "src/math/cbrt.h"
10*71db0c75SAndroid Build Coastguard Worker #include "hdr/fenv_macros.h"
11*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FEnvImpl.h"
12*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FPBits.h"
13*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/PolyEval.h"
14*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/double_double.h"
15*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/dyadic_float.h"
16*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/multiply_add.h"
17*71db0c75SAndroid Build Coastguard Worker #include "src/__support/common.h"
18*71db0c75SAndroid Build Coastguard Worker #include "src/__support/integer_literals.h"
19*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/config.h"
20*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21*71db0c75SAndroid Build Coastguard Worker
22*71db0c75SAndroid Build Coastguard Worker #if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
23*71db0c75SAndroid Build Coastguard Worker #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
24*71db0c75SAndroid Build Coastguard Worker #endif
25*71db0c75SAndroid Build Coastguard Worker
26*71db0c75SAndroid Build Coastguard Worker namespace LIBC_NAMESPACE_DECL {
27*71db0c75SAndroid Build Coastguard Worker
28*71db0c75SAndroid Build Coastguard Worker using DoubleDouble = fputil::DoubleDouble;
29*71db0c75SAndroid Build Coastguard Worker using Float128 = fputil::DyadicFloat<128>;
30*71db0c75SAndroid Build Coastguard Worker
31*71db0c75SAndroid Build Coastguard Worker namespace {
32*71db0c75SAndroid Build Coastguard Worker
33*71db0c75SAndroid Build Coastguard Worker // Initial approximation of x^(-2/3) for 1 <= x < 2.
34*71db0c75SAndroid Build Coastguard Worker // Polynomial generated by Sollya with:
35*71db0c75SAndroid Build Coastguard Worker // > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]);
36*71db0c75SAndroid Build Coastguard Worker // > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]);
37*71db0c75SAndroid Build Coastguard Worker // 0x1.28...p-21
intial_approximation(double x)38*71db0c75SAndroid Build Coastguard Worker double intial_approximation(double x) {
39*71db0c75SAndroid Build Coastguard Worker constexpr double COEFFS[8] = {
40*71db0c75SAndroid Build Coastguard Worker 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3,
41*71db0c75SAndroid Build Coastguard Worker -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0,
42*71db0c75SAndroid Build Coastguard Worker 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6,
43*71db0c75SAndroid Build Coastguard Worker };
44*71db0c75SAndroid Build Coastguard Worker
45*71db0c75SAndroid Build Coastguard Worker double x_sq = x * x;
46*71db0c75SAndroid Build Coastguard Worker
47*71db0c75SAndroid Build Coastguard Worker double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]);
48*71db0c75SAndroid Build Coastguard Worker double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]);
49*71db0c75SAndroid Build Coastguard Worker double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]);
50*71db0c75SAndroid Build Coastguard Worker double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]);
51*71db0c75SAndroid Build Coastguard Worker
52*71db0c75SAndroid Build Coastguard Worker double x_4 = x_sq * x_sq;
53*71db0c75SAndroid Build Coastguard Worker double d0 = fputil::multiply_add(x_sq, c1, c0);
54*71db0c75SAndroid Build Coastguard Worker double d1 = fputil::multiply_add(x_sq, c3, c2);
55*71db0c75SAndroid Build Coastguard Worker
56*71db0c75SAndroid Build Coastguard Worker return fputil::multiply_add(x_4, d1, d0);
57*71db0c75SAndroid Build Coastguard Worker }
58*71db0c75SAndroid Build Coastguard Worker
59*71db0c75SAndroid Build Coastguard Worker // Get the error term for Newton iteration:
60*71db0c75SAndroid Build Coastguard Worker // h(x) = x^3 * a^2 - 1,
61*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_TARGET_CPU_HAS_FMA
get_error(const DoubleDouble & x_3,const DoubleDouble & a_sq)62*71db0c75SAndroid Build Coastguard Worker double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
63*71db0c75SAndroid Build Coastguard Worker return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) +
64*71db0c75SAndroid Build Coastguard Worker fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo);
65*71db0c75SAndroid Build Coastguard Worker }
66*71db0c75SAndroid Build Coastguard Worker #else
get_error(const DoubleDouble & x_3,const DoubleDouble & a_sq)67*71db0c75SAndroid Build Coastguard Worker double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
68*71db0c75SAndroid Build Coastguard Worker DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3);
69*71db0c75SAndroid Build Coastguard Worker return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo;
70*71db0c75SAndroid Build Coastguard Worker }
71*71db0c75SAndroid Build Coastguard Worker #endif
72*71db0c75SAndroid Build Coastguard Worker
73*71db0c75SAndroid Build Coastguard Worker } // anonymous namespace
74*71db0c75SAndroid Build Coastguard Worker
75*71db0c75SAndroid Build Coastguard Worker // Correctly rounded cbrt algorithm:
76*71db0c75SAndroid Build Coastguard Worker //
77*71db0c75SAndroid Build Coastguard Worker // === Step 1 - Range reduction ===
78*71db0c75SAndroid Build Coastguard Worker // For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as:
79*71db0c75SAndroid Build Coastguard Worker // x_r = 1.m
80*71db0c75SAndroid Build Coastguard Worker // a = (-1)^s * 2^(e % 3) * (1.m)
81*71db0c75SAndroid Build Coastguard Worker // Then cbrt(x) = x^(1/3) can be computed as:
82*71db0c75SAndroid Build Coastguard Worker // x^(1/3) = 2^(e / 3) * a^(1/3).
83*71db0c75SAndroid Build Coastguard Worker //
84*71db0c75SAndroid Build Coastguard Worker // In order to avoid division, we compute a^(-2/3) using Newton method and then
85*71db0c75SAndroid Build Coastguard Worker // multiply the results by a:
86*71db0c75SAndroid Build Coastguard Worker // a^(1/3) = a * a^(-2/3).
87*71db0c75SAndroid Build Coastguard Worker //
88*71db0c75SAndroid Build Coastguard Worker // === Step 2 - First approximation to a^(-2/3) ===
89*71db0c75SAndroid Build Coastguard Worker // First, we use a degree-7 minimax polynomial generated by Sollya to
90*71db0c75SAndroid Build Coastguard Worker // approximate x_r^(-2/3) for 1 <= x_r < 2.
91*71db0c75SAndroid Build Coastguard Worker // p = P(x_r) ~ x_r^(-2/3),
92*71db0c75SAndroid Build Coastguard Worker // with relative errors bounded by:
93*71db0c75SAndroid Build Coastguard Worker // | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21.
94*71db0c75SAndroid Build Coastguard Worker //
// Then we multiply by 2^(-2*(e % 3)/3) from a small lookup table to get:
96*71db0c75SAndroid Build Coastguard Worker // x_0 = 2^(-2*(e % 3)/3) * p
97*71db0c75SAndroid Build Coastguard Worker // ~ 2^(-2*(e % 3)/3) * x_r^(-2/3)
98*71db0c75SAndroid Build Coastguard Worker // = a^(-2/3)
99*71db0c75SAndroid Build Coastguard Worker // With relative errors:
100*71db0c75SAndroid Build Coastguard Worker // | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21.
101*71db0c75SAndroid Build Coastguard Worker // This step is done in double precision.
102*71db0c75SAndroid Build Coastguard Worker //
103*71db0c75SAndroid Build Coastguard Worker // === Step 3 - First Newton iteration ===
104*71db0c75SAndroid Build Coastguard Worker // We follow the method described in:
105*71db0c75SAndroid Build Coastguard Worker // Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
106*71db0c75SAndroid Build Coastguard Worker // in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
107*71db0c75SAndroid Build Coastguard Worker // to derive multiplicative Newton iterations as below:
108*71db0c75SAndroid Build Coastguard Worker // Let x_n be the nth approximation to a^(-2/3). Define the n^th error as:
109*71db0c75SAndroid Build Coastguard Worker // h_n = x_n^3 * a^2 - 1
110*71db0c75SAndroid Build Coastguard Worker // Then:
111*71db0c75SAndroid Build Coastguard Worker // a^(-2/3) = x_n / (1 + h_n)^(1/3)
112*71db0c75SAndroid Build Coastguard Worker // = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)
113*71db0c75SAndroid Build Coastguard Worker // using the Taylor series expansion of (1 + h_n)^(-1/3).
114*71db0c75SAndroid Build Coastguard Worker //
115*71db0c75SAndroid Build Coastguard Worker // Apply to x_0 above:
116*71db0c75SAndroid Build Coastguard Worker // h_0 = x_0^3 * a^2 - 1
117*71db0c75SAndroid Build Coastguard Worker // = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),
118*71db0c75SAndroid Build Coastguard Worker // it's bounded by:
119*71db0c75SAndroid Build Coastguard Worker // |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17.
120*71db0c75SAndroid Build Coastguard Worker // So in the first iteration step, we use:
//   x_1 = x_0 * (1 - (1/3) * h_0 + (2/9) * h_0^2 - (14/81) * h_0^3)
122*71db0c75SAndroid Build Coastguard Worker // Its relative error is bounded by:
//   | x_1 / a^(-2/3) - 1 | < 35/243 * |h_0|^4 < 2^-70.
124*71db0c75SAndroid Build Coastguard Worker // Then we perform Ziv's rounding test and check if the answer is exact.
125*71db0c75SAndroid Build Coastguard Worker // This step is done in double-double precision.
126*71db0c75SAndroid Build Coastguard Worker //
127*71db0c75SAndroid Build Coastguard Worker // === Step 4 - Second Newton iteration ===
128*71db0c75SAndroid Build Coastguard Worker // If the Ziv's rounding test from the previous step fails, we define the error
129*71db0c75SAndroid Build Coastguard Worker // term:
130*71db0c75SAndroid Build Coastguard Worker // h_1 = x_1^3 * a^2 - 1,
131*71db0c75SAndroid Build Coastguard Worker // And perform another iteration:
132*71db0c75SAndroid Build Coastguard Worker // x_2 = x_1 * (1 - h_1 / 3)
// with relative errors that exceed the precision of double-double.
134*71db0c75SAndroid Build Coastguard Worker // We then check the Ziv's accuracy test with relative errors < 2^-102 to
135*71db0c75SAndroid Build Coastguard Worker // compensate for rounding errors.
136*71db0c75SAndroid Build Coastguard Worker //
137*71db0c75SAndroid Build Coastguard Worker // === Step 5 - Final iteration ===
138*71db0c75SAndroid Build Coastguard Worker // If the Ziv's accuracy test from the previous step fails, we perform another
139*71db0c75SAndroid Build Coastguard Worker // iteration in 128-bit precision and check for exact outputs.
140*71db0c75SAndroid Build Coastguard Worker //
141*71db0c75SAndroid Build Coastguard Worker // TODO: It is possible to replace this costly computation step with special
142*71db0c75SAndroid Build Coastguard Worker // exceptional handling, similar to what was done in the CORE-MATH project:
143*71db0c75SAndroid Build Coastguard Worker // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c
144*71db0c75SAndroid Build Coastguard Worker
145*71db0c75SAndroid Build Coastguard Worker LLVM_LIBC_FUNCTION(double, cbrt, (double x)) {
146*71db0c75SAndroid Build Coastguard Worker using FPBits = fputil::FPBits<double>;
147*71db0c75SAndroid Build Coastguard Worker
148*71db0c75SAndroid Build Coastguard Worker uint64_t x_abs = FPBits(x).abs().uintval();
149*71db0c75SAndroid Build Coastguard Worker
150*71db0c75SAndroid Build Coastguard Worker unsigned exp_bias_correction = 682; // 1023 * 2/3
151*71db0c75SAndroid Build Coastguard Worker
152*71db0c75SAndroid Build Coastguard Worker if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() ||
153*71db0c75SAndroid Build Coastguard Worker x_abs >= FPBits::inf().uintval())) {
154*71db0c75SAndroid Build Coastguard Worker if (x == 0.0 || x_abs >= FPBits::inf().uintval())
155*71db0c75SAndroid Build Coastguard Worker // x is 0, Inf, or NaN.
156*71db0c75SAndroid Build Coastguard Worker // Make sure it works for FTZ/DAZ modes.
157*71db0c75SAndroid Build Coastguard Worker return static_cast<double>(x + x);
158*71db0c75SAndroid Build Coastguard Worker
159*71db0c75SAndroid Build Coastguard Worker // x is non-zero denormal number.
160*71db0c75SAndroid Build Coastguard Worker // Normalize x.
161*71db0c75SAndroid Build Coastguard Worker x *= 0x1.0p60;
162*71db0c75SAndroid Build Coastguard Worker exp_bias_correction -= 20;
163*71db0c75SAndroid Build Coastguard Worker }
164*71db0c75SAndroid Build Coastguard Worker
165*71db0c75SAndroid Build Coastguard Worker FPBits x_bits(x);
166*71db0c75SAndroid Build Coastguard Worker
167*71db0c75SAndroid Build Coastguard Worker // When using biased exponent of x in double precision,
168*71db0c75SAndroid Build Coastguard Worker // x_e = real_exponent_of_x + 1023
169*71db0c75SAndroid Build Coastguard Worker // Then:
170*71db0c75SAndroid Build Coastguard Worker // x_e / 3 = real_exponent_of_x / 3 + 1023/3
171*71db0c75SAndroid Build Coastguard Worker // = real_exponent_of_x / 3 + 341
172*71db0c75SAndroid Build Coastguard Worker // So to make it the correct biased exponent of x^(1/3), we add
173*71db0c75SAndroid Build Coastguard Worker // 1023 - 341 = 682
174*71db0c75SAndroid Build Coastguard Worker // to the quotient x_e / 3.
175*71db0c75SAndroid Build Coastguard Worker unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent());
176*71db0c75SAndroid Build Coastguard Worker unsigned out_e = (x_e / 3 + exp_bias_correction);
177*71db0c75SAndroid Build Coastguard Worker unsigned shift_e = x_e % 3;
178*71db0c75SAndroid Build Coastguard Worker
179*71db0c75SAndroid Build Coastguard Worker // Set x_r = 1.mantissa
180*71db0c75SAndroid Build Coastguard Worker double x_r =
181*71db0c75SAndroid Build Coastguard Worker FPBits(x_bits.get_mantissa() |
182*71db0c75SAndroid Build Coastguard Worker (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN))
183*71db0c75SAndroid Build Coastguard Worker .get_val();
184*71db0c75SAndroid Build Coastguard Worker
185*71db0c75SAndroid Build Coastguard Worker // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa)
186*71db0c75SAndroid Build Coastguard Worker uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF;
187*71db0c75SAndroid Build Coastguard Worker a_bits |=
188*71db0c75SAndroid Build Coastguard Worker (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS))
189*71db0c75SAndroid Build Coastguard Worker << FPBits::FRACTION_LEN);
190*71db0c75SAndroid Build Coastguard Worker double a = FPBits(a_bits).get_val();
191*71db0c75SAndroid Build Coastguard Worker
192*71db0c75SAndroid Build Coastguard Worker // Initial approximation of x_r^(-2/3).
193*71db0c75SAndroid Build Coastguard Worker double p = intial_approximation(x_r);
194*71db0c75SAndroid Build Coastguard Worker
195*71db0c75SAndroid Build Coastguard Worker // Look up for 2^(-2*n/3) used for first approximation step.
196*71db0c75SAndroid Build Coastguard Worker constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1,
197*71db0c75SAndroid Build Coastguard Worker 0x1.965fea53d6e3dp-2};
198*71db0c75SAndroid Build Coastguard Worker
199*71db0c75SAndroid Build Coastguard Worker // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8.
200*71db0c75SAndroid Build Coastguard Worker // Relative error: < 1.16 * 2^(-21).
201*71db0c75SAndroid Build Coastguard Worker double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p);
202*71db0c75SAndroid Build Coastguard Worker
203*71db0c75SAndroid Build Coastguard Worker // First iteration in double precision.
204*71db0c75SAndroid Build Coastguard Worker DoubleDouble a_sq = fputil::exact_mult(a, a);
205*71db0c75SAndroid Build Coastguard Worker
206*71db0c75SAndroid Build Coastguard Worker // h0 = x0^3 * a^2 - 1
207*71db0c75SAndroid Build Coastguard Worker DoubleDouble x0_sq = fputil::exact_mult(x0, x0);
208*71db0c75SAndroid Build Coastguard Worker DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq);
209*71db0c75SAndroid Build Coastguard Worker
210*71db0c75SAndroid Build Coastguard Worker double h0 = get_error(x0_3, a_sq);
211*71db0c75SAndroid Build Coastguard Worker
212*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
213*71db0c75SAndroid Build Coastguard Worker constexpr double REL_ERROR = 0;
214*71db0c75SAndroid Build Coastguard Worker #else
215*71db0c75SAndroid Build Coastguard Worker constexpr double REL_ERROR = 0x1.0p-51;
216*71db0c75SAndroid Build Coastguard Worker #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
217*71db0c75SAndroid Build Coastguard Worker
218*71db0c75SAndroid Build Coastguard Worker // Taylor polynomial of (1 + h)^(-1/3):
219*71db0c75SAndroid Build Coastguard Worker // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ...
220*71db0c75SAndroid Build Coastguard Worker constexpr double ERR_COEFFS[3] = {
221*71db0c75SAndroid Build Coastguard Worker -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error
222*71db0c75SAndroid Build Coastguard Worker 0x1.c71c71c71c71cp-3, // 2/9
223*71db0c75SAndroid Build Coastguard Worker -0x1.61f9add3c0ca4p-3, // -14/81
224*71db0c75SAndroid Build Coastguard Worker };
225*71db0c75SAndroid Build Coastguard Worker // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error.
226*71db0c75SAndroid Build Coastguard Worker double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]);
227*71db0c75SAndroid Build Coastguard Worker double x0_h0 = x0 * h0;
228*71db0c75SAndroid Build Coastguard Worker
229*71db0c75SAndroid Build Coastguard Worker // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81)
230*71db0c75SAndroid Build Coastguard Worker // x1 approximate a^(-2/3) with relative errors bounded by:
231*71db0c75SAndroid Build Coastguard Worker // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR
232*71db0c75SAndroid Build Coastguard Worker DoubleDouble x1_dd{x0_h0 * e0, x0};
233*71db0c75SAndroid Build Coastguard Worker
234*71db0c75SAndroid Build Coastguard Worker // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3).
235*71db0c75SAndroid Build Coastguard Worker DoubleDouble r1 = fputil::quick_mult(a, x1_dd);
236*71db0c75SAndroid Build Coastguard Worker
237*71db0c75SAndroid Build Coastguard Worker // Lambda function to update the exponent of the result.
__anon5d527db00202(double r) 238*71db0c75SAndroid Build Coastguard Worker auto update_exponent = [=](double r) -> double {
239*71db0c75SAndroid Build Coastguard Worker uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000;
240*71db0c75SAndroid Build Coastguard Worker // Adjust exponent and sign.
241*71db0c75SAndroid Build Coastguard Worker uint64_t r_bits =
242*71db0c75SAndroid Build Coastguard Worker r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
243*71db0c75SAndroid Build Coastguard Worker return FPBits(r_bits).get_val();
244*71db0c75SAndroid Build Coastguard Worker };
245*71db0c75SAndroid Build Coastguard Worker
246*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
247*71db0c75SAndroid Build Coastguard Worker // TODO: We probably don't need to use double-double if accurate tests and
248*71db0c75SAndroid Build Coastguard Worker // passes are skipped.
249*71db0c75SAndroid Build Coastguard Worker return update_exponent(r1.hi + r1.lo);
250*71db0c75SAndroid Build Coastguard Worker #else
251*71db0c75SAndroid Build Coastguard Worker // Accurate checks and passes.
252*71db0c75SAndroid Build Coastguard Worker double r1_lower = r1.hi + r1.lo;
253*71db0c75SAndroid Build Coastguard Worker double r1_upper =
254*71db0c75SAndroid Build Coastguard Worker r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo);
255*71db0c75SAndroid Build Coastguard Worker
256*71db0c75SAndroid Build Coastguard Worker // Ziv's accuracy test.
257*71db0c75SAndroid Build Coastguard Worker if (LIBC_LIKELY(r1_upper == r1_lower)) {
258*71db0c75SAndroid Build Coastguard Worker // Test for exact outputs.
259*71db0c75SAndroid Build Coastguard Worker // Check if lower (52 - 17 = 35) bits are 0's.
260*71db0c75SAndroid Build Coastguard Worker if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) ==
261*71db0c75SAndroid Build Coastguard Worker 0)) {
262*71db0c75SAndroid Build Coastguard Worker double r1_err = (r1_lower - r1.hi) - r1.lo;
263*71db0c75SAndroid Build Coastguard Worker if (FPBits(r1_err).abs().get_val() < 0x1.0p69)
264*71db0c75SAndroid Build Coastguard Worker fputil::clear_except_if_required(FE_INEXACT);
265*71db0c75SAndroid Build Coastguard Worker }
266*71db0c75SAndroid Build Coastguard Worker
267*71db0c75SAndroid Build Coastguard Worker return update_exponent(r1_lower);
268*71db0c75SAndroid Build Coastguard Worker }
269*71db0c75SAndroid Build Coastguard Worker
270*71db0c75SAndroid Build Coastguard Worker // Accuracy test failed, perform another Newton iteration.
271*71db0c75SAndroid Build Coastguard Worker double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0;
272*71db0c75SAndroid Build Coastguard Worker
273*71db0c75SAndroid Build Coastguard Worker // Second iteration in double-double precision.
274*71db0c75SAndroid Build Coastguard Worker // h1 = x1^3 * a^2 - 1.
275*71db0c75SAndroid Build Coastguard Worker DoubleDouble x1_sq = fputil::exact_mult(x1, x1);
276*71db0c75SAndroid Build Coastguard Worker DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq);
277*71db0c75SAndroid Build Coastguard Worker double h1 = get_error(x1_3, a_sq);
278*71db0c75SAndroid Build Coastguard Worker
279*71db0c75SAndroid Build Coastguard Worker // e1 = -x1*h1/3.
280*71db0c75SAndroid Build Coastguard Worker double e1 = h1 * (x1 * -0x1.5555555555555p-2);
281*71db0c75SAndroid Build Coastguard Worker // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101.
282*71db0c75SAndroid Build Coastguard Worker DoubleDouble x2 = fputil::exact_add(x1, e1);
283*71db0c75SAndroid Build Coastguard Worker // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100.
284*71db0c75SAndroid Build Coastguard Worker DoubleDouble r2 = fputil::quick_mult(a, x2);
285*71db0c75SAndroid Build Coastguard Worker
286*71db0c75SAndroid Build Coastguard Worker double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo);
287*71db0c75SAndroid Build Coastguard Worker double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo);
288*71db0c75SAndroid Build Coastguard Worker
289*71db0c75SAndroid Build Coastguard Worker // Ziv's accuracy test.
290*71db0c75SAndroid Build Coastguard Worker if (LIBC_LIKELY(r2_upper == r2_lower))
291*71db0c75SAndroid Build Coastguard Worker return update_exponent(r2_upper);
292*71db0c75SAndroid Build Coastguard Worker
293*71db0c75SAndroid Build Coastguard Worker // TODO: Investigate removing float128 and just list exceptional cases.
294*71db0c75SAndroid Build Coastguard Worker // Apply another Newton iteration with ~126-bit accuracy.
295*71db0c75SAndroid Build Coastguard Worker Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo));
296*71db0c75SAndroid Build Coastguard Worker // x2^3
297*71db0c75SAndroid Build Coastguard Worker Float128 x2_3 =
298*71db0c75SAndroid Build Coastguard Worker fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128);
299*71db0c75SAndroid Build Coastguard Worker // a^2
300*71db0c75SAndroid Build Coastguard Worker Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a));
301*71db0c75SAndroid Build Coastguard Worker // x2^3 * a^2
302*71db0c75SAndroid Build Coastguard Worker Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128);
303*71db0c75SAndroid Build Coastguard Worker // h2 = x2^3 * a^2 - 1
304*71db0c75SAndroid Build Coastguard Worker Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0));
305*71db0c75SAndroid Build Coastguard Worker double h2 = static_cast<double>(h2_f128);
306*71db0c75SAndroid Build Coastguard Worker // t2 = 1 - h2 / 3
307*71db0c75SAndroid Build Coastguard Worker Float128 t2 =
308*71db0c75SAndroid Build Coastguard Worker fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2)));
309*71db0c75SAndroid Build Coastguard Worker // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3)
310*71db0c75SAndroid Build Coastguard Worker Float128 x3 = fputil::quick_mul(x2_f128, t2);
311*71db0c75SAndroid Build Coastguard Worker // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3)
312*71db0c75SAndroid Build Coastguard Worker Float128 r3 = fputil::quick_mul(Float128(a), x3);
313*71db0c75SAndroid Build Coastguard Worker
314*71db0c75SAndroid Build Coastguard Worker // Check for exact cases:
315*71db0c75SAndroid Build Coastguard Worker Float128::MantissaType rounding_bits =
316*71db0c75SAndroid Build Coastguard Worker r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128;
317*71db0c75SAndroid Build Coastguard Worker
318*71db0c75SAndroid Build Coastguard Worker double result = static_cast<double>(r3);
319*71db0c75SAndroid Build Coastguard Worker if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) ||
320*71db0c75SAndroid Build Coastguard Worker (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) {
321*71db0c75SAndroid Build Coastguard Worker // Output is exact.
322*71db0c75SAndroid Build Coastguard Worker r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128;
323*71db0c75SAndroid Build Coastguard Worker
324*71db0c75SAndroid Build Coastguard Worker if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) {
325*71db0c75SAndroid Build Coastguard Worker Float128 tmp{r3.sign, r3.exponent - 123,
326*71db0c75SAndroid Build Coastguard Worker 0x8000'0000'0000'0000'0000'0000'0000'0000_u128};
327*71db0c75SAndroid Build Coastguard Worker Float128 r4 = fputil::quick_add(r3, tmp);
328*71db0c75SAndroid Build Coastguard Worker result = static_cast<double>(r4);
329*71db0c75SAndroid Build Coastguard Worker } else {
330*71db0c75SAndroid Build Coastguard Worker result = static_cast<double>(r3);
331*71db0c75SAndroid Build Coastguard Worker }
332*71db0c75SAndroid Build Coastguard Worker
333*71db0c75SAndroid Build Coastguard Worker fputil::clear_except_if_required(FE_INEXACT);
334*71db0c75SAndroid Build Coastguard Worker }
335*71db0c75SAndroid Build Coastguard Worker
336*71db0c75SAndroid Build Coastguard Worker return update_exponent(result);
337*71db0c75SAndroid Build Coastguard Worker #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
338*71db0c75SAndroid Build Coastguard Worker }
339*71db0c75SAndroid Build Coastguard Worker
340*71db0c75SAndroid Build Coastguard Worker } // namespace LIBC_NAMESPACE_DECL
341