Math/interface/approx_exp.h

0001 #ifndef DataFormatsMathAPPROX_EXP_H
0002 #define DataFormatsMathAPPROX_EXP_H
0003 /*  Quick and not that dirty vectorizable exp implementations
0004     Author: Florent de Dinechin, Aric, ENS-Lyon
0005     with advice from Vincenzo Innocente, CERN
0006     All right reserved
0007
0008 Warning + disclaimers:
0009
0010 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
0011     Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
0012
0013 Polynomials were obtained using Sollya scripts (in comments):
0014 please also keep these comments attached to the code.
0015
0016 If a derivative of this code ends up in the glibc I am too happy: the version with MANAGE_SUBNORMALS=1 and DEGREE=6 is faithful-accurate over the full 2^32 binary32 numbers and behaves well WRT exceptional cases. It is about 4 times faster than the stock expf on this PC, when compiled with gcc -O2.
0017
0018 This code is FMA-safe (i.e. accelerated and more accurate with an FMA) as long as my parentheses are respected.
0019
0020 A remaining TODO is to try and manage the over/underflow using only integer tests as per Astasiev et al, RNC conf.
0021 Not sure it makes that much sense in the vector context.
0022
0023 */
0024
0025 // #define MANAGE_SUBNORMALS 1 // No measurable perf difference, so let's be clean.
0026 // If set to zero we flush to zero the subnormal outputs, ie for x<-88 or so
0027
0028 // DEGREE
0029 // 6 is perfect.
0030 // 5 provides max 2-ulp error,
0031 // 4 loses 44 ulps (6 bits) for an acceleration of 10% WRT 6
0032 // (I don't subtract the loop and call overhead, so it would be more for constexprd code)
0033
0034 // see the comments in the code for the accuracy you get from a given degree
0035
0036 #include "DataFormats/Math/interface/approx_math.h"
0037
0038 template <int DEGREE>
0039 constexpr float approx_expf_P(float p);
0040
0041 // degree =  2   => absolute accuracy is  8 bits
0042 template <>
0043 constexpr float approx_expf_P<2>(float y) {
0044   return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0));
0045 }
0046 // degree =  3   => absolute accuracy is  12 bits
0047 template <>
0048 constexpr float approx_expf_P<3>(float y) {
0049 #ifdef HORNER  // HORNER
0050   return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4)));
0051 #else  // ESTRIN
0052   float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4));
0053   float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
0054   return p01 + y * y * p23;
0055 #endif
0056 }
0057 // degree =  4   => absolute accuracy is  17 bits
0058 template <>
0059 constexpr float approx_expf_P<4>(float y) {
0060   return float(0x2.p0) +
0061          y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4))));
0062 }
0063 // degree =  5   => absolute accuracy is  22 bits
0064 template <>
0065 constexpr float approx_expf_P<5>(float y) {
0066   return float(0x2.p0) +
0067          y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) +
0068                                    y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8)))));
0069 }
0070 // degree =  6   => absolute accuracy is  27 bits
0071 template <>
0072 constexpr float approx_expf_P<6>(float y) {
0073 #ifdef HORNER  // HORNER
0074   float p =
0075       float(0x2.p0) +
0076       y * (float(0x2.p0) +
0077            y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) +
0078                                                                 y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12))))));
0079 #else  // ESTRIN does seem to save a cycle or two
0080   float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
0081   float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
0082   float y2 = y * y;
0083   float p12 = float(0x2.p0) + y;  // By chance we save one operation here! Funny.
0084   float p36 = p34 + y2 * p56;
0085   float p16 = p12 + y2 * p36;
0086   float p = float(0x2.p0) + y * p16;
0087 #endif
0088   return p;
0089 }
0090
0091 // degree =  7   => absolute accuracy is  31 bits
0092 template <>
0093 constexpr float approx_expf_P<7>(float y) {
0094   return float(0x2.p0) +
0095          y * (float(0x2.p0) +
0096               y * (float(0x1.p0) +
0097                    y * (float(0x5.55555p-4) +
0098                         y * (float(0x1.5554e4p-4) +
0099                              y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12)))))));
0100 }
0101
0102 /* The Sollya script that computes the polynomials above
0103
0104
0105 f= 2*exp(y);
0106 I=[-log(2)/2;log(2)/2];
0107 filename="/tmp/polynomials";
0108 print("") > filename;
0109 for deg from 2 to 8 do begin
0110   p = fpminimax(f, deg,[|1,23...|],I, floating, absolute);
0111   display=decimal;
0112   acc=floor(-log2(sup(supnorm(p, f, I, absolute, 2^(-40)))));
0113   print( "   // degree = ", deg,
0114          "  => absolute accuracy is ",  acc, "bits" ) >> filename;
0115   print("#if ( DEGREE ==", deg, ")") >> filename;
0116   display=hexadecimal;
0117   print("   float p = ", horner(p) , ";") >> filename;
0118   print("#endif") >> filename;
0119 end;
0120
0121 */
0122
0123 // valid for -87.3365 < x < 88.7228
0124 template <int DEGREE>
0125 #ifdef CMS_UNDEFINED_SANITIZER
0126 //Supress UBSan runtime error about signed integer overflow: -2147483648 - 1
0127 //This function is an unsafe implementation of expf and rely on overflow feature
0128 //Vectorization is affected on x86_64 if change to unsigned int
0129 __attribute__((no_sanitize("signed-integer-overflow")))
0130 #endif
0131 constexpr float
0132 unsafe_expf_impl(float x) {
0133   using namespace approx_math;
0134   /* Sollya for the following constants:
0135      display=hexadecimal;
0136      1b23+1b22;
0137      single(1/log(2));
0138      log2H=round(log(2), 16, RN);
0139      log2L = single(log(2)-log2H);
0140      log2H; log2L;
0141
0142   */
0143   // constexpr float rnd_cst = float(0xc.p20);
0144   constexpr float inv_log2f = float(0x1.715476p0);
0145   constexpr float log2H = float(0xb.172p-4);
0146   constexpr float log2L = float(0x1.7f7d1cp-20);
0147
0148   float y = x;
0149   // This is doing round(x*inv_log2f) to the nearest integer
0150   float z = fpfloor((x * inv_log2f) + 0.5f);
0151   // Cody-and-Waite accurate range reduction. FMA-safe.
0152   y -= z * log2H;
0153   y -= z * log2L;
0154   // exponent
0155   int32_t e = z;
0156
0157   // we want RN above because it centers the interval around zero
0158   // but then we could have 2^e = below being infinity when it shouldn't
0159   // (when e=128 but p<1)
0160   // so we avoid this case by reducing e and evaluating a polynomial for 2*exp
0161   e -= 1;
0162
0163   // NaN inputs will propagate to the output as expected
0164
0165   float p = approx_expf_P<DEGREE>(y);
0166
0167   // cout << "x=" << x << "  e=" << e << "  y=" << y << "  p=" << p <<"\n";
0168   binary32 ef;
0169   uint32_t biased_exponent = e + 127;
0170   ef.ui32 = (biased_exponent << 23);
0171
0172   return p * ef.f;
0173 }
0174
0175 #ifndef NO_APPROX_MATH
0176
0177 template <int DEGREE>
0178 constexpr float unsafe_expf(float x) {
0179   return unsafe_expf_impl<DEGREE>(x);
0180 }
0181
0182 template <int DEGREE>
0183 constexpr float approx_expf(float x) {
0184   constexpr float inf_threshold = float(0x5.8b90cp4);
0185   // log of the smallest normal
0186   constexpr float zero_threshold_ftz = -float(0x5.75628p4);  // sollya: single(log(1b-126));
0187   // flush to zero on the output
0188   // manage infty output:
0189   // faster than directly on output!
0190   x = std::min(std::max(x, zero_threshold_ftz), inf_threshold);
0191   float r = unsafe_expf<DEGREE>(x);
0192
0193   return r;
0194 }
0195
0196 #else
0197 template <int DEGREE>
0198 constexpr float unsafe_expf(float x) {
0199   return std::exp(x);
0200 }
0201 template <int DEGREE>
0202 constexpr float approx_expf(float x) {
0203   return std::exp(x);
0204 }
0205 #endif  // NO_APPROX_MATH
0206
0207 #endif