Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-07-18 23:17:37

0001 #ifndef DataFormatsMathAPPROX_LOG_H
0002 #define DataFormatsMathAPPROX_LOG_H
0003 /*  Quick and dirty, branchless, log implementations
0004     Author: Florent de Dinechin, Aric, ENS-Lyon 
0005     All right reserved
0006 
0007 Warning + disclaimers:
0008 - no special case handling (infinite/NaN inputs, even zero input, etc)
0009 - no input subnormal handling, you'll get completely wrong results.
0010   This is the worst problem IMHO (leading to very rare but very bad bugs)
0011   However it is probable you can guarantee that your input numbers 
0012   are never subnormal, check that. Otherwise I'll fix it...
0013 - output accuracy reported is only absolute. 
0014   Relative accuracy may be arbitrary bad around log(1), 
0015   especially for approx_log0. approx_logf is more or less OK.
0016 - The larger/smaller the input x (i.e. away from 1), the better the accuracy.
0017 - For the higher degree polynomials it is possible to win a few cycles 
0018   by parallelizing the evaluation of the polynomial (Estrin). 
0019   It doesn't make much sense if you want to make a vector function. 
0020 - All this code is FMA-safe (and accelerated by FMA)
0021  
0022 Feel free to distribute or insert in other programs etc, as long as this notice is attached.
0023     Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
0024 
0025 Polynomials were obtained using Sollya scripts (in comments): 
0026 please also keep these comments attached to the code of approx_logf. 
0027 */
0028 
0029 #include "DataFormats/Math/interface/approx_math.h"
0030 
0031 template <int DEGREE>
0032 constexpr float approx_logf_P(float p);
0033 
0034 // the following is Sollya output
0035 
0036 // degree =  2   => absolute accuracy is  7 bits
0037 template <>
0038 constexpr float approx_logf_P<2>(float y) {
0039   return y * (float(0x1.0671c4p0) + y * (float(-0x7.27744p-4)));
0040 }
0041 
0042 // degree =  3   => absolute accuracy is  10 bits
0043 template <>
0044 constexpr float approx_logf_P<3>(float y) {
0045   return y * (float(0x1.013354p0) + y * (-float(0x8.33006p-4) + y * float(0x4.0d16cp-4)));
0046 }
0047 
0048 // degree =  4   => absolute accuracy is  13 bits
0049 template <>
0050 constexpr float approx_logf_P<4>(float y) {
0051   return y *
0052          (float(0xf.ff5bap-4) + y * (-float(0x8.13e5ep-4) + y * (float(0x5.826ep-4) + y * (-float(0x2.e87fb8p-4)))));
0053 }
0054 
0055 // degree =  5   => absolute accuracy is  16 bits
0056 template <>
0057 constexpr float approx_logf_P<5>(float y) {
0058   return y * (float(0xf.ff652p-4) +
0059               y * (-float(0x8.0048ap-4) +
0060                    y * (float(0x5.72782p-4) + y * (-float(0x4.20904p-4) + y * float(0x2.1d7fd8p-4)))));
0061 }
0062 
0063 // degree =  6   => absolute accuracy is  19 bits
0064 template <>
0065 constexpr float approx_logf_P<6>(float y) {
0066   return y * (float(0xf.fff14p-4) +
0067               y * (-float(0x7.ff4bfp-4) +
0068                    y * (float(0x5.582f6p-4) +
0069                         y * (-float(0x4.1dcf2p-4) + y * (float(0x3.3863f8p-4) + y * (-float(0x1.9288d4p-4)))))));
0070 }
0071 
0072 // degree =  7   => absolute accuracy is  21 bits
0073 template <>
0074 constexpr float approx_logf_P<7>(float y) {
0075   return y * (float(0x1.000034p0) +
0076               y * (-float(0x7.ffe57p-4) +
0077                    y * (float(0x5.5422ep-4) +
0078                         y * (-float(0x4.037a6p-4) +
0079                              y * (float(0x3.541c88p-4) + y * (-float(0x2.af842p-4) + y * float(0x1.48b3d8p-4)))))));
0080 }
0081 
0082 // degree =  8   => absolute accuracy is  24 bits
0083 template <>
0084 constexpr float approx_logf_P<8>(float y) {
0085   return y *
0086          (float(0x1.00000cp0) +
0087           y * (float(-0x8.0003p-4) +
0088                y * (float(0x5.55087p-4) +
0089                     y * (float(-0x3.fedcep-4) +
0090                          y * (float(0x3.3a1dap-4) +
0091                               y * (float(-0x2.cb55fp-4) + y * (float(0x2.38831p-4) + y * (float(-0xf.e87cap-8)))))))));
0092 }
0093 
0094 template <int DEGREE>
0095 constexpr float unsafe_logf_impl(float x) {
0096   using namespace approx_math;
0097 
0098   binary32 xx, m;
0099   xx.f = x;
0100 
0101   // as many integer computations as possible, most are 1-cycle only, and lots of ILP.
0102   int e = (((xx.i32) >> 23) & 0xFF) - 127;     // extract exponent
0103   m.i32 = (xx.i32 & 0x007FFFFF) | 0x3F800000;  // extract mantissa as an FP number
0104 
0105   int adjust = (xx.i32 >> 22) & 1;  // first bit of the mantissa, tells us if 1.m > 1.5
0106   m.i32 -= adjust << 23;            // if so, divide 1.m by 2 (exact operation, no rounding)
0107   e += adjust;                      // and update exponent so we still have x=2^E*y
0108 
0109   // now back to floating-point
0110   float y = m.f - 1.0f;  // Sterbenz-exact; cancels but we don't care about output relative error
0111   // all the computations so far were free of rounding errors...
0112 
0113   // the following is based on Sollya output
0114   float p = approx_logf_P<DEGREE>(y);
0115 
0116   constexpr float Log2 = 0xb.17218p-4;  // 0.693147182464599609375
0117   return float(e) * Log2 + p;
0118 }
0119 
0120 #ifndef NO_APPROX_MATH
0121 template <int DEGREE>
0122 constexpr float unsafe_logf(float x) {
0123   return unsafe_logf_impl<DEGREE>(x);
0124 }
0125 
0126 template <int DEGREE>
0127 constexpr float approx_logf(float x) {
0128   using namespace approx_math;
0129 
0130   constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
0131 
0132   //x = std::max(std::min(x,MAXNUMF),0.f);
0133   float res = unsafe_logf<DEGREE>(x);
0134   res = (x < MAXNUMF) ? res : std::numeric_limits<float>::max();
0135   return (x > 0) ? res : std::numeric_limits<float>::quiet_NaN();
0136 }
0137 
0138 #else
0139 template <int DEGREE>
0140 constexpr float unsafe_logf(float x) {
0141   return std::log(x);
0142 }
0143 
0144 template <int DEGREE>
0145 constexpr float approx_logf(float x) {
0146   return std::log(x);
0147 }
0148 
0149 #endif  // NO_APPROX_MATH
0150 
0151 #endif