File indexing completed on 2024-10-19 04:58:19
0001 #ifndef DataFormatsMathAPPROX_EXP_H
0002 #define DataFormatsMathAPPROX_EXP_H
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036 #include "DataFormats/Math/interface/approx_math.h"
0037
0038 template <int DEGREE>
0039 constexpr float approx_expf_P(float p);
0040
0041
0042 template <>
0043 constexpr float approx_expf_P<2>(float y) {
0044 return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0));
0045 }
0046
0047 template <>
0048 constexpr float approx_expf_P<3>(float y) {
0049 #ifdef HORNER
0050 return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4)));
0051 #else
0052 float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4));
0053 float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
0054 return p01 + y * y * p23;
0055 #endif
0056 }
0057
0058 template <>
0059 constexpr float approx_expf_P<4>(float y) {
0060 return float(0x2.p0) +
0061 y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4))));
0062 }
0063
0064 template <>
0065 constexpr float approx_expf_P<5>(float y) {
0066 return float(0x2.p0) +
0067 y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) +
0068 y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8)))));
0069 }
0070
0071 template <>
0072 constexpr float approx_expf_P<6>(float y) {
0073 #ifdef HORNER
0074 float p =
0075 float(0x2.p0) +
0076 y * (float(0x2.p0) +
0077 y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) +
0078 y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12))))));
0079 #else
0080 float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
0081 float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
0082 float y2 = y * y;
0083 float p12 = float(0x2.p0) + y;
0084 float p36 = p34 + y2 * p56;
0085 float p16 = p12 + y2 * p36;
0086 float p = float(0x2.p0) + y * p16;
0087 #endif
0088 return p;
0089 }
0090
0091
0092 template <>
0093 constexpr float approx_expf_P<7>(float y) {
0094 return float(0x2.p0) +
0095 y * (float(0x2.p0) +
0096 y * (float(0x1.p0) +
0097 y * (float(0x5.55555p-4) +
0098 y * (float(0x1.5554e4p-4) +
0099 y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12)))))));
0100 }
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124 template <int DEGREE>
0125 #ifdef CMS_UNDEFINED_SANITIZER
0126
0127
0128
0129 __attribute__((no_sanitize("signed-integer-overflow")))
0130 #endif
0131 constexpr float
0132 unsafe_expf_impl(float x) {
0133 using namespace approx_math;
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144 constexpr float inv_log2f = float(0x1.715476p0);
0145 constexpr float log2H = float(0xb.172p-4);
0146 constexpr float log2L = float(0x1.7f7d1cp-20);
0147
0148 float y = x;
0149
0150 float z = fpfloor((x * inv_log2f) + 0.5f);
0151
0152 y -= z * log2H;
0153 y -= z * log2L;
0154
0155 int32_t e = z;
0156
0157
0158
0159
0160
0161 e -= 1;
0162
0163
0164
0165 float p = approx_expf_P<DEGREE>(y);
0166
0167
0168 binary32 ef;
0169 uint32_t biased_exponent = e + 127;
0170 ef.ui32 = (biased_exponent << 23);
0171
0172 return p * ef.f;
0173 }
0174
0175 #ifndef NO_APPROX_MATH
0176
0177 template <int DEGREE>
0178 constexpr float unsafe_expf(float x) {
0179 return unsafe_expf_impl<DEGREE>(x);
0180 }
0181
0182 template <int DEGREE>
0183 constexpr float approx_expf(float x) {
0184 constexpr float inf_threshold = float(0x5.8b90cp4);
0185
0186 constexpr float zero_threshold_ftz = -float(0x5.75628p4);
0187
0188
0189
0190 x = std::min(std::max(x, zero_threshold_ftz), inf_threshold);
0191 float r = unsafe_expf<DEGREE>(x);
0192
0193 return r;
0194 }
0195
0196 #else
0197 template <int DEGREE>
0198 constexpr float unsafe_expf(float x) {
0199 return std::exp(x);
0200 }
0201 template <int DEGREE>
0202 constexpr float approx_expf(float x) {
0203 return std::exp(x);
0204 }
0205 #endif
0206
0207 #endif