File indexing completed on 2024-04-06 12:04:41
0001 #ifndef DataFormatsMathAPPROX_EXP_H
0002 #define DataFormatsMathAPPROX_EXP_H
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036 #include "DataFormats/Math/interface/approx_math.h"
0037
0038 template <int DEGREE>
0039 constexpr float approx_expf_P(float p);
0040
0041
0042 template <>
0043 constexpr float approx_expf_P<2>(float y) {
0044 return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0));
0045 }
0046
0047 template <>
0048 constexpr float approx_expf_P<3>(float y) {
0049 #ifdef HORNER
0050 return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4)));
0051 #else
0052 float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4));
0053 float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
0054 return p01 + y * y * p23;
0055 #endif
0056 }
0057
0058 template <>
0059 constexpr float approx_expf_P<4>(float y) {
0060 return float(0x2.p0) +
0061 y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4))));
0062 }
0063
0064 template <>
0065 constexpr float approx_expf_P<5>(float y) {
0066 return float(0x2.p0) +
0067 y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) +
0068 y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8)))));
0069 }
0070
0071 template <>
0072 constexpr float approx_expf_P<6>(float y) {
0073 #ifdef HORNER
0074 float p =
0075 float(0x2.p0) +
0076 y * (float(0x2.p0) +
0077 y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) +
0078 y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12))))));
0079 #else
0080 float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
0081 float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
0082 float y2 = y * y;
0083 float p12 = float(0x2.p0) + y;
0084 float p36 = p34 + y2 * p56;
0085 float p16 = p12 + y2 * p36;
0086 float p = float(0x2.p0) + y * p16;
0087 #endif
0088 return p;
0089 }
0090
0091
0092 template <>
0093 constexpr float approx_expf_P<7>(float y) {
0094 return float(0x2.p0) +
0095 y * (float(0x2.p0) +
0096 y * (float(0x1.p0) +
0097 y * (float(0x5.55555p-4) +
0098 y * (float(0x1.5554e4p-4) +
0099 y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12)))))));
0100 }
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124 template <int DEGREE>
0125 constexpr float unsafe_expf_impl(float x) {
0126 using namespace approx_math;
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137 constexpr float inv_log2f = float(0x1.715476p0);
0138 constexpr float log2H = float(0xb.172p-4);
0139 constexpr float log2L = float(0x1.7f7d1cp-20);
0140
0141 float y = x;
0142
0143 float z = fpfloor((x * inv_log2f) + 0.5f);
0144
0145 y -= z * log2H;
0146 y -= z * log2L;
0147
0148 int32_t e = z;
0149
0150
0151
0152
0153
0154 e -= 1;
0155
0156
0157
0158 float p = approx_expf_P<DEGREE>(y);
0159
0160
0161 binary32 ef;
0162 uint32_t biased_exponent = e + 127;
0163 ef.ui32 = (biased_exponent << 23);
0164
0165 return p * ef.f;
0166 }
0167
0168 #ifndef NO_APPROX_MATH
0169
0170 template <int DEGREE>
0171 constexpr float unsafe_expf(float x) {
0172 return unsafe_expf_impl<DEGREE>(x);
0173 }
0174
0175 template <int DEGREE>
0176 constexpr float approx_expf(float x) {
0177 constexpr float inf_threshold = float(0x5.8b90cp4);
0178
0179 constexpr float zero_threshold_ftz = -float(0x5.75628p4);
0180
0181
0182
0183 x = std::min(std::max(x, zero_threshold_ftz), inf_threshold);
0184 float r = unsafe_expf<DEGREE>(x);
0185
0186 return r;
0187 }
0188
0189 #else
0190 template <int DEGREE>
0191 constexpr float unsafe_expf(float x) {
0192 return std::exp(x);
0193 }
0194 template <int DEGREE>
0195 constexpr float approx_expf(float x) {
0196 return std::exp(x);
0197 }
0198 #endif
0199
0200 #endif