1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
|
#ifndef DataFormatsMathAPPROX_EXP_H
#define DataFormatsMathAPPROX_EXP_H
/* Quick and not that dirty vectorizable exp implementations
Author: Florent de Dinechin, Aric, ENS-Lyon
with advice from Vincenzo Innocente, CERN
All right reserved
Warning + disclaimers:
Feel free to distribute or insert in other programs etc, as long as this notice is attached.
Comments, requests etc: Florent.de.Dinechin@ens-lyon.fr
Polynomials were obtained using Sollya scripts (in comments):
please also keep these comments attached to the code.
If a derivative of this code ends up in the glibc I am too happy: the version with MANAGE_SUBNORMALS=1 and DEGREE=6 is faithful-accurate over the full 2^32 binary32 numbers and behaves well WRT exceptional cases. It is about 4 times faster than the stock expf on this PC, when compiled with gcc -O2.
This code is FMA-safe (i.e. accelerated and more accurate with an FMA) as long as my parentheses are respected.
A remaining TODO is to try and manage the over/underflow using only integer tests as per Astasiev et al, RNC conf.
Not sure it makes that much sense in the vector context.
*/
// #define MANAGE_SUBNORMALS 1 // No measurable perf difference, so let's be clean.
// If set to zero we flush to zero the subnormal outputs, ie for x<-88 or so
// DEGREE
// 6 is perfect.
// 5 provides max 2-ulp error,
// 4 loses 44 ulps (6 bits) for an acceleration of 10% WRT 6
// (I don't subtract the loop and call overhead, so it would be more for constexprd code)
// see the comments in the code for the accuracy you get from a given degree
#include "DataFormats/Math/interface/approx_math.h"
template <int DEGREE>
constexpr float approx_expf_P(float p);
// degree = 2 => absolute accuracy is 8 bits
template <>
constexpr float approx_expf_P<2>(float y) {
return float(0x2.p0) + y * (float(0x2.07b99p0) + y * float(0x1.025b84p0));
}
// degree = 3 => absolute accuracy is 12 bits
template <>
constexpr float approx_expf_P<3>(float y) {
#ifdef HORNER // HORNER
return float(0x2.p0) + y * (float(0x1.fff798p0) + y * (float(0x1.02249p0) + y * float(0x5.62042p-4)));
#else // ESTRIN
float p23 = (float(0x1.02249p0) + y * float(0x5.62042p-4));
float p01 = float(0x2.p0) + y * float(0x1.fff798p0);
return p01 + y * y * p23;
#endif
}
// degree = 4 => absolute accuracy is 17 bits
template <>
constexpr float approx_expf_P<4>(float y) {
return float(0x2.p0) +
y * (float(0x1.fffb1p0) + y * (float(0xf.ffe84p-4) + y * (float(0x5.5f9c1p-4) + y * float(0x1.57755p-4))));
}
// degree = 5 => absolute accuracy is 22 bits
template <>
constexpr float approx_expf_P<5>(float y) {
return float(0x2.p0) +
y * (float(0x2.p0) + y * (float(0xf.ffed8p-4) +
y * (float(0x5.5551cp-4) + y * (float(0x1.5740d8p-4) + y * float(0x4.49368p-8)))));
}
// degree = 6 => absolute accuracy is 27 bits
template <>
constexpr float approx_expf_P<6>(float y) {
#ifdef HORNER // HORNER
float p =
float(0x2.p0) +
y * (float(0x2.p0) +
y * (float(0x1.p0) + y * (float(0x5.55523p-4) + y * (float(0x1.5554dcp-4) +
y * (float(0x4.48f41p-8) + y * float(0xb.6ad4p-12))))));
#else // ESTRIN does seem to save a cycle or two
float p56 = float(0x4.48f41p-8) + y * float(0xb.6ad4p-12);
float p34 = float(0x5.55523p-4) + y * float(0x1.5554dcp-4);
float y2 = y * y;
float p12 = float(0x2.p0) + y; // By chance we save one operation here! Funny.
float p36 = p34 + y2 * p56;
float p16 = p12 + y2 * p36;
float p = float(0x2.p0) + y * p16;
#endif
return p;
}
// degree = 7 => absolute accuracy is 31 bits
template <>
constexpr float approx_expf_P<7>(float y) {
return float(0x2.p0) +
y * (float(0x2.p0) +
y * (float(0x1.p0) +
y * (float(0x5.55555p-4) +
y * (float(0x1.5554e4p-4) +
y * (float(0x4.444adp-8) + y * (float(0xb.6a8a6p-12) + y * float(0x1.9ec814p-12)))))));
}
/* The Sollya script that computes the polynomials above
f= 2*exp(y);
I=[-log(2)/2;log(2)/2];
filename="/tmp/polynomials";
print("") > filename;
for deg from 2 to 8 do begin
p = fpminimax(f, deg,[|1,23...|],I, floating, absolute);
display=decimal;
acc=floor(-log2(sup(supnorm(p, f, I, absolute, 2^(-40)))));
print( " // degree = ", deg,
" => absolute accuracy is ", acc, "bits" ) >> filename;
print("#if ( DEGREE ==", deg, ")") >> filename;
display=hexadecimal;
print(" float p = ", horner(p) , ";") >> filename;
print("#endif") >> filename;
end;
*/
// valid for -87.3365 < x < 88.7228
template <int DEGREE>
#ifdef CMS_UNDEFINED_SANITIZER
//Supress UBSan runtime error about signed integer overflow: -2147483648 - 1
//This function is an unsafe implementation of expf and rely on overflow feature
//Vectorization is affected on x86_64 if change to unsigned int
__attribute__((no_sanitize("signed-integer-overflow")))
#endif
constexpr float
unsafe_expf_impl(float x) {
using namespace approx_math;
/* Sollya for the following constants:
display=hexadecimal;
1b23+1b22;
single(1/log(2));
log2H=round(log(2), 16, RN);
log2L = single(log(2)-log2H);
log2H; log2L;
*/
// constexpr float rnd_cst = float(0xc.p20);
constexpr float inv_log2f = float(0x1.715476p0);
constexpr float log2H = float(0xb.172p-4);
constexpr float log2L = float(0x1.7f7d1cp-20);
float y = x;
// This is doing round(x*inv_log2f) to the nearest integer
float z = fpfloor((x * inv_log2f) + 0.5f);
// Cody-and-Waite accurate range reduction. FMA-safe.
y -= z * log2H;
y -= z * log2L;
// exponent
int32_t e = z;
// we want RN above because it centers the interval around zero
// but then we could have 2^e = below being infinity when it shouldn't
// (when e=128 but p<1)
// so we avoid this case by reducing e and evaluating a polynomial for 2*exp
e -= 1;
// NaN inputs will propagate to the output as expected
float p = approx_expf_P<DEGREE>(y);
// cout << "x=" << x << " e=" << e << " y=" << y << " p=" << p <<"\n";
binary32 ef;
uint32_t biased_exponent = e + 127;
ef.ui32 = (biased_exponent << 23);
return p * ef.f;
}
#ifndef NO_APPROX_MATH
template <int DEGREE>
constexpr float unsafe_expf(float x) {
return unsafe_expf_impl<DEGREE>(x);
}
template <int DEGREE>
constexpr float approx_expf(float x) {
constexpr float inf_threshold = float(0x5.8b90cp4);
// log of the smallest normal
constexpr float zero_threshold_ftz = -float(0x5.75628p4); // sollya: single(log(1b-126));
// flush to zero on the output
// manage infty output:
// faster than directly on output!
x = std::min(std::max(x, zero_threshold_ftz), inf_threshold);
float r = unsafe_expf<DEGREE>(x);
return r;
}
#else
template <int DEGREE>
constexpr float unsafe_expf(float x) {
return std::exp(x);
}
template <int DEGREE>
constexpr float approx_expf(float x) {
return std::exp(x);
}
#endif // NO_APPROX_MATH
#endif
|