Math/interface/FastMath.h

0001 #ifndef DataFormats_Math_FastMath_h
0002 #define DataFormats_Math_FastMath_h
0003 // faster function will a limited precision
0004
0005 #include <cmath>
0006 #include <utility>
0007 #ifdef __SSE2__
0008 #include <emmintrin.h>
0009 #endif
0010 namespace fastmath {
0011   inline float invSqrt(float in) {
0012 #ifndef __SSE2__
0013     return 1.f / std::sqrt(in);
0014 #else
0015     float out;
0016     _mm_store_ss(&out, _mm_rsqrt_ss(_mm_load_ss(&in)));  // compiles to movss, rsqrtss, movss
0017     // return out; // already good enough!
0018     return out * (1.5f - 0.5f * in * out * out);  // One (more?) round of Newton's method
0019 #endif
0020   }
0021
0022   inline double invSqrt(double in) { return 1. / std::sqrt(in); }
0023
0024 }  // namespace fastmath
0025
0026 namespace fastmath_details {
0027   const double _2pi = (2.0 * 3.1415926535897932384626434);
0028   const float _2pif = float(_2pi);
0029   extern float atanbuf_[257 * 2];
0030   extern double datanbuf_[513 * 2];
0031 }  // namespace fastmath_details
0032
0033 namespace fastmath {
0034
0035   // =====================================================================
0036   // arctan, single-precision; returns phi and r (or 1/r if overR=true)
0037   // =====================================================================
0038   inline std::pair<float, float> atan2r(float y_, float x_, bool overR = false) {
0039     using namespace fastmath_details;
0040     float mag2 = x_ * x_ + y_ * y_;
0041     if (!(mag2 > 0)) {
0042       return std::pair<float, float>(0.f, 0.f);
0043     }  // degenerate case
0044
0045     // float r_ = std::sqrt(mag2);
0046     float rinv = invSqrt(mag2);
0047     unsigned int flags = 0;
0048     float x, y;
0049     union {
0050       float f;
0051       int i;
0052     } yp;
0053     yp.f = 32768.f;
0054     if (y_ < 0) {
0055       flags |= 4;
0056       y_ = -y_;
0057     }
0058     if (x_ < 0) {
0059       flags |= 2;
0060       x_ = -x_;
0061     }
0062     if (y_ > x_) {
0063       flags |= 1;
0064       x = rinv * y_;
0065       y = rinv * x_;
0066       yp.f += y;
0067     } else {
0068       x = rinv * x_;
0069       y = rinv * y_;
0070       yp.f += y;
0071     }
0072     int ind = (yp.i & 0x01FF) * 2;
0073
0074     float* asbuf = (float*)(atanbuf_ + ind);
0075     float sv = yp.f - 32768.f;
0076     float cv = asbuf[0];
0077     float asv = asbuf[1];
0078     sv = y * cv - x * sv;  // delta sin value
0079     // ____ compute arcsin directly
0080     float asvd = 6.f + sv * sv;
0081     sv *= float(1.0f / 6.0f);
0082     float th = asv + asvd * sv;
0083     if (flags & 1) {
0084       th = (_2pif / 4.f) - th;
0085     }
0086     if (flags & 2) {
0087       th = (_2pif / 2.f) - th;
0088     }
0089     if (flags & 4) {
0090       th = -th;
0091     }
0092     return std::pair<float, float>(th, overR ? rinv : rinv * mag2);
0093   }
0094
0095   // =====================================================================
0096   // arctan, double-precision; returns phi and r (or 1/r if overR=true)
0097   // =====================================================================
0098   inline std::pair<double, double> atan2r(double y_, double x_, bool overR = false) {
0099     using namespace fastmath_details;
0100     // assert(ataninited);
0101     double mag2 = x_ * x_ + y_ * y_;
0102     if (!(mag2 > 0)) {
0103       return std::pair<double, double>(0., 0.);
0104     }  // degenerate case
0105
0106     double r_ = std::sqrt(mag2);
0107     double rinv = 1. / r_;
0108     unsigned int flags = 0;
0109     double x, y;
0110     const double _2p43 = 65536.0 * 65536.0 * 2048.0;
0111     union {
0112       double d;
0113       int i[2];
0114     } yp;
0115
0116     yp.d = _2p43;
0117     if (y_ < 0) {
0118       flags |= 4;
0119       y_ = -y_;
0120     }
0121     if (x_ < 0) {
0122       flags |= 2;
0123       x_ = -x_;
0124     }
0125     if (y_ > x_) {
0126       flags |= 1;
0127       x = rinv * y_;
0128       y = rinv * x_;
0129       yp.d += y;
0130     } else {
0131       x = rinv * x_;
0132       y = rinv * y_;
0133       yp.d += y;
0134     }
0135
0136     int ind = (yp.i[0] & 0x03FF) * 2;  // 0 for little indian
0137
0138     double* dasbuf = (double*)(datanbuf_ + ind);
0139     double sv = yp.d - _2p43;  // index fraction
0140     double cv = dasbuf[0];
0141     double asv = dasbuf[1];
0142     sv = y * cv - x * sv;  // delta sin value
0143     // double sv = y *(cv-x);
0144     // ____ compute arcsin directly
0145     double asvd = 6 + sv * sv;
0146     sv *= double(1.0 / 6.0);
0147     double th = asv + asvd * sv;
0148     if (flags & 1) {
0149       th = (_2pi / 4) - th;
0150     }
0151     if (flags & 2) {
0152       th = (_2pi / 2) - th;
0153     }
0154     if (flags & 4) {
0155       th = -th;
0156     }
0157     return std::pair<double, double>(th, overR ? rinv : r_);
0158   }
0159
0160   // return eta phi saving some computation
0161   template <typename T>
0162   inline std::pair<T, T> etaphi(T x, T y, T z) {
0163     std::pair<T, T> por = atan2r(y, x, true);
0164     x = z * por.second;
0165     return std::pair<float, float>(std::log(x + std::sqrt(x * x + T(1))), por.first);
0166   }
0167
0168 }  // namespace fastmath
0169
0170 #endif