#ifndef DataFormat_Math_AVXVec_H
#define DataFormat_Math_AVXVec_H

// In principle this header should not be used on its own,
// only as part of SSEVec.h.
namespace mathSSE {

  template <>
  union Vec4<double> {
    typedef __m256d nativeType;
    __m256d vec;                                  // native AVX register view
    double __attribute__((aligned(32))) arr[4];   // scalar view of the same 256 bits
    OldVec<double> o;

    Vec4(__m256d ivec) : vec(ivec) {}

    Vec4(OldVec<double> const& ivec) : o(ivec) {}

    Vec4() { vec = _mm256_setzero_pd(); }

    inline Vec4(Vec4<float> ivec) { vec = _mm256_cvtps_pd(ivec.vec); }

    explicit Vec4(double f1) { set1(f1); }

    Vec4(double f1, double f2, double f3, double f4 = 0) {
      arr[0] = f1;
      arr[1] = f2;
      arr[2] = f3;
      arr[3] = f4;
    }

    Vec4(Vec2<double> ivec0, Vec2<double> ivec1) {
      // build from two 128-bit halves, starting from the low one so that
      // no uninitialized register is read
      vec = _mm256_insertf128_pd(_mm256_castpd128_pd256(ivec0.vec), ivec1.vec, 1);
    }

    Vec4(Vec2<double> ivec0, double f3, double f4 = 0) {
      vec = _mm256_castpd128_pd256(ivec0.vec);  // low half; the high half is set just below
      arr[2] = f3;
      arr[3] = f4;
    }

    Vec4(Vec2<double> ivec0) {
      vec = _mm256_setzero_pd();
      vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
    }

    // for masking
    void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
      Mask4<double> mask(m1, m2, m3, m4);
      vec = mask.vec;
    }

    void set(double f1, double f2, double f3, double f4 = 0) { vec = _mm256_set_pd(f4, f3, f2, f1); }

    void set1(double f1) { vec = _mm256_set1_pd(f1); }

    template <int N>
    Vec4 get1() const {
      return _mm256_set1_pd(arr[N]);  //FIXME
    }
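    // The FIXME above presumably refers to the broadcast going through the
    // in-memory arr[] view of the union. With AVX2, a compile-time N could
    // stay entirely in registers; a hypothetical alternative (sketch only):
    //   return _mm256_permute4x64_pd(vec, N * 0x55);  // replicate lane N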
    /*
    Vec4 get1(unsigned int n) const {
      return _mm256_set1_pd(arr[n]); //FIXME
    }
    */
    double& operator[](unsigned int n) { return arr[n]; }

    double operator[](unsigned int n) const { return arr[n]; }

    // low 128-bit half: (x, y)
    Vec2<double> xy() const { return Vec2<double>(_mm256_castpd256_pd128(vec)); }
    // swap the two 128-bit halves, then keep the low one: (z, w)
    Vec2<double> zw() const { return Vec2<double>(_mm256_castpd256_pd128(_mm256_permute2f128_pd(vec, vec, 1))); }
  };
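
  // A minimal usage sketch (illustrative only; assumes this header is reached
  // through SSEVec.h with AVX enabled):
  //
  //   Vec4<double> v(1., 2., 3.);   // f4 defaults to 0
  //   Vec4<double> w = 2. * v;      // (2, 4, 6, 0)
  //   double z = w[2];              // element access through the union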

  // narrowing conversion, defined here because it needs __m256d: 4 doubles -> 4 floats
  inline Vec4<float>::Vec4(Vec4<double> ivec) { vec = _mm256_cvtpd_ps(ivec.vec); }
}  // namespace mathSSE

inline bool operator==(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  // movemask gathers one sign bit per lane; 0xf means all four lanes compared equal
  return _mm256_movemask_pd(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS)) == 0xf;
}

inline mathSSE::Vec4<double> cmpeq(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS);
}

inline mathSSE::Vec4<double> cmpgt(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_cmp_pd(a.vec, b.vec, _CMP_GT_OS);
}

// horizontal add within each 128-bit lane: returns (a0+a1, b0+b1, a2+a3, b2+b3)
inline mathSSE::Vec4<double> hadd(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_hadd_pd(a.vec, b.vec);
}

inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a) {
  // negate by flipping each lane's sign bit (xor with -0.0)
  const __m256d neg = _mm256_set1_pd(-0.0);
  return _mm256_xor_pd(a.vec, neg);
}

inline mathSSE::Vec4<double> operator&(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_and_pd(a.vec, b.vec);
}
inline mathSSE::Vec4<double> operator|(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_or_pd(a.vec, b.vec);
}
inline mathSSE::Vec4<double> operator^(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_xor_pd(a.vec, b.vec);
}
// note the argument order: andnot(a, b) computes ~a & b
inline mathSSE::Vec4<double> andnot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_andnot_pd(a.vec, b.vec);
}
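
// These bitwise operators are typically combined with the cmpeq/cmpgt masks
// above for branchless selection. A hypothetical helper, shown only as a
// sketch (select_sketch is not part of this header): keep a where the mask
// lanes are all-ones, b elsewhere:
//
//   inline mathSSE::Vec4<double> select_sketch(mathSSE::Vec4<double> mask,
//                                              mathSSE::Vec4<double> a,
//                                              mathSSE::Vec4<double> b) {
//     return (mask & a) | andnot(mask, b);  // ~mask & b picks the rejected lanes
//   }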

inline mathSSE::Vec4<double> operator+(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_add_pd(a.vec, b.vec);
}

inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_sub_pd(a.vec, b.vec);
}

inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_mul_pd(a.vec, b.vec);
}

inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  return _mm256_div_pd(a.vec, b.vec);
}

inline mathSSE::Vec4<double> operator*(double a, mathSSE::Vec4<double> b) {
  return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
}

inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> b, double a) {
  return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
}

inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> b, double a) {
  return _mm256_div_pd(b.vec, _mm256_set1_pd(a));
}

inline double __attribute__((always_inline)) __attribute__((pure))
dot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  using mathSSE::_mm256_dot_pd;
  mathSSE::Vec4<double> ret;
  ret.vec = _mm256_dot_pd(a.vec, b.vec);
  return ret.arr[0];
}
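
// _mm256_dot_pd is provided elsewhere in mathSSE (presumably SSEVec.h).
// One plausible AVX-only implementation, shown purely as a sketch and not
// necessarily the actual definition:
//
//   inline __m256d dot_sketch(__m256d a, __m256d b) {
//     __m256d mul = _mm256_mul_pd(a, b);                     // (m0, m1, m2, m3)
//     mul = _mm256_hadd_pd(mul, mul);                        // (m0+m1, m0+m1, m2+m3, m2+m3)
//     __m256d swapped = _mm256_permute2f128_pd(mul, mul, 1); // swap 128-bit halves
//     return _mm256_add_pd(mul, swapped);                    // full sum in every lane
//   }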

inline mathSSE::Vec4<double> __attribute__((always_inline)) __attribute__((pure))
cross(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  using mathSSE::_mm256_cross_pd;
  return _mm256_cross_pd(a.vec, b.vec);
}
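
// _mm256_cross_pd likewise comes from mathSSE. For reference, a 3-vector cross
// product (lane 3 ignored) can be built from two (y, z, x) shuffles; the sketch
// below is hypothetical and assumes AVX2 for _mm256_permute4x64_pd:
//
//   inline __m256d cross_sketch(__m256d a, __m256d b) {
//     const int yzxw = _MM_SHUFFLE(3, 0, 2, 1);  // lane order (1, 2, 0, 3)
//     __m256d a_yzx = _mm256_permute4x64_pd(a, yzxw);
//     __m256d b_yzx = _mm256_permute4x64_pd(b, yzxw);
//     __m256d c = _mm256_sub_pd(_mm256_mul_pd(a, b_yzx), _mm256_mul_pd(b, a_yzx));
//     return _mm256_permute4x64_pd(c, yzxw);     // shuffle back into (x, y, z) order
//   }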

inline double __attribute__((always_inline)) __attribute__((pure))
dotxy(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
  mathSSE::Vec4<double> mul = a * b;
  mul = hadd(mul, mul);  // lane 0 now holds a.x*b.x + a.y*b.y
  return mul.arr[0];
}

#endif  // DataFormat_Math_AVXVec_H