interface/private/AVXVec.h

0001 #ifndef DataFormat_Math_AVXVec_H
0002 #define DataFormat_Math_AVXVec_H
0003
// This header is not meant to be included on its own:
// it is only valid when pulled in as part of SSEVec.
0006 namespace mathSSE {
0007
0008   template <>
0009   union Vec4<double> {
0010     typedef __m256d nativeType;
0011     __m256d vec;
0012     double __attribute__((aligned(32))) arr[4];
0013     OldVec<double> o;
0014
0015     Vec4(__m256d ivec) : vec(ivec) {}
0016
0017     Vec4(OldVec<double> const& ivec) : o(ivec) {}
0018
0019     Vec4() { vec = _mm256_setzero_pd(); }
0020
0021     inline Vec4(Vec4<float> ivec) { vec = _mm256_cvtps_pd(ivec.vec); }
0022
0023     explicit Vec4(double f1) { set1(f1); }
0024
0025     Vec4(double f1, double f2, double f3, double f4 = 0) {
0026       arr[0] = f1;
0027       arr[1] = f2;
0028       arr[2] = f3;
0029       arr[3] = f4;
0030     }
0031
0032     Vec4(Vec2<double> ivec0, Vec2<double> ivec1) { vec = _mm256_set_m128d(ivec1.vec, ivec0.vec); }
0033
0034     Vec4(Vec2<double> ivec0, double f3, double f4 = 0) {
0035       vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
0036       arr[2] = f3;
0037       arr[3] = f4;
0038     }
0039
0040     Vec4(Vec2<double> ivec0) {
0041       vec = _mm256_setzero_pd();
0042       vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
0043     }
0044
0045     // for masking
0046     void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
0047       Mask4<double> mask(m1, m2, m3, m4);
0048       vec = mask.vec;
0049     }
0050
0051     void set(double f1, double f2, double f3, double f4 = 0) { vec = _mm256_set_pd(f4, f3, f2, f1); }
0052
0053     void set1(double f1) { vec = _mm256_set1_pd(f1); }
0054
0055     template <int N>
0056     Vec4 get1() const {
0057       return _mm256_set1_pd(arr[N]);  //FIXME
0058     }
0059     /*
0060     Vec4 get1(unsigned int n) const {
0061       return _mm256_set1_pd(arr[n]); //FIXME
0062     }
0063     */
0064     double& operator[](unsigned int n) { return arr[n]; }
0065
0066     double operator[](unsigned int n) const { return arr[n]; }
0067
0068     Vec2<double> xy() const { return Vec2<double>(_mm256_castpd256_pd128(vec)); }
0069     Vec2<double> zw() const { return Vec2<double>(_mm256_castpd256_pd128(_mm256_permute2f128_pd(vec, vec, 1))); }
0070   };
0071
0072   inline Vec4<float>::Vec4(Vec4<double> ivec) { vec = _mm256_cvtpd_ps(ivec.vec); }
0073 }  // namespace mathSSE
0074
0075 inline bool operator==(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0076   return _mm256_movemask_pd(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS)) == 0xf;
0077 }
0078
0079 inline mathSSE::Vec4<double> cmpeq(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0080   return _mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS);
0081 }
0082
0083 inline mathSSE::Vec4<double> cmpgt(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0084   return _mm256_cmp_pd(a.vec, b.vec, _CMP_GT_OS);
0085 }
0086
0087 inline mathSSE::Vec4<double> hadd(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0088   return _mm256_hadd_pd(a.vec, b.vec);
0089 }
0090
0091 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a) {
0092   const __m256d neg = _mm256_set_pd(-0.0, -0.0, -0.0, -0.0);
0093   return _mm256_xor_pd(a.vec, neg);
0094 }
0095
0096 inline mathSSE::Vec4<double> operator&(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0097   return _mm256_and_pd(a.vec, b.vec);
0098 }
0099 inline mathSSE::Vec4<double> operator|(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0100   return _mm256_or_pd(a.vec, b.vec);
0101 }
0102 inline mathSSE::Vec4<double> operator^(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0103   return _mm256_xor_pd(a.vec, b.vec);
0104 }
0105 inline mathSSE::Vec4<double> andnot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0106   return _mm256_andnot_pd(a.vec, b.vec);
0107 }
0108
0109 inline mathSSE::Vec4<double> operator+(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0110   return _mm256_add_pd(a.vec, b.vec);
0111 }
0112
0113 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0114   return _mm256_sub_pd(a.vec, b.vec);
0115 }
0116
0117 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0118   return _mm256_mul_pd(a.vec, b.vec);
0119 }
0120
0121 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0122   return _mm256_div_pd(a.vec, b.vec);
0123 }
0124
0125 inline mathSSE::Vec4<double> operator*(double a, mathSSE::Vec4<double> b) {
0126   return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
0127 }
0128
0129 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> b, double a) {
0130   return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
0131 }
0132
0133 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> b, double a) {
0134   return _mm256_div_pd(b.vec, _mm256_set1_pd(a));
0135 }
0136
0137 inline double __attribute__((always_inline)) __attribute__((pure)) dot(mathSSE::Vec4<double> a,
0138                                                                        mathSSE::Vec4<double> b) {
0139   using mathSSE::_mm256_dot_pd;
0140   mathSSE::Vec4<double> ret;
0141   ret.vec = _mm256_dot_pd(a.vec, b.vec);
0142   return ret.arr[0];
0143 }
0144
0145 inline mathSSE::Vec4<double> __attribute__((always_inline)) __attribute__((pure)) cross(mathSSE::Vec4<double> a,
0146                                                                                         mathSSE::Vec4<double> b) {
0147   using mathSSE::_mm256_cross_pd;
0148   return _mm256_cross_pd(a.vec, b.vec);
0149 }
0150
0151 inline double __attribute__((always_inline)) __attribute__((pure)) dotxy(mathSSE::Vec4<double> a,
0152                                                                          mathSSE::Vec4<double> b) {
0153   mathSSE::Vec4<double> mul = a * b;
0154   mul = hadd(mul, mul);
0155   return mul.arr[0];
0156 }
0157
0158 #endif