File indexing completed on 2024-04-06 12:04:40
0001 #ifndef DataFormat_Math_AVXVec_H
0002 #define DataFormat_Math_AVXVec_H
0003
0004
0005
0006 namespace mathSSE {
0007
0008 template <>
0009 union Vec4<double> {
0010 typedef __m256d nativeType;
0011 __m256d vec;
0012 double __attribute__((aligned(32))) arr[4];
0013 OldVec<double> o;
0014
0015 Vec4(__m256d ivec) : vec(ivec) {}
0016
0017 Vec4(OldVec<double> const& ivec) : o(ivec) {}
0018
0019 Vec4() { vec = _mm256_setzero_pd(); }
0020
0021 inline Vec4(Vec4<float> ivec) { vec = _mm256_cvtps_pd(ivec.vec); }
0022
0023 explicit Vec4(double f1) { set1(f1); }
0024
0025 Vec4(double f1, double f2, double f3, double f4 = 0) {
0026 arr[0] = f1;
0027 arr[1] = f2;
0028 arr[2] = f3;
0029 arr[3] = f4;
0030 }
0031
0032 Vec4(Vec2<double> ivec0, Vec2<double> ivec1) { vec = _mm256_set_m128d(ivec1.vec, ivec0.vec); }
0033
0034 Vec4(Vec2<double> ivec0, double f3, double f4 = 0) {
0035 vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
0036 arr[2] = f3;
0037 arr[3] = f4;
0038 }
0039
0040 Vec4(Vec2<double> ivec0) {
0041 vec = _mm256_setzero_pd();
0042 vec = _mm256_insertf128_pd(vec, ivec0.vec, 0);
0043 }
0044
0045
0046 void setMask(unsigned int m1, unsigned int m2, unsigned int m3, unsigned int m4) {
0047 Mask4<double> mask(m1, m2, m3, m4);
0048 vec = mask.vec;
0049 }
0050
0051 void set(double f1, double f2, double f3, double f4 = 0) { vec = _mm256_set_pd(f4, f3, f2, f1); }
0052
0053 void set1(double f1) { vec = _mm256_set1_pd(f1); }
0054
0055 template <int N>
0056 Vec4 get1() const {
0057 return _mm256_set1_pd(arr[N]);
0058 }
0059
0060
0061
0062
0063
0064 double& operator[](unsigned int n) { return arr[n]; }
0065
0066 double operator[](unsigned int n) const { return arr[n]; }
0067
0068 Vec2<double> xy() const { return Vec2<double>(_mm256_castpd256_pd128(vec)); }
0069 Vec2<double> zw() const { return Vec2<double>(_mm256_castpd256_pd128(_mm256_permute2f128_pd(vec, vec, 1))); }
0070 };
0071
0072 inline Vec4<float>::Vec4(Vec4<double> ivec) { vec = _mm256_cvtpd_ps(ivec.vec); }
0073 }
0074
0075 inline bool operator==(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0076 return _mm256_movemask_pd(_mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS)) == 0xf;
0077 }
0078
0079 inline mathSSE::Vec4<double> cmpeq(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0080 return _mm256_cmp_pd(a.vec, b.vec, _CMP_EQ_OS);
0081 }
0082
0083 inline mathSSE::Vec4<double> cmpgt(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0084 return _mm256_cmp_pd(a.vec, b.vec, _CMP_GT_OS);
0085 }
0086
0087 inline mathSSE::Vec4<double> hadd(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0088 return _mm256_hadd_pd(a.vec, b.vec);
0089 }
0090
0091 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a) {
0092 const __m256d neg = _mm256_set_pd(-0.0, -0.0, -0.0, -0.0);
0093 return _mm256_xor_pd(a.vec, neg);
0094 }
0095
0096 inline mathSSE::Vec4<double> operator&(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0097 return _mm256_and_pd(a.vec, b.vec);
0098 }
0099 inline mathSSE::Vec4<double> operator|(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0100 return _mm256_or_pd(a.vec, b.vec);
0101 }
0102 inline mathSSE::Vec4<double> operator^(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0103 return _mm256_xor_pd(a.vec, b.vec);
0104 }
0105 inline mathSSE::Vec4<double> andnot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0106 return _mm256_andnot_pd(a.vec, b.vec);
0107 }
0108
0109 inline mathSSE::Vec4<double> operator+(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0110 return _mm256_add_pd(a.vec, b.vec);
0111 }
0112
0113 inline mathSSE::Vec4<double> operator-(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0114 return _mm256_sub_pd(a.vec, b.vec);
0115 }
0116
0117 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0118 return _mm256_mul_pd(a.vec, b.vec);
0119 }
0120
0121 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0122 return _mm256_div_pd(a.vec, b.vec);
0123 }
0124
0125 inline mathSSE::Vec4<double> operator*(double a, mathSSE::Vec4<double> b) {
0126 return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
0127 }
0128
0129 inline mathSSE::Vec4<double> operator*(mathSSE::Vec4<double> b, double a) {
0130 return _mm256_mul_pd(_mm256_set1_pd(a), b.vec);
0131 }
0132
0133 inline mathSSE::Vec4<double> operator/(mathSSE::Vec4<double> b, double a) {
0134 return _mm256_div_pd(b.vec, _mm256_set1_pd(a));
0135 }
0136
0137 inline double __attribute__((always_inline)) __attribute__((pure))
0138 dot(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0139 using mathSSE::_mm256_dot_pd;
0140 mathSSE::Vec4<double> ret;
0141 ret.vec = _mm256_dot_pd(a.vec, b.vec);
0142 return ret.arr[0];
0143 }
0144
0145 inline mathSSE::Vec4<double> __attribute__((always_inline)) __attribute__((pure))
0146 cross(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0147 using mathSSE::_mm256_cross_pd;
0148 return _mm256_cross_pd(a.vec, b.vec);
0149 }
0150
0151 inline double __attribute__((always_inline)) __attribute__((pure))
0152 dotxy(mathSSE::Vec4<double> a, mathSSE::Vec4<double> b) {
0153 mathSSE::Vec4<double> mul = a * b;
0154 mul = hadd(mul, mul);
0155 return mul.arr[0];
0156 }
0157
0158 #endif