Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:24

#include "immintrin.h"

#include <cstdio>
#include <cstdlib>
0004 
// Number of floats in each buffer allocated by main().
const int NN = 64;

// Short-hand wrappers around the AVX-512 single-precision intrinsics.
//
// LD / ST address memory in blocks of 16 floats (one __m512 register):
// `a` is a float pointer/array and `i` is the block index, so the element
// touched first is a[i*16].  The underlying load/store intrinsics are the
// aligned variants, so &a[i*16] is expected to be 64-byte aligned.
//
// Every macro argument is parenthesized so that compound expressions such as
// LD(p, k + 1) or LD(base + off, 0) expand with the intended precedence.
#define LD(a, i)      _mm512_load_ps(&(a)[(i)*16])
#define ADD(a, b)     _mm512_add_ps((a), (b))
#define MUL(a, b)     _mm512_mul_ps((a), (b))
#define FMA(a, b, v)  _mm512_fmadd_ps((a), (b), (v))
#define ST(a, i, r)   _mm512_store_ps(&(a)[(i)*16], (r))
0012 
// A __m512 can be brace-initialized at namespace scope too:
// all 16 float lanes of this vector hold 1.
__m512 all_ones = {
  1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f
};
0015 
0016 int main()
0017 {
0018   float *p = (float*) std::aligned_alloc(64, NN*sizeof(float));
0019   float *q = (float*) std::aligned_alloc(64, NN*sizeof(float));
0020 
0021   for (int i = 0; i < NN; ++i)
0022   {
0023     p[i] = i;
0024   }
0025 
0026   __m512 a = LD(p, 0);
0027   __m512 b = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 };//LD(p, 1);
0028 
0029   b = all_ones;
0030 
0031   __m512 c = ADD(a, b);
0032 
0033   ST(q, 0, c);
0034 
0035   for (int i = 0; i < 16; ++i)
0036   {
0037     printf("%2d %4.0f %4.0f %4.0f\n", i, p[i], p[i+16], q[i]);
0038   }
0039 
0040   std::free(p);
0041   std::free(q);
0042 
0043   return 0;
0044 }