File indexing completed on 2024-04-06 12:28:24
#include "immintrin.h"

#include <cstdio>
#include <cstdlib>
0004
// Number of floats in each buffer that main() allocates.
constexpr int NN = 64;
0006
// Short-hand wrappers around the AVX-512 single-precision intrinsics.
//
// LD and ST address memory in units of one 16-float vector: index i refers
// to the 16 floats starting at element i*16 of a. The underlying
// _mm512_load_ps / _mm512_store_ps intrinsics require that address to be
// 64-byte aligned.
//
// Every macro argument is parenthesized so that expression arguments such as
// LD(p, i + 1) or LD(p + 16, 0) expand with the intended precedence.
#define LD(a, i) _mm512_load_ps(&(a)[(i) * 16])
#define ADD(a, b) _mm512_add_ps((a), (b))
#define MUL(a, b) _mm512_mul_ps((a), (b))
// Fused multiply-add: a * b + v.
#define FMA(a, b, v) _mm512_fmadd_ps((a), (b), (v))
#define ST(a, i, r) _mm512_store_ps(&(a)[(i) * 16], (r))
0012
0013
// 512-bit vector whose 16 single-precision lanes are all 1.
__m512 all_ones = {
    1.0f, 1.0f, 1.0f, 1.0f,
    1.0f, 1.0f, 1.0f, 1.0f,
    1.0f, 1.0f, 1.0f, 1.0f,
    1.0f, 1.0f, 1.0f, 1.0f,
};
0015
0016 int main()
0017 {
0018 float *p = (float*) std::aligned_alloc(64, NN*sizeof(float));
0019 float *q = (float*) std::aligned_alloc(64, NN*sizeof(float));
0020
0021 for (int i = 0; i < NN; ++i)
0022 {
0023 p[i] = i;
0024 }
0025
0026 __m512 a = LD(p, 0);
0027 __m512 b = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 };
0028
0029 b = all_ones;
0030
0031 __m512 c = ADD(a, b);
0032
0033 ST(q, 0, c);
0034
0035 for (int i = 0; i < 16; ++i)
0036 {
0037 printf("%2d %4.0f %4.0f %4.0f\n", i, p[i], p[i+16], q[i]);
0038 }
0039
0040 std::free(p);
0041 std::free(q);
0042
0043 return 0;
0044 }