Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/KH.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004    {
0005       #ifdef AVX512_INTRINSICS
0006       IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
0007       #else
0008       IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };
0009       #endif
0010 
0011       IntrVec_t a_0 = LD(a, 0);
0012       IntrVec_t b_0 = LD(b, 0);
0013       IntrVec_t c_0 = MUL(a_0, b_0);
0014       IntrVec_t b_1 = LD(b, 1);
0015       IntrVec_t c_1 = MUL(a_0, b_1);
0016 
0017       IntrVec_t a_1 = LD(a, 1);
0018       IntrVec_t c_2 = a_1;
0019 
0020       ST(c, 3, all_zeros);
0021       ST(c, 4, all_zeros);
0022       ST(c, 5, all_zeros);
0023 
0024       IntrVec_t a_3 = LD(a, 3);
0025       IntrVec_t c_6 = MUL(a_3, b_0);
0026       IntrVec_t c_7 = MUL(a_3, b_1);
0027 
0028       IntrVec_t a_4 = LD(a, 4);
0029       IntrVec_t c_8 = a_4;
0030       ST(c, 0, c_0);
0031       ST(c, 1, c_1);
0032       ST(c, 2, c_2);
0033 
0034       ST(c, 9, all_zeros);
0035       ST(c, 10, all_zeros);
0036       ST(c, 11, all_zeros);
0037 
0038       IntrVec_t a_6 = LD(a, 6);
0039       IntrVec_t c_12 = MUL(a_6, b_0);
0040       IntrVec_t c_13 = MUL(a_6, b_1);
0041 
0042       IntrVec_t a_7 = LD(a, 7);
0043       IntrVec_t c_14 = a_7;
0044       ST(c, 6, c_6);
0045       ST(c, 7, c_7);
0046       ST(c, 8, c_8);
0047 
0048       ST(c, 15, all_zeros);
0049       ST(c, 16, all_zeros);
0050       ST(c, 17, all_zeros);
0051 
0052       IntrVec_t a_9 = LD(a, 9);
0053       IntrVec_t c_18 = MUL(a_9, b_0);
0054       IntrVec_t c_19 = MUL(a_9, b_1);
0055 
0056       IntrVec_t a_10 = LD(a, 10);
0057       IntrVec_t c_20 = a_10;
0058       ST(c, 12, c_12);
0059       ST(c, 13, c_13);
0060       ST(c, 14, c_14);
0061 
0062       ST(c, 21, all_zeros);
0063       ST(c, 22, all_zeros);
0064       ST(c, 23, all_zeros);
0065 
0066       IntrVec_t a_12 = LD(a, 12);
0067       IntrVec_t c_24 = MUL(a_12, b_0);
0068       IntrVec_t c_25 = MUL(a_12, b_1);
0069 
0070       IntrVec_t a_13 = LD(a, 13);
0071       IntrVec_t c_26 = a_13;
0072       ST(c, 18, c_18);
0073       ST(c, 19, c_19);
0074       ST(c, 20, c_20);
0075 
0076       ST(c, 27, all_zeros);
0077       ST(c, 28, all_zeros);
0078       ST(c, 29, all_zeros);
0079 
0080       IntrVec_t a_15 = LD(a, 15);
0081       IntrVec_t c_30 = MUL(a_15, b_0);
0082       IntrVec_t c_31 = MUL(a_15, b_1);
0083 
0084       IntrVec_t a_16 = LD(a, 16);
0085       IntrVec_t c_32 = a_16;
0086       ST(c, 24, c_24);
0087       ST(c, 25, c_25);
0088       ST(c, 26, c_26);
0089 
0090       ST(c, 33, all_zeros);
0091       ST(c, 34, all_zeros);
0092       ST(c, 35, all_zeros);
0093       ST(c, 30, c_30);
0094       ST(c, 31, c_31);
0095       ST(c, 32, c_32);
0096    }
0097 
0098 #else
0099 
0100 #pragma omp simd
0101    for (int n = 0; n < N; ++n)
0102    {
0103       c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n];
0104       c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n];
0105       c[ 2*N+n] = a[ 1*N+n];
0106       c[ 3*N+n] = 0;
0107       c[ 4*N+n] = 0;
0108       c[ 5*N+n] = 0;
0109       c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n];
0110       c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n];
0111       c[ 8*N+n] = a[ 4*N+n];
0112       c[ 9*N+n] = 0;
0113       c[10*N+n] = 0;
0114       c[11*N+n] = 0;
0115       c[12*N+n] = a[ 6*N+n]*b[ 0*N+n];
0116       c[13*N+n] = a[ 6*N+n]*b[ 1*N+n];
0117       c[14*N+n] = a[ 7*N+n];
0118       c[15*N+n] = 0;
0119       c[16*N+n] = 0;
0120       c[17*N+n] = 0;
0121       c[18*N+n] = a[ 9*N+n]*b[ 0*N+n];
0122       c[19*N+n] = a[ 9*N+n]*b[ 1*N+n];
0123       c[20*N+n] = a[10*N+n];
0124       c[21*N+n] = 0;
0125       c[22*N+n] = 0;
0126       c[23*N+n] = 0;
0127       c[24*N+n] = a[12*N+n]*b[ 0*N+n];
0128       c[25*N+n] = a[12*N+n]*b[ 1*N+n];
0129       c[26*N+n] = a[13*N+n];
0130       c[27*N+n] = 0;
0131       c[28*N+n] = 0;
0132       c[29*N+n] = 0;
0133       c[30*N+n] = a[15*N+n]*b[ 0*N+n];
0134       c[31*N+n] = a[15*N+n]*b[ 1*N+n];
0135       c[32*N+n] = a[16*N+n];
0136       c[33*N+n] = 0;
0137       c[34*N+n] = 0;
0138       c[35*N+n] = 0;
0139    }
0140 #endif