Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/JacLoc2CCS.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
// Vectorized branch (compiled when MPLEX_INTRINSICS is defined). The loop advances n by
// MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per iteration and stores to c elements 0..29.
// LD, ST, MUL and FMA are defined outside this fragment. The comments below assume that
// LD(x, k) reads element k of x, ST(c, k, v) writes v to element k of c, MUL(x, y) is x * y
// and FMA(x, y, z) is x * y + z; this agrees with the scalar #else branch -- TODO confirm
// against the macro definitions.
// NOTE(review): the index pattern is consistent with a row-major (6x5) = (6x5) * (5x5)
// product whose structurally zero/unit terms were dropped -- confirm against the generator.
// The interleaving of loads, arithmetic and stores is left exactly as found.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
0004 #ifdef AVX512_INTRINSICS
0005   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};  // 16-element zero initializer
0006 #else
0007   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0};  // 8-element zero initializer
0008 #endif
0009   // Start c[3] and c[4] with their a[3] terms.
0010   IntrVec_t a_3 = LD(a, 3);
0011   IntrVec_t b_18 = LD(b, 18);  // b_18 is reused below for c[8]
0012   IntrVec_t c_3 = MUL(a_3, b_18);  // a[3] * b[18]
0013   IntrVec_t b_19 = LD(b, 19);  // b_19 is reused below for c[9]
0014   IntrVec_t c_4 = MUL(a_3, b_19);  // a[3] * b[19]
0015   // c[0..2] are zero.
0016   ST(c, 0, all_zeros);
0017   ST(c, 1, all_zeros);
0018   ST(c, 2, all_zeros);
0019   IntrVec_t a_4 = LD(a, 4);
0020   IntrVec_t b_23 = LD(b, 23);  // b_23 is reused below for c[8] and c[13]
0021   c_3 = FMA(a_4, b_23, c_3);  // c[3] = a[3] * b[18] + a[4] * b[23]
0022   IntrVec_t b_24 = LD(b, 24);  // b_24 is reused below for c[9] and c[14]
0023   c_4 = FMA(a_4, b_24, c_4);  // c[4] = a[3] * b[19] + a[4] * b[24]
0024   // Start c[8] and c[9] with their a[8] terms; c[3] and c[4] are stored in between.
0025   IntrVec_t a_8 = LD(a, 8);
0026   IntrVec_t c_8 = MUL(a_8, b_18);  // a[8] * b[18]
0027   ST(c, 3, c_3);
0028   IntrVec_t c_9 = MUL(a_8, b_19);  // a[8] * b[19]
0029   ST(c, 4, c_4);
0030   // c[5..7] are zero.
0031   ST(c, 5, all_zeros);
0032   ST(c, 6, all_zeros);
0033   ST(c, 7, all_zeros);
0034   IntrVec_t a_9 = LD(a, 9);
0035   c_8 = FMA(a_9, b_23, c_8);  // c[8] = a[8] * b[18] + a[9] * b[23]
0036   c_9 = FMA(a_9, b_24, c_9);  // c[9] = a[8] * b[19] + a[9] * b[24]
0037   // c[10..12] are zero.
0038   ST(c, 10, all_zeros);
0039   ST(c, 11, all_zeros);
0040   ST(c, 12, all_zeros);
0041   IntrVec_t a_14 = LD(a, 14);
0042   IntrVec_t c_13 = MUL(a_14, b_23);  // c[13] = a[14] * b[23]
0043   IntrVec_t c_14 = MUL(a_14, b_24);  // c[14] = a[14] * b[24]
0044   ST(c, 8, c_8);
0045   ST(c, 9, c_9);
0046   // c[15] is a plain copy of a[15].
0047   IntrVec_t a_15 = LD(a, 15);
0048   IntrVec_t c_15 = a_15;
0049   ST(c, 13, c_13);
0050   ST(c, 14, c_14);
0051   // c[16] and c[17] are single products of a[16].
0052   IntrVec_t a_16 = LD(a, 16);
0053   IntrVec_t b_6 = LD(b, 6);  // b_6 is reused below for c[26]
0054   IntrVec_t c_16 = MUL(a_16, b_6);  // c[16] = a[16] * b[6]
0055   IntrVec_t b_7 = LD(b, 7);  // b_7 is reused below for c[27]
0056   IntrVec_t c_17 = MUL(a_16, b_7);  // c[17] = a[16] * b[7]
0057   // c[18] and c[19] are zero.
0058   ST(c, 18, all_zeros);
0059   ST(c, 19, all_zeros);
0060   // c[21..24] are plain copies of b[11..14]; c[15..17] are stored in between.
0061   IntrVec_t b_11 = LD(b, 11);
0062   IntrVec_t c_21 = b_11;
0063   IntrVec_t b_12 = LD(b, 12);
0064   IntrVec_t c_22 = b_12;
0065   ST(c, 15, c_15);
0066   ST(c, 16, c_16);
0067   ST(c, 17, c_17);
0068   IntrVec_t b_13 = LD(b, 13);
0069   IntrVec_t c_23 = b_13;
0070   IntrVec_t b_14 = LD(b, 14);
0071   IntrVec_t c_24 = b_14;
0072   // c[20] is zero.
0073   ST(c, 20, all_zeros);
0074   // c[26] and c[27] are single products of a[26].
0075   IntrVec_t a_26 = LD(a, 26);
0076   IntrVec_t c_26 = MUL(a_26, b_6);  // c[26] = a[26] * b[6]
0077   IntrVec_t c_27 = MUL(a_26, b_7);  // c[27] = a[26] * b[7]
0078   // c[25], c[28] and c[29] are zero; then the remaining computed values are stored.
0079   ST(c, 25, all_zeros);
0080   ST(c, 28, all_zeros);
0081   ST(c, 29, all_zeros);
0082   ST(c, 21, c_21);
0083   ST(c, 22, c_22);
0084   ST(c, 23, c_23);
0085   ST(c, 24, c_24);
0086   ST(c, 26, c_26);
0087   ST(c, 27, c_27);
0088 }
0089 
0090 #else
0091 
0092 #pragma omp simd
0093 for (int n = 0; n < N; ++n) {
0094   c[0 * N + n] = 0;
0095   c[1 * N + n] = 0;
0096   c[2 * N + n] = 0;
0097   c[3 * N + n] = a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[23 * N + n];
0098   c[4 * N + n] = a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[24 * N + n];
0099   c[5 * N + n] = 0;
0100   c[6 * N + n] = 0;
0101   c[7 * N + n] = 0;
0102   c[8 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[23 * N + n];
0103   c[9 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[24 * N + n];
0104   c[10 * N + n] = 0;
0105   c[11 * N + n] = 0;
0106   c[12 * N + n] = 0;
0107   c[13 * N + n] = a[14 * N + n] * b[23 * N + n];
0108   c[14 * N + n] = a[14 * N + n] * b[24 * N + n];
0109   c[15 * N + n] = a[15 * N + n];
0110   c[16 * N + n] = a[16 * N + n] * b[6 * N + n];
0111   c[17 * N + n] = a[16 * N + n] * b[7 * N + n];
0112   c[18 * N + n] = 0;
0113   c[19 * N + n] = 0;
0114   c[20 * N + n] = 0;
0115   c[21 * N + n] = b[11 * N + n];
0116   c[22 * N + n] = b[12 * N + n];
0117   c[23 * N + n] = b[13 * N + n];
0118   c[24 * N + n] = b[14 * N + n];
0119   c[25 * N + n] = 0;
0120   c[26 * N + n] = a[26 * N + n] * b[6 * N + n];
0121   c[27 * N + n] = a[26 * N + n] * b[7 * N + n];
0122   c[28 * N + n] = 0;
0123   c[29 * N + n] = 0;
0124 }
0125 #endif