Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/JacCCS2Loc.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
     // Vector-macro variant of the JacCCS2Loc kernel: writes all 30 output
     // slots c[0]..c[29] (slot k is written by ST(c, k, ...)) from selected
     // slots of a (6..24) and b (3..26).
     //
     // This is a bare fragment: a, b, c, N, T, IntrVec_t and the LD / ST / MUL /
     // FMA macros are not defined here and must come from the including context.
     //
     // NOTE(review): LD(x, k) / ST(c, k, v) presumably access slot k at lane
     // offset n (scalar equivalent x[k * N + n]) and FMA(x, y, z) presumably
     // yields x * y + z -- both readings match the scalar #else branch of this
     // file, but the macro definitions are not visible here.
     // NOTE(review): the index pattern is consistent with a row-major product
     // C(5x6) = A(5x5) * B(5x6) with structurally zero/one entries folded away
     // (e.g. c[6] = a[8]*b[18] + a[9]*b[24]); inferred from indices only --
     // confirm against whatever produces this file.
     // NOTE(review): stores to c are interleaved with later loads of a and b.
     // Do not reorder statements unless c is known not to overlap a or b.
     //
     // n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per iteration --
     // presumably the number of T lanes held by one IntrVec_t.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
       // Zero vector used for every output slot that is constant 0.
       // 16 initializers with AVX512_INTRINSICS, 8 otherwise.
       // NOTE(review): fixed counts look tied to a 4-byte T -- confirm.
0004 #ifdef AVX512_INTRINSICS
0005   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0006 #else
0007   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0};
0008 #endif
0009 
       // Slots 3 and 5 are copied from b unchanged (stored further down).
0010   IntrVec_t b_3 = LD(b, 3);
0011   IntrVec_t c_3 = b_3;
0012   IntrVec_t b_5 = LD(b, 5);
0013   IntrVec_t c_5 = b_5;
0014 
       // Slots 0, 1, 2 and 4 are zero.
0015   ST(c, 0, all_zeros);
0016   ST(c, 1, all_zeros);
0017   ST(c, 2, all_zeros);
0018   ST(c, 4, all_zeros);
0019 
       // c[11] = a[6] * b[11]; b_11 is kept and reused for c_17 below.
0020   IntrVec_t a_6 = LD(a, 6);
0021   IntrVec_t b_11 = LD(b, 11);
0022   IntrVec_t c_11 = MUL(a_6, b_11);
0023 
       // c[10] is a plain copy of a[7].
0024   IntrVec_t a_7 = LD(a, 7);
0025   IntrVec_t c_10 = a_7;
0026   ST(c, 3, c_3);
0027   ST(c, 5, c_5);
0028 
       // First terms of c[6], c[7]. b_18 and b_19 are loaded once here and
       // reused for slots 12/13, 18/19 and 24/25.
0029   IntrVec_t a_8 = LD(a, 8);
0030   IntrVec_t b_18 = LD(b, 18);
0031   IntrVec_t c_6 = MUL(a_8, b_18);
0032   IntrVec_t b_19 = LD(b, 19);
0033   IntrVec_t c_7 = MUL(a_8, b_19);
0034 
       // Second terms of c[6], c[7] plus c[8]. b_24, b_25 and b_26 are loaded
       // once here and reused for slots 12-14, 18-20 and 24-26.
0035   IntrVec_t a_9 = LD(a, 9);
0036   IntrVec_t b_24 = LD(b, 24);
0037   c_6 = FMA(a_9, b_24, c_6);
0038   IntrVec_t b_25 = LD(b, 25);
0039   c_7 = FMA(a_9, b_25, c_7);
0040   IntrVec_t b_26 = LD(b, 26);
0041   IntrVec_t c_8 = MUL(a_9, b_26);
0042   ST(c, 6, c_6);
0043   ST(c, 9, all_zeros);
0044 
       // c[17] = a[11] * b[11], using the b_11 loaded earlier.
0045   IntrVec_t a_11 = LD(a, 11);
0046   IntrVec_t c_17 = MUL(a_11, b_11);
0047   ST(c, 7, c_7);
0048   ST(c, 8, c_8);
0049   ST(c, 10, c_10);
0050   ST(c, 11, c_11);
0051 
       // c[16] is a plain copy of a[12].
0052   IntrVec_t a_12 = LD(a, 12);
0053   IntrVec_t c_16 = a_12;
0054 
       // Slots 12-14: same b operands as slots 6-8, with a[13] / a[14].
0055   IntrVec_t a_13 = LD(a, 13);
0056   IntrVec_t c_12 = MUL(a_13, b_18);
0057   IntrVec_t c_13 = MUL(a_13, b_19);
0058 
0059   IntrVec_t a_14 = LD(a, 14);
0060   c_12 = FMA(a_14, b_24, c_12);
0061   c_13 = FMA(a_14, b_25, c_13);
0062   IntrVec_t c_14 = MUL(a_14, b_26);
0063   ST(c, 15, all_zeros);
0064 
       // Slots 18-20: same b operands again, with a[18] / a[19].
0065   IntrVec_t a_18 = LD(a, 18);
0066   IntrVec_t c_18 = MUL(a_18, b_18);
0067   ST(c, 12, c_12);
0068   ST(c, 13, c_13);
0069   ST(c, 14, c_14);
0070   ST(c, 16, c_16);
0071   ST(c, 17, c_17);
0072   IntrVec_t c_19 = MUL(a_18, b_19);
0073 
0074   IntrVec_t a_19 = LD(a, 19);
0075   c_18 = FMA(a_19, b_24, c_18);
0076   c_19 = FMA(a_19, b_25, c_19);
0077   IntrVec_t c_20 = MUL(a_19, b_26);
0078   ST(c, 21, all_zeros);
0079   ST(c, 22, all_zeros);
0080   ST(c, 23, all_zeros);
0081 
       // Slots 24-26: same b operands again, with a[23] / a[24].
0082   IntrVec_t a_23 = LD(a, 23);
0083   IntrVec_t c_24 = MUL(a_23, b_18);
0084   ST(c, 18, c_18);
0085   ST(c, 19, c_19);
0086   ST(c, 20, c_20);
0087   IntrVec_t c_25 = MUL(a_23, b_19);
0088 
0089   IntrVec_t a_24 = LD(a, 24);
0090   c_24 = FMA(a_24, b_24, c_24);
0091   c_25 = FMA(a_24, b_25, c_25);
0092   IntrVec_t c_26 = MUL(a_24, b_26);
0093   ST(c, 27, all_zeros);
0094   ST(c, 28, all_zeros);
0095   ST(c, 29, all_zeros);
0096   ST(c, 24, c_24);
0097   ST(c, 25, c_25);
0098   ST(c, 26, c_26);
0099 }
0100 
0101 #else  // !MPLEX_INTRINSICS
0102 
     // Scalar variant of the JacCCS2Loc kernel, used when MPLEX_INTRINSICS is
     // not defined. For each n in [0, N) it writes the 30 output elements
     // c[k * N + n], k = 0..29, from elements of a and b at the same n.
     // Element k for index n lives at x[k * N + n], so for a fixed k the N
     // values are contiguous.
     //
     // This is a bare fragment: a, b, c and N are not declared here and must
     // come from the including context.
     //
     // NOTE(review): the index pattern is consistent with a row-major product
     // C(5x6) = A(5x5) * B(5x6) with structurally zero/one entries folded away;
     // inferred from indices only -- confirm against whatever produces this file.
     // NOTE(review): statements read a and b after earlier elements of c have
     // been written (e.g. b[11] is read after c[11] is stored). Keep the order
     // unless c is known not to overlap a or b.
     //
     // The OpenMP simd directive applies to the for loop that follows it.
0103 #pragma omp simd
0104 for (int n = 0; n < N; ++n) {
0105   c[0 * N + n] = 0;
0106   c[1 * N + n] = 0;
0107   c[2 * N + n] = 0;
0108   c[3 * N + n] = b[3 * N + n];  // copied from b unchanged
0109   c[4 * N + n] = 0;
0110   c[5 * N + n] = b[5 * N + n];  // copied from b unchanged
       // Elements 6-8, 12-14, 18-20 and 24-26 all combine b[18], b[19], b[24],
       // b[25], b[26] with a different pair of a elements.
0111   c[6 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[24 * N + n];
0112   c[7 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[25 * N + n];
0113   c[8 * N + n] = a[9 * N + n] * b[26 * N + n];
0114   c[9 * N + n] = 0;
0115   c[10 * N + n] = a[7 * N + n];  // copied from a unchanged
0116   c[11 * N + n] = a[6 * N + n] * b[11 * N + n];
0117   c[12 * N + n] = a[13 * N + n] * b[18 * N + n] + a[14 * N + n] * b[24 * N + n];
0118   c[13 * N + n] = a[13 * N + n] * b[19 * N + n] + a[14 * N + n] * b[25 * N + n];
0119   c[14 * N + n] = a[14 * N + n] * b[26 * N + n];
0120   c[15 * N + n] = 0;
0121   c[16 * N + n] = a[12 * N + n];  // copied from a unchanged
0122   c[17 * N + n] = a[11 * N + n] * b[11 * N + n];
0123   c[18 * N + n] = a[18 * N + n] * b[18 * N + n] + a[19 * N + n] * b[24 * N + n];
0124   c[19 * N + n] = a[18 * N + n] * b[19 * N + n] + a[19 * N + n] * b[25 * N + n];
0125   c[20 * N + n] = a[19 * N + n] * b[26 * N + n];
0126   c[21 * N + n] = 0;
0127   c[22 * N + n] = 0;
0128   c[23 * N + n] = 0;
0129   c[24 * N + n] = a[23 * N + n] * b[18 * N + n] + a[24 * N + n] * b[24 * N + n];
0130   c[25 * N + n] = a[23 * N + n] * b[19 * N + n] + a[24 * N + n] * b[25 * N + n];
0131   c[26 * N + n] = a[24 * N + n] * b[26 * N + n];
0132   c[27 * N + n] = 0;
0133   c[28 * N + n] = 0;
0134   c[29 * N + n] = 0;
0135 }
0136 #endif  // MPLEX_INTRINSICS