Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/JacErrPropCurv1.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
// Vectorized branch (compiled when MPLEX_INTRINSICS is defined): each pass
// advances n by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) and writes output
// planes 0..29 of c from selected planes of a and b. The plane indices and
// products are the same ones used by the scalar loop in the #else branch.
// NOTE(review): IntrVec_t, LD, ST, MUL and FMA are defined outside this
// fragment. Presumably LD(x, k) / ST(x, k, v) access plane k of x at the
// current n and FMA(x, y, z) yields x * y + z -- confirm against their
// definitions.
// NOTE(review): the index pattern looks like a sparse (6x5) * (5x5) row-major
// product with constant 0/1 entries folded away -- confirm with the generator.
// Loads and stores are interleaved on purpose; keep the statement order.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
       // Zero vector for the planes that are always zero (18, 19, 25, 28, 29).
       // The initializer lists 16 elements under AVX512_INTRINSICS, 8 otherwise.
0004 #ifdef AVX512_INTRINSICS
0005   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0006 #else
0007   IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0};
0008 #endif
0009 
       // Planes 0..4, first term: a[3] * b[15..19].
0010   IntrVec_t a_3 = LD(a, 3);
0011   IntrVec_t b_15 = LD(b, 15);
0012   IntrVec_t c_0 = MUL(a_3, b_15);
0013   IntrVec_t b_16 = LD(b, 16);
0014   IntrVec_t c_1 = MUL(a_3, b_16);
0015   IntrVec_t b_17 = LD(b, 17);
0016   IntrVec_t c_2 = MUL(a_3, b_17);
0017   IntrVec_t b_18 = LD(b, 18);
0018   IntrVec_t c_3 = MUL(a_3, b_18);
0019   IntrVec_t b_19 = LD(b, 19);
0020   IntrVec_t c_4 = MUL(a_3, b_19);
0021 
       // Planes 0..4, second term: accumulate a[4] * b[20..24]. Stores of
       // finished planes are interleaved with the remaining loads.
0022   IntrVec_t a_4 = LD(a, 4);
0023   IntrVec_t b_20 = LD(b, 20);
0024   c_0 = FMA(a_4, b_20, c_0);
0025   IntrVec_t b_21 = LD(b, 21);
0026   c_1 = FMA(a_4, b_21, c_1);
0027   IntrVec_t b_22 = LD(b, 22);
0028   c_2 = FMA(a_4, b_22, c_2);
0029   ST(c, 0, c_0);
0030   IntrVec_t b_23 = LD(b, 23);
0031   c_3 = FMA(a_4, b_23, c_3);
0032   ST(c, 1, c_1);
0033   ST(c, 2, c_2);
0034   IntrVec_t b_24 = LD(b, 24);
0035   c_4 = FMA(a_4, b_24, c_4);
0036   ST(c, 3, c_3);
0037 
       // Planes 5..9, first term: a[8] * b[15..19], reusing the b vectors
       // loaded above. The store of plane 4 from the previous group lands here.
0038   IntrVec_t a_8 = LD(a, 8);
0039   IntrVec_t c_5 = MUL(a_8, b_15);
0040   IntrVec_t c_6 = MUL(a_8, b_16);
0041   ST(c, 4, c_4);
0042   IntrVec_t c_7 = MUL(a_8, b_17);
0043   IntrVec_t c_8 = MUL(a_8, b_18);
0044   IntrVec_t c_9 = MUL(a_8, b_19);
0045 
       // Planes 5..9, second term: accumulate a[9] * b[20..24], then store.
0046   IntrVec_t a_9 = LD(a, 9);
0047   c_5 = FMA(a_9, b_20, c_5);
0048   c_6 = FMA(a_9, b_21, c_6);
0049   c_7 = FMA(a_9, b_22, c_7);
0050   c_8 = FMA(a_9, b_23, c_8);
0051   c_9 = FMA(a_9, b_24, c_9);
0052   ST(c, 5, c_5);
0053   ST(c, 6, c_6);
0054   ST(c, 7, c_7);
0055   ST(c, 8, c_8);
0056   ST(c, 9, c_9);
0057 
       // Planes 10..14: single-term products a[14] * b[20..24].
0058   IntrVec_t a_14 = LD(a, 14);
0059   IntrVec_t c_10 = MUL(a_14, b_20);
0060   IntrVec_t c_11 = MUL(a_14, b_21);
0061   IntrVec_t c_12 = MUL(a_14, b_22);
0062   IntrVec_t c_13 = MUL(a_14, b_23);
0063   IntrVec_t c_14 = MUL(a_14, b_24);
0064   ST(c, 10, c_10);
0065   ST(c, 11, c_11);
0066   ST(c, 12, c_12);
0067   ST(c, 13, c_13);
0068   ST(c, 14, c_14);
0069 
       // Plane 15 is a plain copy of a[15] (no multiplication).
0070   IntrVec_t a_15 = LD(a, 15);
0071   IntrVec_t c_15 = a_15;
0072 
       // Planes 16, 17: a[16] * b[6] and a[16] * b[7].
0073   IntrVec_t a_16 = LD(a, 16);
0074   IntrVec_t b_6 = LD(b, 6);
0075   IntrVec_t c_16 = MUL(a_16, b_6);
0076   IntrVec_t b_7 = LD(b, 7);
0077   IntrVec_t c_17 = MUL(a_16, b_7);
0078 
       // Planes 18, 19 are always zero.
0079   ST(c, 18, all_zeros);
0080   ST(c, 19, all_zeros);
0081 
       // Planes 20..24 are plain copies of b[10..14]; the pending stores of
       // planes 15..17 are issued in between the loads.
0082   IntrVec_t b_10 = LD(b, 10);
0083   IntrVec_t c_20 = b_10;
0084   IntrVec_t b_11 = LD(b, 11);
0085   IntrVec_t c_21 = b_11;
0086   ST(c, 15, c_15);
0087   ST(c, 16, c_16);
0088   ST(c, 17, c_17);
0089   IntrVec_t b_12 = LD(b, 12);
0090   IntrVec_t c_22 = b_12;
0091   IntrVec_t b_13 = LD(b, 13);
0092   IntrVec_t c_23 = b_13;
0093   IntrVec_t b_14 = LD(b, 14);
0094   IntrVec_t c_24 = b_14;
0095 
       // Planes 26, 27: a[26] * b[6] and a[26] * b[7], reusing b_6 / b_7.
0096   IntrVec_t a_26 = LD(a, 26);
0097   IntrVec_t c_26 = MUL(a_26, b_6);
0098   IntrVec_t c_27 = MUL(a_26, b_7);
0099 
       // Zero planes 25, 28, 29, then store the remaining results
       // (planes 20..24, 26, 27).
0100   ST(c, 25, all_zeros);
0101   ST(c, 28, all_zeros);
0102   ST(c, 29, all_zeros);
0103   ST(c, 20, c_20);
0104   ST(c, 21, c_21);
0105   ST(c, 22, c_22);
0106   ST(c, 23, c_23);
0107   ST(c, 24, c_24);
0108   ST(c, 26, c_26);
0109   ST(c, 27, c_27);
0110 }
0111 
0112 #else
0113 
// Scalar branch (compiled when MPLEX_INTRINSICS is not defined): for every n
// in [0, N) it writes the 30 output values c[k * N + n], k = 0..29, from
// selected elements of a and b. Element k for a given n sits at index
// k * N + n in all three arrays. The OpenMP simd pragma asks the compiler to
// vectorize the loop over n.
// NOTE(review): the index pattern looks like a sparse (6x5) * (5x5) row-major
// product with constant 0/1 entries folded away -- confirm with the generator.
0114 #pragma omp simd
0115 for (int n = 0; n < N; ++n) {
       // k = 0..4: a[3] * b[15..19] + a[4] * b[20..24]
0116   c[0 * N + n] = a[3 * N + n] * b[15 * N + n] + a[4 * N + n] * b[20 * N + n];
0117   c[1 * N + n] = a[3 * N + n] * b[16 * N + n] + a[4 * N + n] * b[21 * N + n];
0118   c[2 * N + n] = a[3 * N + n] * b[17 * N + n] + a[4 * N + n] * b[22 * N + n];
0119   c[3 * N + n] = a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[23 * N + n];
0120   c[4 * N + n] = a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[24 * N + n];
       // k = 5..9: a[8] * b[15..19] + a[9] * b[20..24]
0121   c[5 * N + n] = a[8 * N + n] * b[15 * N + n] + a[9 * N + n] * b[20 * N + n];
0122   c[6 * N + n] = a[8 * N + n] * b[16 * N + n] + a[9 * N + n] * b[21 * N + n];
0123   c[7 * N + n] = a[8 * N + n] * b[17 * N + n] + a[9 * N + n] * b[22 * N + n];
0124   c[8 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[23 * N + n];
0125   c[9 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[24 * N + n];
       // k = 10..14: single-term products a[14] * b[20..24]
0126   c[10 * N + n] = a[14 * N + n] * b[20 * N + n];
0127   c[11 * N + n] = a[14 * N + n] * b[21 * N + n];
0128   c[12 * N + n] = a[14 * N + n] * b[22 * N + n];
0129   c[13 * N + n] = a[14 * N + n] * b[23 * N + n];
0130   c[14 * N + n] = a[14 * N + n] * b[24 * N + n];
       // k = 15: plain copy of a[15]; k = 16, 17: a[16] * b[6], a[16] * b[7];
       // k = 18, 19: always zero
0131   c[15 * N + n] = a[15 * N + n];
0132   c[16 * N + n] = a[16 * N + n] * b[6 * N + n];
0133   c[17 * N + n] = a[16 * N + n] * b[7 * N + n];
0134   c[18 * N + n] = 0;
0135   c[19 * N + n] = 0;
       // k = 20..24: plain copies of b[10..14]
0136   c[20 * N + n] = b[10 * N + n];
0137   c[21 * N + n] = b[11 * N + n];
0138   c[22 * N + n] = b[12 * N + n];
0139   c[23 * N + n] = b[13 * N + n];
0140   c[24 * N + n] = b[14 * N + n];
       // k = 25, 28, 29: always zero; k = 26, 27: a[26] * b[6], a[26] * b[7]
0141   c[25 * N + n] = 0;
0142   c[26 * N + n] = a[26 * N + n] * b[6 * N + n];
0143   c[27 * N + n] = a[26 * N + n] * b[7 * N + n];
0144   c[28 * N + n] = 0;
0145   c[29 * N + n] = 0;
0146 }
0147 #endif