Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/PsErrLocTransp.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
// Vectorized branch, compiled when MPLEX_INTRINSICS is defined.
// n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per iteration. LD/ST take no
// explicit n, so they presumably pick it up from the enclosing scope -- TODO confirm.
// LD, ST, MUL, FMA and IntrVec_t are defined outside this fragment. The notes below assume
// LD(x, k) reads slot k of x, ST(c, k, v) writes v to slot k of c, and FMA(x, y, acc)
// yields x * y + acc -- TODO confirm against the macro definitions.
// Under that reading the 15 stored values are the same sums as in the scalar #else branch:
//   c0  = b3*a3 + b5*a5
//   c1  = b9*a3 + b11*a5
//   c2  = b6*a6 + b7*a7 + b8*a8 + b10*a10 + b11*a11
//   c3  = b15*a3 + b17*a5
//   c4  = b12*a6 + b13*a7 + b14*a8 + b16*a10 + b17*a11
//   c5  = b12*a12 + b13*a13 + b14*a14 + b16*a16 + b17*a17
//   c6  = b21*a3 + b23*a5
//   c7  = b18*a6 + b19*a7 + b20*a8 + b22*a10 + b23*a11
//   c8  = b18*a12 + b19*a13 + b20*a14 + b22*a16 + b23*a17
//   c9  = b18*a18 + b19*a19 + b20*a20
//   c10 = b27*a3 + b29*a5
//   c11 = b24*a6 + b25*a7 + b26*a8 + b28*a10 + b29*a11
//   c12 = b24*a12 + b25*a13 + b26*a14 + b28*a16 + b29*a17
//   c13 = b24*a18 + b25*a19 + b26*a20
//   c14 = b24*a24 + b25*a25 + b26*a26
// Each a_* and b_* slot is loaded once and reused by every accumulator that needs it.
// NOTE(review): statements of different accumulators are interleaved (the file appears to be
// machine-generated -- confirm). When editing, keep each accumulator's MUL/FMA chain in the
// same order, because that order fixes the floating-point summation order.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
// c_0 = b3*a3 + b5*a5; a_3 and a_5 are reused later by c_1, c_3, c_6 and c_10.
0004   IntrVec_t b_3 = LD(b, 3);
0005   IntrVec_t a_3 = LD(a, 3);
0006   IntrVec_t c_0 = MUL(b_3, a_3);
0007 
0008   IntrVec_t b_5 = LD(b, 5);
0009   IntrVec_t a_5 = LD(a, 5);
0010   c_0 = FMA(b_5, a_5, c_0);
0011 
// c_2 accumulates b6*a6 + b7*a7 + b8*a8 + b10*a10 + b11*a11 over the next sections.
0012   IntrVec_t b_6 = LD(b, 6);
0013   IntrVec_t a_6 = LD(a, 6);
0014   IntrVec_t c_2 = MUL(b_6, a_6);
0015 
0016   IntrVec_t b_7 = LD(b, 7);
0017   IntrVec_t a_7 = LD(a, 7);
0018   c_2 = FMA(b_7, a_7, c_2);
0019   ST(c, 0, c_0);
0020 
0021   IntrVec_t b_8 = LD(b, 8);
0022   IntrVec_t a_8 = LD(a, 8);
0023   c_2 = FMA(b_8, a_8, c_2);
0024 
// c_1 = b9*a3 + b11*a5, reusing a_3 and a_5 loaded above.
0025   IntrVec_t b_9 = LD(b, 9);
0026   IntrVec_t c_1 = MUL(b_9, a_3);
0027 
0028   IntrVec_t b_10 = LD(b, 10);
0029   IntrVec_t a_10 = LD(a, 10);
0030   c_2 = FMA(b_10, a_10, c_2);
0031 
// b_11 contributes to both c_1 (with a_5) and c_2 (with a_11).
0032   IntrVec_t b_11 = LD(b, 11);
0033   c_1 = FMA(b_11, a_5, c_1);
0034   IntrVec_t a_11 = LD(a, 11);
0035   c_2 = FMA(b_11, a_11, c_2);
0036 
// b_12..b_17 feed c_3 (with a_3, a_5), c_4 (with a_6..a_11) and c_5 (with a_12..a_17).
// c_1 and c_2 are complete at this point and are stored in between.
0037   IntrVec_t b_12 = LD(b, 12);
0038   IntrVec_t c_4 = MUL(b_12, a_6);
0039   ST(c, 1, c_1);
0040   IntrVec_t a_12 = LD(a, 12);
0041   IntrVec_t c_5 = MUL(b_12, a_12);
0042   ST(c, 2, c_2);
0043 
0044   IntrVec_t b_13 = LD(b, 13);
0045   c_4 = FMA(b_13, a_7, c_4);
0046   IntrVec_t a_13 = LD(a, 13);
0047   c_5 = FMA(b_13, a_13, c_5);
0048 
0049   IntrVec_t b_14 = LD(b, 14);
0050   c_4 = FMA(b_14, a_8, c_4);
0051   IntrVec_t a_14 = LD(a, 14);
0052   c_5 = FMA(b_14, a_14, c_5);
0053 
0054   IntrVec_t b_15 = LD(b, 15);
0055   IntrVec_t c_3 = MUL(b_15, a_3);
0056 
0057   IntrVec_t b_16 = LD(b, 16);
0058   c_4 = FMA(b_16, a_10, c_4);
0059   IntrVec_t a_16 = LD(a, 16);
0060   c_5 = FMA(b_16, a_16, c_5);
0061 
0062   IntrVec_t b_17 = LD(b, 17);
0063   c_3 = FMA(b_17, a_5, c_3);
0064   c_4 = FMA(b_17, a_11, c_4);
0065   IntrVec_t a_17 = LD(a, 17);
0066   c_5 = FMA(b_17, a_17, c_5);
0067 
// b_18..b_23 feed c_6 (a_3, a_5), c_7 (a_6..a_11), c_8 (a_12..a_17) and c_9 (a_18..a_20).
// c_3, c_4 and c_5 are complete and stored here.
0068   IntrVec_t b_18 = LD(b, 18);
0069   IntrVec_t c_7 = MUL(b_18, a_6);
0070   ST(c, 3, c_3);
0071   ST(c, 4, c_4);
0072   ST(c, 5, c_5);
0073   IntrVec_t c_8 = MUL(b_18, a_12);
0074   IntrVec_t a_18 = LD(a, 18);
0075   IntrVec_t c_9 = MUL(b_18, a_18);
0076 
0077   IntrVec_t b_19 = LD(b, 19);
0078   c_7 = FMA(b_19, a_7, c_7);
0079   c_8 = FMA(b_19, a_13, c_8);
0080   IntrVec_t a_19 = LD(a, 19);
0081   c_9 = FMA(b_19, a_19, c_9);
0082 
0083   IntrVec_t b_20 = LD(b, 20);
0084   c_7 = FMA(b_20, a_8, c_7);
0085   c_8 = FMA(b_20, a_14, c_8);
0086   IntrVec_t a_20 = LD(a, 20);
0087   c_9 = FMA(b_20, a_20, c_9);
0088 
0089   IntrVec_t b_21 = LD(b, 21);
0090   IntrVec_t c_6 = MUL(b_21, a_3);
0091 
0092   IntrVec_t b_22 = LD(b, 22);
0093   c_7 = FMA(b_22, a_10, c_7);
0094   c_8 = FMA(b_22, a_16, c_8);
0095 
0096   IntrVec_t b_23 = LD(b, 23);
0097   c_6 = FMA(b_23, a_5, c_6);
0098   c_7 = FMA(b_23, a_11, c_7);
0099   c_8 = FMA(b_23, a_17, c_8);
0100 
// b_24..b_29 feed c_10 (a_3, a_5), c_11 (a_6..a_11), c_12 (a_12..a_17),
// c_13 (a_18..a_20) and c_14 (a_24..a_26). c_6..c_9 are complete and stored here.
0101   IntrVec_t b_24 = LD(b, 24);
0102   IntrVec_t c_11 = MUL(b_24, a_6);
0103   ST(c, 6, c_6);
0104   ST(c, 7, c_7);
0105   ST(c, 8, c_8);
0106   ST(c, 9, c_9);
0107   IntrVec_t c_12 = MUL(b_24, a_12);
0108   IntrVec_t c_13 = MUL(b_24, a_18);
0109   IntrVec_t a_24 = LD(a, 24);
0110   IntrVec_t c_14 = MUL(b_24, a_24);
0111 
0112   IntrVec_t b_25 = LD(b, 25);
0113   c_11 = FMA(b_25, a_7, c_11);
0114   c_12 = FMA(b_25, a_13, c_12);
0115   c_13 = FMA(b_25, a_19, c_13);
0116   IntrVec_t a_25 = LD(a, 25);
0117   c_14 = FMA(b_25, a_25, c_14);
0118 
0119   IntrVec_t b_26 = LD(b, 26);
0120   c_11 = FMA(b_26, a_8, c_11);
0121   c_12 = FMA(b_26, a_14, c_12);
0122   c_13 = FMA(b_26, a_20, c_13);
0123   IntrVec_t a_26 = LD(a, 26);
0124   c_14 = FMA(b_26, a_26, c_14);
0125 
0126   IntrVec_t b_27 = LD(b, 27);
0127   IntrVec_t c_10 = MUL(b_27, a_3);
0128 
0129   IntrVec_t b_28 = LD(b, 28);
0130   c_11 = FMA(b_28, a_10, c_11);
0131   c_12 = FMA(b_28, a_16, c_12);
0132 
0133   IntrVec_t b_29 = LD(b, 29);
0134   c_10 = FMA(b_29, a_5, c_10);
0135   c_11 = FMA(b_29, a_11, c_11);
0136   c_12 = FMA(b_29, a_17, c_12);
// Final stores for slots 10..14.
0137   ST(c, 10, c_10);
0138   ST(c, 11, c_11);
0139   ST(c, 12, c_12);
0140   ST(c, 13, c_13);
0141   ST(c, 14, c_14);
0142 }
0143 
0144 #else
0145 
// Scalar fallback branch, compiled when MPLEX_INTRINSICS is not defined.
// For every n in [0, N) it writes c[k * N + n] for k = 0..14; slot k of item n sits at
// index k * N + n in a, b and c, so consecutive n are adjacent in memory for a given slot.
// Slots read: a 3, 5-8, 10-14, 16-20, 24-26 and b 3, 5-29. No other slots are read.
// Which a slots feed which outputs:
//   a {3, 5}               -> c 0, 1, 3, 6, 10
//   a {6, 7, 8, 10, 11}    -> c 2, 4, 7, 11
//   a {12, 13, 14, 16, 17} -> c 5, 8, 12
//   a {18, 19, 20}         -> c 9, 13
//   a {24, 25, 26}         -> c 14
// NOTE(review): this index pattern is consistent with c = b * transpose(a) for row-major
// 6-column a and b, with c holding the packed lower triangle of a 5x5 result and the
// unread a slots treated as zero -- confirm against the caller / generator.
// The omp simd pragma requests vectorization of the loop over n; a, b, c and N come from
// the including scope.
0146 #pragma omp simd
0147 for (int n = 0; n < N; ++n) {
0148   c[0 * N + n] = b[3 * N + n] * a[3 * N + n] + b[5 * N + n] * a[5 * N + n];
0149   c[1 * N + n] = b[9 * N + n] * a[3 * N + n] + b[11 * N + n] * a[5 * N + n];
0150   c[2 * N + n] = b[6 * N + n] * a[6 * N + n] + b[7 * N + n] * a[7 * N + n] + b[8 * N + n] * a[8 * N + n] +
0151                  b[10 * N + n] * a[10 * N + n] + b[11 * N + n] * a[11 * N + n];
0152   c[3 * N + n] = b[15 * N + n] * a[3 * N + n] + b[17 * N + n] * a[5 * N + n];
0153   c[4 * N + n] = b[12 * N + n] * a[6 * N + n] + b[13 * N + n] * a[7 * N + n] + b[14 * N + n] * a[8 * N + n] +
0154                  b[16 * N + n] * a[10 * N + n] + b[17 * N + n] * a[11 * N + n];
0155   c[5 * N + n] = b[12 * N + n] * a[12 * N + n] + b[13 * N + n] * a[13 * N + n] + b[14 * N + n] * a[14 * N + n] +
0156                  b[16 * N + n] * a[16 * N + n] + b[17 * N + n] * a[17 * N + n];
0157   c[6 * N + n] = b[21 * N + n] * a[3 * N + n] + b[23 * N + n] * a[5 * N + n];
0158   c[7 * N + n] = b[18 * N + n] * a[6 * N + n] + b[19 * N + n] * a[7 * N + n] + b[20 * N + n] * a[8 * N + n] +
0159                  b[22 * N + n] * a[10 * N + n] + b[23 * N + n] * a[11 * N + n];
0160   c[8 * N + n] = b[18 * N + n] * a[12 * N + n] + b[19 * N + n] * a[13 * N + n] + b[20 * N + n] * a[14 * N + n] +
0161                  b[22 * N + n] * a[16 * N + n] + b[23 * N + n] * a[17 * N + n];
0162   c[9 * N + n] = b[18 * N + n] * a[18 * N + n] + b[19 * N + n] * a[19 * N + n] + b[20 * N + n] * a[20 * N + n];
0163   c[10 * N + n] = b[27 * N + n] * a[3 * N + n] + b[29 * N + n] * a[5 * N + n];
0164   c[11 * N + n] = b[24 * N + n] * a[6 * N + n] + b[25 * N + n] * a[7 * N + n] + b[26 * N + n] * a[8 * N + n] +
0165                   b[28 * N + n] * a[10 * N + n] + b[29 * N + n] * a[11 * N + n];
0166   c[12 * N + n] = b[24 * N + n] * a[12 * N + n] + b[25 * N + n] * a[13 * N + n] + b[26 * N + n] * a[14 * N + n] +
0167                   b[28 * N + n] * a[16 * N + n] + b[29 * N + n] * a[17 * N + n];
0168   c[13 * N + n] = b[24 * N + n] * a[18 * N + n] + b[25 * N + n] * a[19 * N + n] + b[26 * N + n] * a[20 * N + n];
0169   c[14 * N + n] = b[24 * N + n] * a[24 * N + n] + b[25 * N + n] * a[25 * N + n] + b[26 * N + n] * a[26 * N + n];
0170 }
0171 #endif