Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/JacErrPropCurv2.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
0004   IntrVec_t a_0 = LD(a, 0);
0005   IntrVec_t b_3 = LD(b, 3);
0006   IntrVec_t c_3 = MUL(a_0, b_3);
0007   IntrVec_t b_5 = LD(b, 5);
0008   IntrVec_t c_5 = MUL(a_0, b_5);
0009 
0010   IntrVec_t a_1 = LD(a, 1);
0011   IntrVec_t b_11 = LD(b, 11);
0012   c_5 = FMA(a_1, b_11, c_5);
0013 
0014   IntrVec_t a_2 = LD(a, 2);
0015   IntrVec_t c_4 = a_2;
0016 
0017   IntrVec_t a_3 = LD(a, 3);
0018   IntrVec_t b_18 = LD(b, 18);
0019   IntrVec_t c_0 = MUL(a_3, b_18);
0020   IntrVec_t b_19 = LD(b, 19);
0021   IntrVec_t c_1 = MUL(a_3, b_19);
0022 
0023   IntrVec_t a_4 = LD(a, 4);
0024   IntrVec_t b_24 = LD(b, 24);
0025   c_0 = FMA(a_4, b_24, c_0);
0026   IntrVec_t b_25 = LD(b, 25);
0027   c_1 = FMA(a_4, b_25, c_1);
0028   IntrVec_t b_26 = LD(b, 26);
0029   IntrVec_t c_2 = MUL(a_4, b_26);
0030   ST(c, 0, c_0);
0031 
0032   IntrVec_t a_5 = LD(a, 5);
0033   IntrVec_t c_9 = MUL(a_5, b_3);
0034   ST(c, 1, c_1);
0035   ST(c, 2, c_2);
0036   ST(c, 3, c_3);
0037   ST(c, 4, c_4);
0038   ST(c, 5, c_5);
0039   IntrVec_t c_11 = MUL(a_5, b_5);
0040 
0041   IntrVec_t a_6 = LD(a, 6);
0042   c_11 = FMA(a_6, b_11, c_11);
0043 
0044   IntrVec_t a_7 = LD(a, 7);
0045   IntrVec_t c_10 = a_7;
0046 
0047   IntrVec_t a_8 = LD(a, 8);
0048   IntrVec_t c_6 = MUL(a_8, b_18);
0049   IntrVec_t c_7 = MUL(a_8, b_19);
0050 
0051   IntrVec_t a_9 = LD(a, 9);
0052   c_6 = FMA(a_9, b_24, c_6);
0053   c_7 = FMA(a_9, b_25, c_7);
0054   IntrVec_t c_8 = MUL(a_9, b_26);
0055 
0056   IntrVec_t a_10 = LD(a, 10);
0057   IntrVec_t c_15 = MUL(a_10, b_3);
0058   ST(c, 6, c_6);
0059   ST(c, 7, c_7);
0060   ST(c, 8, c_8);
0061   ST(c, 9, c_9);
0062   ST(c, 10, c_10);
0063   ST(c, 11, c_11);
0064   IntrVec_t c_17 = MUL(a_10, b_5);
0065 
0066   IntrVec_t a_11 = LD(a, 11);
0067   c_17 = FMA(a_11, b_11, c_17);
0068 
0069   IntrVec_t a_12 = LD(a, 12);
0070   IntrVec_t c_16 = a_12;
0071 
0072   IntrVec_t a_13 = LD(a, 13);
0073   IntrVec_t c_12 = MUL(a_13, b_18);
0074   IntrVec_t c_13 = MUL(a_13, b_19);
0075 
0076   IntrVec_t a_14 = LD(a, 14);
0077   c_12 = FMA(a_14, b_24, c_12);
0078   c_13 = FMA(a_14, b_25, c_13);
0079   IntrVec_t c_14 = MUL(a_14, b_26);
0080 
0081   IntrVec_t a_15 = LD(a, 15);
0082   IntrVec_t c_21 = MUL(a_15, b_3);
0083   ST(c, 12, c_12);
0084   ST(c, 13, c_13);
0085   ST(c, 14, c_14);
0086   ST(c, 15, c_15);
0087   ST(c, 16, c_16);
0088   ST(c, 17, c_17);
0089   IntrVec_t c_23 = MUL(a_15, b_5);
0090 
0091   IntrVec_t a_16 = LD(a, 16);
0092   c_23 = FMA(a_16, b_11, c_23);
0093 
0094   IntrVec_t a_17 = LD(a, 17);
0095   IntrVec_t c_22 = a_17;
0096 
0097   IntrVec_t a_18 = LD(a, 18);
0098   IntrVec_t c_18 = MUL(a_18, b_18);
0099   IntrVec_t c_19 = MUL(a_18, b_19);
0100 
0101   IntrVec_t a_19 = LD(a, 19);
0102   c_18 = FMA(a_19, b_24, c_18);
0103   c_19 = FMA(a_19, b_25, c_19);
0104   IntrVec_t c_20 = MUL(a_19, b_26);
0105 
0106   IntrVec_t a_20 = LD(a, 20);
0107   IntrVec_t c_27 = MUL(a_20, b_3);
0108   ST(c, 18, c_18);
0109   ST(c, 19, c_19);
0110   ST(c, 20, c_20);
0111   ST(c, 21, c_21);
0112   ST(c, 22, c_22);
0113   ST(c, 23, c_23);
0114   IntrVec_t c_29 = MUL(a_20, b_5);
0115 
0116   IntrVec_t a_21 = LD(a, 21);
0117   c_29 = FMA(a_21, b_11, c_29);
0118 
0119   IntrVec_t a_22 = LD(a, 22);
0120   IntrVec_t c_28 = a_22;
0121 
0122   IntrVec_t a_23 = LD(a, 23);
0123   IntrVec_t c_24 = MUL(a_23, b_18);
0124   IntrVec_t c_25 = MUL(a_23, b_19);
0125 
0126   IntrVec_t a_24 = LD(a, 24);
0127   c_24 = FMA(a_24, b_24, c_24);
0128   c_25 = FMA(a_24, b_25, c_25);
0129   IntrVec_t c_26 = MUL(a_24, b_26);
0130 
0131   IntrVec_t a_25 = LD(a, 25);
0132   IntrVec_t c_33 = MUL(a_25, b_3);
0133   ST(c, 24, c_24);
0134   ST(c, 25, c_25);
0135   ST(c, 26, c_26);
0136   ST(c, 27, c_27);
0137   ST(c, 28, c_28);
0138   ST(c, 29, c_29);
0139   IntrVec_t c_35 = MUL(a_25, b_5);
0140 
0141   IntrVec_t a_26 = LD(a, 26);
0142   c_35 = FMA(a_26, b_11, c_35);
0143 
0144   IntrVec_t a_27 = LD(a, 27);
0145   IntrVec_t c_34 = a_27;
0146 
0147   IntrVec_t a_28 = LD(a, 28);
0148   IntrVec_t c_30 = MUL(a_28, b_18);
0149   IntrVec_t c_31 = MUL(a_28, b_19);
0150 
0151   IntrVec_t a_29 = LD(a, 29);
0152   c_30 = FMA(a_29, b_24, c_30);
0153   c_31 = FMA(a_29, b_25, c_31);
0154   IntrVec_t c_32 = MUL(a_29, b_26);
0155   ST(c, 30, c_30);
0156   ST(c, 31, c_31);
0157   ST(c, 32, c_32);
0158   ST(c, 33, c_33);
0159   ST(c, 34, c_34);
0160   ST(c, 35, c_35);
0161 }
0162 
0163 #else
0164 
0165 #pragma omp simd
0166 for (int n = 0; n < N; ++n) {
0167   c[0 * N + n] = a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[24 * N + n];
0168   c[1 * N + n] = a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[25 * N + n];
0169   c[2 * N + n] = a[4 * N + n] * b[26 * N + n];
0170   c[3 * N + n] = a[0 * N + n] * b[3 * N + n];
0171   c[4 * N + n] = a[2 * N + n];
0172   c[5 * N + n] = a[0 * N + n] * b[5 * N + n] + a[1 * N + n] * b[11 * N + n];
0173   c[6 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[24 * N + n];
0174   c[7 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[25 * N + n];
0175   c[8 * N + n] = a[9 * N + n] * b[26 * N + n];
0176   c[9 * N + n] = a[5 * N + n] * b[3 * N + n];
0177   c[10 * N + n] = a[7 * N + n];
0178   c[11 * N + n] = a[5 * N + n] * b[5 * N + n] + a[6 * N + n] * b[11 * N + n];
0179   c[12 * N + n] = a[13 * N + n] * b[18 * N + n] + a[14 * N + n] * b[24 * N + n];
0180   c[13 * N + n] = a[13 * N + n] * b[19 * N + n] + a[14 * N + n] * b[25 * N + n];
0181   c[14 * N + n] = a[14 * N + n] * b[26 * N + n];
0182   c[15 * N + n] = a[10 * N + n] * b[3 * N + n];
0183   c[16 * N + n] = a[12 * N + n];
0184   c[17 * N + n] = a[10 * N + n] * b[5 * N + n] + a[11 * N + n] * b[11 * N + n];
0185   c[18 * N + n] = a[18 * N + n] * b[18 * N + n] + a[19 * N + n] * b[24 * N + n];
0186   c[19 * N + n] = a[18 * N + n] * b[19 * N + n] + a[19 * N + n] * b[25 * N + n];
0187   c[20 * N + n] = a[19 * N + n] * b[26 * N + n];
0188   c[21 * N + n] = a[15 * N + n] * b[3 * N + n];
0189   c[22 * N + n] = a[17 * N + n];
0190   c[23 * N + n] = a[15 * N + n] * b[5 * N + n] + a[16 * N + n] * b[11 * N + n];
0191   c[24 * N + n] = a[23 * N + n] * b[18 * N + n] + a[24 * N + n] * b[24 * N + n];
0192   c[25 * N + n] = a[23 * N + n] * b[19 * N + n] + a[24 * N + n] * b[25 * N + n];
0193   c[26 * N + n] = a[24 * N + n] * b[26 * N + n];
0194   c[27 * N + n] = a[20 * N + n] * b[3 * N + n];
0195   c[28 * N + n] = a[22 * N + n];
0196   c[29 * N + n] = a[20 * N + n] * b[5 * N + n] + a[21 * N + n] * b[11 * N + n];
0197   c[30 * N + n] = a[28 * N + n] * b[18 * N + n] + a[29 * N + n] * b[24 * N + n];
0198   c[31 * N + n] = a[28 * N + n] * b[19 * N + n] + a[29 * N + n] * b[25 * N + n];
0199   c[32 * N + n] = a[29 * N + n] * b[26 * N + n];
0200   c[33 * N + n] = a[25 * N + n] * b[3 * N + n];
0201   c[34 * N + n] = a[27 * N + n];
0202   c[35 * N + n] = a[25 * N + n] * b[5 * N + n] + a[26 * N + n] * b[11 * N + n];
0203 }
0204 #endif