Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/CartesianErrTransp.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
// Vector-intrinsics variant of the kernel; the #else branch of this conditional writes the same
// 21 outputs as scalar expressions. It fills output planes c[0..20] from planes of b and, for
// outputs 9, 13, 14, 18, 19 and 20, from planes 21, 22, 27, 28, 33 and 35 of a.
// LD, ST, MUL, FMA, IntrVec_t, T, N, a, b and c come from the including code and are not visible
// here. Judging by the scalar branch, LD(x, k) presumably loads one vector of plane k at offset n,
// ST(c, k, v) stores v into plane k, and FMA(x, y, z) evaluates x*y + z -- TODO confirm.
// NOTE(review): the index pattern is consistent with the packed lower triangle of B * A^T for
// row-major 6x6 B and A, with every entry of A other than the six planes read here treated as a
// fixed 0 or 1 -- confirm against the generator before relying on this reading.
// NOTE(review): stores trail the computation of their value and are interleaved with later loads;
// this looks like a generated schedule, so keep the statement order unless aliasing between c and
// a/b has been ruled out.
0002 
// Each iteration advances n by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) elements.
// NOTE(review): no remainder loop is visible, so N is presumably a multiple of that step -- confirm.
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004    {
0005       IntrVec_t b_0 = LD(b, 0);
0006       IntrVec_t c_0 = b_0;  // c[0] is a plain copy of b[0]
0007 
0008 
0009 
0010 
0011 
0012 
0013       IntrVec_t b_6 = LD(b, 6);
0014       IntrVec_t c_1 = b_6;  // c[1] <- b[6]
0015 
0016       IntrVec_t b_7 = LD(b, 7);
0017       IntrVec_t c_2 = b_7;  // c[2] <- b[7]
0018       ST(c, 0, c_0);
0019 
0020 
0021 
0022 
0023 
0024       IntrVec_t b_12 = LD(b, 12);
0025       IntrVec_t c_3 = b_12;  // c[3] <- b[12]
0026 
0027       IntrVec_t b_13 = LD(b, 13);
0028       IntrVec_t c_4 = b_13;  // c[4] <- b[13]
0029       ST(c, 1, c_1);
0030       ST(c, 2, c_2);
0031 
0032       IntrVec_t b_14 = LD(b, 14);
0033       IntrVec_t c_5 = b_14;  // c[5] <- b[14]
0034 
0035 
0036 
0037 
0038       IntrVec_t b_18 = LD(b, 18);
0039       IntrVec_t c_6 = b_18;  // c[6] <- b[18]
0040 
0041       IntrVec_t b_19 = LD(b, 19);
0042       IntrVec_t c_7 = b_19;  // c[7] <- b[19]
0043       ST(c, 3, c_3);
0044       ST(c, 4, c_4);
0045       ST(c, 5, c_5);
0046 
0047       IntrVec_t b_20 = LD(b, 20);
0048       IntrVec_t c_8 = b_20;  // c[8] <- b[20]
0049 
           // c[9] = b[21]*a[21] + b[22]*a[22]; a_21 and a_22 are kept for reuse by c[13] and c[18].
0050       IntrVec_t b_21 = LD(b, 21);
0051       IntrVec_t a_21 = LD(a, 21);
0052       IntrVec_t c_9 = MUL(b_21, a_21);
0053 
0054       IntrVec_t b_22 = LD(b, 22);
0055       IntrVec_t a_22 = LD(a, 22);
0056       c_9 = FMA(b_22, a_22, c_9);
0057 
0058 
0059       IntrVec_t b_24 = LD(b, 24);
0060       IntrVec_t c_10 = b_24;  // c[10] <- b[24]
0061 
0062       IntrVec_t b_25 = LD(b, 25);
0063       IntrVec_t c_11 = b_25;  // c[11] <- b[25]
0064       ST(c, 6, c_6);
0065       ST(c, 7, c_7);
0066       ST(c, 8, c_8);
0067       ST(c, 9, c_9);
0068 
0069       IntrVec_t b_26 = LD(b, 26);
0070       IntrVec_t c_12 = b_26;  // c[12] <- b[26]
0071 
           // c[13] = b[27]*a[21] + b[28]*a[22] and c[14] = b[27]*a[27] + b[28]*a[28],
           // built up in two steps each as b[27] and b[28] become available.
0072       IntrVec_t b_27 = LD(b, 27);
0073       IntrVec_t c_13 = MUL(b_27, a_21);
0074       IntrVec_t a_27 = LD(a, 27);
0075       IntrVec_t c_14 = MUL(b_27, a_27);
0076 
0077       IntrVec_t b_28 = LD(b, 28);
0078       c_13 = FMA(b_28, a_22, c_13);
0079       IntrVec_t a_28 = LD(a, 28);
0080       c_14 = FMA(b_28, a_28, c_14);
0081 
0082 
0083       IntrVec_t b_30 = LD(b, 30);
0084       IntrVec_t c_15 = b_30;  // c[15] <- b[30]
0085 
0086       IntrVec_t b_31 = LD(b, 31);
0087       IntrVec_t c_16 = b_31;  // c[16] <- b[31]
0088       ST(c, 10, c_10);
0089       ST(c, 11, c_11);
0090       ST(c, 12, c_12);
0091       ST(c, 13, c_13);
0092       ST(c, 14, c_14);
0093 
0094       IntrVec_t b_32 = LD(b, 32);
0095       IntrVec_t c_17 = b_32;  // c[17] <- b[32]
0096 
           // c[18] = b[33]*a[21] + b[34]*a[22]
           // c[19] = b[33]*a[27] + b[34]*a[28]
           // c[20] = b[33]*a[33] + b[35]*a[35]   (second term uses plane 35, not 34)
0097       IntrVec_t b_33 = LD(b, 33);
0098       IntrVec_t c_18 = MUL(b_33, a_21);
0099       IntrVec_t c_19 = MUL(b_33, a_27);
0100       IntrVec_t a_33 = LD(a, 33);
0101       IntrVec_t c_20 = MUL(b_33, a_33);
0102 
0103       IntrVec_t b_34 = LD(b, 34);
0104       c_18 = FMA(b_34, a_22, c_18);
0105       c_19 = FMA(b_34, a_28, c_19);
0106 
0107       IntrVec_t b_35 = LD(b, 35);
0108       IntrVec_t a_35 = LD(a, 35);
0109       c_20 = FMA(b_35, a_35, c_20);
0110       ST(c, 15, c_15);
0111       ST(c, 16, c_16);
0112       ST(c, 17, c_17);
0113       ST(c, 18, c_18);
0114       ST(c, 19, c_19);
0115       ST(c, 20, c_20);
0116    }
0117 
0118 #else
// Fallback used when MPLEX_INTRINSICS is not defined: the same 21 outputs as the intrinsics
// branch, written as one scalar expression per output plane.
// Plane k of each array occupies elements k*N .. k*N+N-1, so index k*N+n addresses entry n of
// plane k. N, a, b and c are supplied by the including code and are not visible here.
// Outputs 0-8, 10-12 and 15-17 are straight copies of a b plane; outputs 9, 13, 14, 18, 19 and 20
// are two-term sums of products that read a planes 21, 22, 27, 28, 33 and 35 only.
// NOTE(review): the index pattern is consistent with the packed lower triangle of B * A^T for
// row-major 6x6 B and A, with every other entry of A treated as a fixed 0 or 1 -- confirm against
// the generator before relying on this reading.
0119 
// The pragma asks the compiler to vectorise the loop over n; iterations touch disjoint indices
// of c. NOTE(review): this presumes c does not overlap a or b -- confirm at the call site.
0120 #pragma omp simd
0121    for (int n = 0; n < N; ++n)
0122    {
0123       c[ 0*N+n] = b[ 0*N+n];
0124       c[ 1*N+n] = b[ 6*N+n];
0125       c[ 2*N+n] = b[ 7*N+n];
0126       c[ 3*N+n] = b[12*N+n];
0127       c[ 4*N+n] = b[13*N+n];
0128       c[ 5*N+n] = b[14*N+n];
0129       c[ 6*N+n] = b[18*N+n];
0130       c[ 7*N+n] = b[19*N+n];
0131       c[ 8*N+n] = b[20*N+n];
0132       c[ 9*N+n] = b[21*N+n]*a[21*N+n] + b[22*N+n]*a[22*N+n];
0133       c[10*N+n] = b[24*N+n];
0134       c[11*N+n] = b[25*N+n];
0135       c[12*N+n] = b[26*N+n];
0136       c[13*N+n] = b[27*N+n]*a[21*N+n] + b[28*N+n]*a[22*N+n];
0137       c[14*N+n] = b[27*N+n]*a[27*N+n] + b[28*N+n]*a[28*N+n];
0138       c[15*N+n] = b[30*N+n];
0139       c[16*N+n] = b[31*N+n];
0140       c[17*N+n] = b[32*N+n];
0141       c[18*N+n] = b[33*N+n]*a[21*N+n] + b[34*N+n]*a[22*N+n];
0142       c[19*N+n] = b[33*N+n]*a[27*N+n] + b[34*N+n]*a[28*N+n];
0143       c[20*N+n] = b[33*N+n]*a[33*N+n] + b[35*N+n]*a[35*N+n];  // second term uses plane 35, not 34; the intrinsics branch does the same
0144    }
0145 #endif