Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/upParam_kalmanGain_x_propErr.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
// Vectorized branch: every iteration advances n by
// MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) elements and produces c planes 0..20,
// each accumulated from three (a plane, b plane) pairs -- the same index pairs
// that the scalar branch of this file spells out.
// NOTE(review): LD/ST/MUL/FMA, IntrVec_t, T, N, a, b and c are defined outside
// this fragment. Presumably LD(x, i) reads plane i of x at the current n,
// ST(x, i, v) writes it, and FMA(x, y, z) yields x*y + z -- confirm against the
// macro definitions.
// NOTE(review): there is no remainder handling here, so this appears to assume
// N is a multiple of the loop step -- confirm.
// Each b plane is loaded once, right before its first use; only b planes
// 0-8, 10-12 and 15-17 are ever read. Stores are interleaved with arithmetic;
// keep the statement order as is (presumably chosen for instruction scheduling).
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004    {
// a planes 0..2 -> c plane 0: pairs (a0,b0), (a1,b1), (a2,b3).
0005       IntrVec_t a_0 = LD(a, 0);
0006       IntrVec_t b_0 = LD(b, 0);
0007       IntrVec_t c_0 = MUL(a_0, b_0);
0008 
0009       IntrVec_t a_1 = LD(a, 1);
0010       IntrVec_t b_1 = LD(b, 1);
0011       c_0 = FMA(a_1, b_1, c_0);
0012 
0013       IntrVec_t a_2 = LD(a, 2);
0014       IntrVec_t b_3 = LD(b, 3);
0015       c_0 = FMA(a_2, b_3, c_0);
0016 
// a planes 3..5 -> c planes 1..2 (the store of c_0 is issued inside this group).
0017       IntrVec_t a_3 = LD(a, 3);
0018       IntrVec_t c_1 = MUL(a_3, b_0);
0019       IntrVec_t c_2 = MUL(a_3, b_1);
0020 
0021       IntrVec_t a_4 = LD(a, 4);
0022       c_1 = FMA(a_4, b_1, c_1);
0023       ST(c, 0, c_0);
0024       IntrVec_t b_2 = LD(b, 2);
0025       c_2 = FMA(a_4, b_2, c_2);
0026 
0027       IntrVec_t a_5 = LD(a, 5);
0028       c_1 = FMA(a_5, b_3, c_1);
0029       IntrVec_t b_4 = LD(b, 4);
0030       c_2 = FMA(a_5, b_4, c_2);
0031 
// a planes 6..8 -> c planes 3..5 (stores of c_1, c_2 are issued inside this group).
0032       IntrVec_t a_6 = LD(a, 6);
0033       IntrVec_t c_3 = MUL(a_6, b_0);
0034       ST(c, 1, c_1);
0035       IntrVec_t c_4 = MUL(a_6, b_1);
0036       ST(c, 2, c_2);
0037       IntrVec_t c_5 = MUL(a_6, b_3);
0038 
0039       IntrVec_t a_7 = LD(a, 7);
0040       c_3 = FMA(a_7, b_1, c_3);
0041       c_4 = FMA(a_7, b_2, c_4);
0042       c_5 = FMA(a_7, b_4, c_5);
0043 
0044       IntrVec_t a_8 = LD(a, 8);
0045       c_3 = FMA(a_8, b_3, c_3);
0046       c_4 = FMA(a_8, b_4, c_4);
0047       IntrVec_t b_5 = LD(b, 5);
0048       c_5 = FMA(a_8, b_5, c_5);
0049 
// a planes 9..11 -> c planes 6..9; c plane 9 uses b planes 6, 7, 8.
0050       IntrVec_t a_9 = LD(a, 9);
0051       IntrVec_t c_6 = MUL(a_9, b_0);
0052       ST(c, 3, c_3);
0053       ST(c, 4, c_4);
0054       ST(c, 5, c_5);
0055       IntrVec_t c_7 = MUL(a_9, b_1);
0056       IntrVec_t c_8 = MUL(a_9, b_3);
0057       IntrVec_t b_6 = LD(b, 6);
0058       IntrVec_t c_9 = MUL(a_9, b_6);
0059 
0060       IntrVec_t a_10 = LD(a, 10);
0061       c_6 = FMA(a_10, b_1, c_6);
0062       c_7 = FMA(a_10, b_2, c_7);
0063       c_8 = FMA(a_10, b_4, c_8);
0064       IntrVec_t b_7 = LD(b, 7);
0065       c_9 = FMA(a_10, b_7, c_9);
0066 
0067       IntrVec_t a_11 = LD(a, 11);
0068       c_6 = FMA(a_11, b_3, c_6);
0069       c_7 = FMA(a_11, b_4, c_7);
0070       c_8 = FMA(a_11, b_5, c_8);
0071       IntrVec_t b_8 = LD(b, 8);
0072       c_9 = FMA(a_11, b_8, c_9);
0073       ST(c, 6, c_6);
0074       ST(c, 7, c_7);
0075       ST(c, 8, c_8);
0076 
// a planes 12..14 -> c planes 10..14; c plane 14 uses b planes 10, 11, 12.
0077       IntrVec_t a_12 = LD(a, 12);
0078       IntrVec_t c_10 = MUL(a_12, b_0);
0079       ST(c, 9, c_9);
0080       IntrVec_t c_11 = MUL(a_12, b_1);
0081       IntrVec_t c_12 = MUL(a_12, b_3);
0082       IntrVec_t c_13 = MUL(a_12, b_6);
0083       IntrVec_t b_10 = LD(b, 10);
0084       IntrVec_t c_14 = MUL(a_12, b_10);
0085 
0086       IntrVec_t a_13 = LD(a, 13);
0087       c_10 = FMA(a_13, b_1, c_10);
0088       c_11 = FMA(a_13, b_2, c_11);
0089       c_12 = FMA(a_13, b_4, c_12);
0090       c_13 = FMA(a_13, b_7, c_13);
0091       IntrVec_t b_11 = LD(b, 11);
0092       c_14 = FMA(a_13, b_11, c_14);
0093 
0094       IntrVec_t a_14 = LD(a, 14);
0095       c_10 = FMA(a_14, b_3, c_10);
0096       c_11 = FMA(a_14, b_4, c_11);
0097       c_12 = FMA(a_14, b_5, c_12);
0098       c_13 = FMA(a_14, b_8, c_13);
0099       IntrVec_t b_12 = LD(b, 12);
0100       c_14 = FMA(a_14, b_12, c_14);
0101       ST(c, 10, c_10);
0102       ST(c, 11, c_11);
0103       ST(c, 12, c_12);
0104       ST(c, 13, c_13);
0105       ST(c, 14, c_14);
0106 
// a planes 15..17 -> c planes 15..20; c plane 20 uses b planes 15, 16, 17.
0107       IntrVec_t a_15 = LD(a, 15);
0108       IntrVec_t c_15 = MUL(a_15, b_0);
0109       IntrVec_t c_16 = MUL(a_15, b_1);
0110       IntrVec_t c_17 = MUL(a_15, b_3);
0111       IntrVec_t c_18 = MUL(a_15, b_6);
0112       IntrVec_t c_19 = MUL(a_15, b_10);
0113       IntrVec_t b_15 = LD(b, 15);
0114       IntrVec_t c_20 = MUL(a_15, b_15);
0115 
0116       IntrVec_t a_16 = LD(a, 16);
0117       c_15 = FMA(a_16, b_1, c_15);
0118       c_16 = FMA(a_16, b_2, c_16);
0119       c_17 = FMA(a_16, b_4, c_17);
0120       c_18 = FMA(a_16, b_7, c_18);
0121       c_19 = FMA(a_16, b_11, c_19);
0122       IntrVec_t b_16 = LD(b, 16);
0123       c_20 = FMA(a_16, b_16, c_20);
0124 
0125       IntrVec_t a_17 = LD(a, 17);
0126       c_15 = FMA(a_17, b_3, c_15);
0127       c_16 = FMA(a_17, b_4, c_16);
0128       c_17 = FMA(a_17, b_5, c_17);
0129       c_18 = FMA(a_17, b_8, c_18);
0130       c_19 = FMA(a_17, b_12, c_19);
0131       ST(c, 15, c_15);
0132       ST(c, 16, c_16);
0133       ST(c, 17, c_17);
0134       ST(c, 18, c_18);
0135       ST(c, 19, c_19);
0136       IntrVec_t b_17 = LD(b, 17);
0137       c_20 = FMA(a_17, b_17, c_20);
0138       ST(c, 20, c_20);
0139    }
0140 
0141 #else
0142 
// Scalar branch: for every n in [0, N) writes the 21 planes c[k*N+n], k = 0..20,
// each as the sum of three products a[i*N+n]*b[j*N+n].
// a planes are consumed in consecutive triples (0-2, 3-5, ..., 15-17); triple r
// (r = 0..5) feeds c planes r*(r+1)/2 through r*(r+1)/2 + r.
// Only b planes 0-8, 10-12 and 15-17 are read; planes 9, 13 and 14 are never touched.
// NOTE(review): this index pattern is consistent with a being a 6x3 row-major
// matrix and b, c being 6x6 symmetric matrices in packed lower-triangular
// storage, with only the first three rows/columns of b contributing -- inferred
// from the indices and the file name, confirm against the caller.
// NOTE(review): N, a, b and c come from the including context; whether c may
// overlap a or b is not visible here, so the statement order is left untouched.
// The pragma requests SIMD vectorization of the loop over n.
0143 #pragma omp simd
0144    for (int n = 0; n < N; ++n)
0145    {
// c plane 0 (from a planes 0..2)
0146       c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 3*N+n];
// c planes 1..2 (from a planes 3..5)
0147       c[ 1*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
0148       c[ 2*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
// c planes 3..5 (from a planes 6..8)
0149       c[ 3*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n];
0150       c[ 4*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n];
0151       c[ 5*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n];
// c planes 6..9 (from a planes 9..11)
0152       c[ 6*N+n] = a[ 9*N+n]*b[ 0*N+n] + a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 3*N+n];
0153       c[ 7*N+n] = a[ 9*N+n]*b[ 1*N+n] + a[10*N+n]*b[ 2*N+n] + a[11*N+n]*b[ 4*N+n];
0154       c[ 8*N+n] = a[ 9*N+n]*b[ 3*N+n] + a[10*N+n]*b[ 4*N+n] + a[11*N+n]*b[ 5*N+n];
0155       c[ 9*N+n] = a[ 9*N+n]*b[ 6*N+n] + a[10*N+n]*b[ 7*N+n] + a[11*N+n]*b[ 8*N+n];
// c planes 10..14 (from a planes 12..14)
0156       c[10*N+n] = a[12*N+n]*b[ 0*N+n] + a[13*N+n]*b[ 1*N+n] + a[14*N+n]*b[ 3*N+n];
0157       c[11*N+n] = a[12*N+n]*b[ 1*N+n] + a[13*N+n]*b[ 2*N+n] + a[14*N+n]*b[ 4*N+n];
0158       c[12*N+n] = a[12*N+n]*b[ 3*N+n] + a[13*N+n]*b[ 4*N+n] + a[14*N+n]*b[ 5*N+n];
0159       c[13*N+n] = a[12*N+n]*b[ 6*N+n] + a[13*N+n]*b[ 7*N+n] + a[14*N+n]*b[ 8*N+n];
0160       c[14*N+n] = a[12*N+n]*b[10*N+n] + a[13*N+n]*b[11*N+n] + a[14*N+n]*b[12*N+n];
// c planes 15..20 (from a planes 15..17)
0161       c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n] + a[17*N+n]*b[ 3*N+n];
0162       c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n] + a[17*N+n]*b[ 4*N+n];
0163       c[17*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 5*N+n];
0164       c[18*N+n] = a[15*N+n]*b[ 6*N+n] + a[16*N+n]*b[ 7*N+n] + a[17*N+n]*b[ 8*N+n];
0165       c[19*N+n] = a[15*N+n]*b[10*N+n] + a[16*N+n]*b[11*N+n] + a[17*N+n]*b[12*N+n];
0166       c[20*N+n] = a[15*N+n]*b[15*N+n] + a[16*N+n]*b[16*N+n] + a[17*N+n]*b[17*N+n];
0167    }
0168 #endif