Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/KalmanHTG.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 // Vector path: each pass covers MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) consecutive n. Keep in sync with the scalar #else path.
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004    {
0005       IntrVec_t a_0 = LD(a, 0); // NOTE(review): LD/MUL/FMA/ST are defined outside this fragment; presumably LD(x, i) reads x[i*N+n] -- confirm
0006       IntrVec_t b_0 = LD(b, 0);
0007       IntrVec_t c_0 = MUL(a_0, b_0);
0008       IntrVec_t b_1 = LD(b, 1);
0009       IntrVec_t c_1 = MUL(a_0, b_1);
0010       IntrVec_t b_3 = LD(b, 3);
0011       IntrVec_t c_2 = MUL(a_0, b_3);
0012       // c_0..c_2 now hold the a_0 products; the a_2 terms are accumulated next.
0013       // NOTE(review): FMA(x, y, z) is presumably x*y + z, which is what the scalar branch computes -- confirm.
0014       IntrVec_t a_2 = LD(a, 2);
0015       c_0 = FMA(a_2, b_3, c_0); // b_3 is used twice: in c_2 above and in c_0 here
0016       IntrVec_t b_4 = LD(b, 4);
0017       c_1 = FMA(a_2, b_4, c_1);
0018       IntrVec_t b_5 = LD(b, 5);
0019       c_2 = FMA(a_2, b_5, c_2);
0020       ST(c, 0, c_0);
0021       // Outputs 3..5 repeat the pattern of outputs 0..2 with a_3/a_5 in place of a_0/a_2; stores of c_1, c_2 are interleaved.
0022       IntrVec_t a_3 = LD(a, 3);
0023       IntrVec_t c_3 = MUL(a_3, b_0);
0024       ST(c, 1, c_1);
0025       ST(c, 2, c_2);
0026       IntrVec_t c_4 = MUL(a_3, b_1);
0027       IntrVec_t c_5 = MUL(a_3, b_3);
0028 
0029 
0030       IntrVec_t a_5 = LD(a, 5);
0031       c_3 = FMA(a_5, b_3, c_3);
0032       c_4 = FMA(a_5, b_4, c_4);
0033       c_5 = FMA(a_5, b_5, c_5);
0034       // Outputs 6..8 are plain copies of b elements 1, 2 and 4; no element of a is involved.
0035 
0036       IntrVec_t c_6 = b_1;
0037       IntrVec_t b_2 = LD(b, 2); // b element 2 is only needed for this copy
0038       IntrVec_t c_7 = b_2;
0039       ST(c, 3, c_3);
0040       ST(c, 4, c_4);
0041       ST(c, 5, c_5);
0042       IntrVec_t c_8 = b_4;
0043 
0044       ST(c, 6, c_6);
0045       ST(c, 7, c_7);
0046       ST(c, 8, c_8);
0047    }
0048 
0049 #else
0050 // Scalar path: element i for index n lives at x[i*N + n]. Reads a[0,2,3,5] and b[0..5]; writes c[0..8].
0051 #pragma omp simd
0052    for (int n = 0; n < N; ++n)
0053    {
0054       c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 3*N+n];
0055       c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 4*N+n];
0056       c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 5*N+n];
0057       c[ 3*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 5*N+n]*b[ 3*N+n];
0058       c[ 4*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 4*N+n];
0059       c[ 5*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 5*N+n]*b[ 5*N+n];
0060       c[ 6*N+n] = b[ 1*N+n]; // NOTE(review): index pattern looks like b is a packed symmetric 3x3 and c a full 3x3 -- confirm with the including code
0061       c[ 7*N+n] = b[ 2*N+n];
0062       c[ 8*N+n] = b[ 4*N+n];
0063    }
0064 #endif