Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/ProjectResErr.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 // Vector path: fills the nine planes of c from planes of a and b (plane i is what the scalar path below indexes as [i*N+n]). NOTE(review): a, b, c, N, T, IntrVec_t and the LD/ST/MUL/FMA macros are defined outside this fragment; presumably LD(x,i)/ST(x,i,v) access plane i at the current n and FMA(p,q,r) = p*q + r -- confirm against the macro definitions.
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) // step is WIDTH_BYTES / sizeof(T) elements; n is not named in the body, so the macros presumably use it. NOTE(review): assumes N is a multiple of the step -- confirm.
0004    {
0005       IntrVec_t a_0 = LD(a, 0);
0006       IntrVec_t b_0 = LD(b, 0);
0007       IntrVec_t c_0 = MUL(a_0, b_0); // planes 0..2 start with the a[0] terms
0008       IntrVec_t b_1 = LD(b, 1);
0009       IntrVec_t c_1 = MUL(a_0, b_1);
0010       IntrVec_t b_3 = LD(b, 3);
0011       IntrVec_t c_2 = MUL(a_0, b_3);
0012 // Accumulate the a[1] terms into planes 0..2.
0013       IntrVec_t a_1 = LD(a, 1);
0014       c_0 = FMA(a_1, b_1, c_0);
0015       IntrVec_t b_2 = LD(b, 2);
0016       c_1 = FMA(a_1, b_2, c_1);
0017       IntrVec_t b_4 = LD(b, 4);
0018       c_2 = FMA(a_1, b_4, c_2);
0019 // Planes 3..5 of c are plain copies of planes 3..5 of b.
0020 // Planes 2, 3, 4, 5 and 8 of a are never read in this fragment.
0021 // NOTE(review): the formulas are consistent with C = A*B for a 3x3 row-major C, a packed symmetric 3x3 B (6 planes) and A = [[a0,a1,0],[0,0,1],[a6,a7,0]] -- confirm with the including function.
0022 
0023       IntrVec_t c_3 = b_3;
0024       IntrVec_t c_4 = b_4;
0025       IntrVec_t b_5 = LD(b, 5);
0026       IntrVec_t c_5 = b_5;
0027       ST(c, 0, c_0);
0028       ST(c, 1, c_1);
0029       ST(c, 2, c_2);
0030       ST(c, 3, c_3);
0031       ST(c, 4, c_4);
0032       ST(c, 5, c_5);
0033 // Planes 6..8: same combinations as planes 0..2 with a[6], a[7] in place of a[0], a[1]; b_0..b_4 loaded above are reused.
0034       IntrVec_t a_6 = LD(a, 6);
0035       IntrVec_t c_6 = MUL(a_6, b_0);
0036       IntrVec_t c_7 = MUL(a_6, b_1);
0037       IntrVec_t c_8 = MUL(a_6, b_3);
0038 // Accumulate the a[7] terms into planes 6..8.
0039       IntrVec_t a_7 = LD(a, 7);
0040       c_6 = FMA(a_7, b_1, c_6);
0041       c_7 = FMA(a_7, b_2, c_7);
0042       c_8 = FMA(a_7, b_4, c_8);
0043 
0044       ST(c, 6, c_6);
0045       ST(c, 7, c_7);
0046       ST(c, 8, c_8);
0047    }
0048 
0049 #else // !MPLEX_INTRINSICS
0050 // Scalar path: one n per iteration, element i of each operand read/written at [i*N+n]; the loop carries an OpenMP simd directive. Same nine formulas as the vector path (given the macro semantics assumed there).
0051 #pragma omp simd
0052    for (int n = 0; n < N; ++n)
0053    {
0054       c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n];
0055       c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n];
0056       c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n];
0057       c[ 3*N+n] = b[ 3*N+n]; // planes 3..5: copied straight from b
0058       c[ 4*N+n] = b[ 4*N+n];
0059       c[ 5*N+n] = b[ 5*N+n];
0060       c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n];
0061       c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n];
0062       c[ 8*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n];
0063    }
0064 #endif // MPLEX_INTRINSICS