Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/CCSErr.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 // Vector path: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per pass (presumably the lane count of one IntrVec_t).
0003    for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004    {
0005       IntrVec_t b_0 = LD(b, 0);
0006       IntrVec_t c_0 = b_0;
0007       IntrVec_t b_1 = LD(b, 1);
0008       IntrVec_t c_1 = b_1;
0009       IntrVec_t b_3 = LD(b, 3);
0010       IntrVec_t c_2 = b_3;
0011       IntrVec_t b_6 = LD(b, 6);
0012       IntrVec_t c_3 = b_6;
0013       IntrVec_t b_10 = LD(b, 10);
0014       IntrVec_t c_4 = b_10;
0015       IntrVec_t b_15 = LD(b, 15);
0016       IntrVec_t c_5 = b_15;
0017       // c_0..c_5 above are plain copies of b_0, b_1, b_3, b_6, b_10, b_15.
0018       // NOTE(review): LD/ST/MUL/FMA and IntrVec_t are defined outside this
0019       // fragment; judging by the scalar branch after #else, FMA(x, y, acc)
0020       // is used as x*y + acc -- confirm against the macro definitions.
0021       //
0022       // c_6..c_11: copies of b_1, b_2, b_4, b_7, b_11, b_16. The ST calls for
0023       // c_0..c_5 are interleaved with the loads of this group.
0024       IntrVec_t c_6 = b_1;
0025       IntrVec_t b_2 = LD(b, 2);
0026       IntrVec_t c_7 = b_2;
0027       IntrVec_t b_4 = LD(b, 4);
0028       IntrVec_t c_8 = b_4;
0029       ST(c, 0, c_0);
0030       ST(c, 1, c_1);
0031       ST(c, 2, c_2);
0032       ST(c, 3, c_3);
0033       ST(c, 4, c_4);
0034       ST(c, 5, c_5);
0035       IntrVec_t b_7 = LD(b, 7);
0036       IntrVec_t c_9 = b_7;
0037       IntrVec_t b_11 = LD(b, 11);
0038       IntrVec_t c_10 = b_11;
0039       IntrVec_t b_16 = LD(b, 16);
0040       IntrVec_t c_11 = b_16;
0041       // c_12..c_17: copies of b_3, b_4, b_5, b_8, b_12, b_17. b_3 and b_4 are
0042       // reused from the earlier loads rather than loaded again.
0043 
0044 
0045 
0046 
0047 
0048       IntrVec_t c_12 = b_3;
0049       IntrVec_t c_13 = b_4;
0050       IntrVec_t b_5 = LD(b, 5);
0051       IntrVec_t c_14 = b_5;
0052       ST(c, 6, c_6);
0053       ST(c, 7, c_7);
0054       ST(c, 8, c_8);
0055       ST(c, 9, c_9);
0056       ST(c, 10, c_10);
0057       ST(c, 11, c_11);
0058       IntrVec_t b_8 = LD(b, 8);
0059       IntrVec_t c_15 = b_8;
0060       IntrVec_t b_12 = LD(b, 12);
0061       IntrVec_t c_16 = b_12;
0062       IntrVec_t b_17 = LD(b, 17);
0063       IntrVec_t c_17 = b_17;
0064       // c_18..c_23: two-term sums a_21*b_k + a_22*b_m (assuming FMA(x, y, acc)
0065       // = x*y + acc). The MUL group starts each sum; the FMA group that
0066       // follows adds the a_22 term. Operand pairs (b_k, b_m):
0067       //   c_18: b_6, b_10    c_19: b_7, b_11    c_20: b_8, b_12
0068       //   c_21: b_9, b_13    c_22: b_13, b_14   c_23: b_18, b_19
0069 
0070 
0071       IntrVec_t a_21 = LD(a, 21);
0072       IntrVec_t c_18 = MUL(a_21, b_6);
0073       IntrVec_t c_19 = MUL(a_21, b_7);
0074       IntrVec_t c_20 = MUL(a_21, b_8);
0075       ST(c, 12, c_12);
0076       ST(c, 13, c_13);
0077       ST(c, 14, c_14);
0078       ST(c, 15, c_15);
0079       ST(c, 16, c_16);
0080       ST(c, 17, c_17);
0081       IntrVec_t b_9 = LD(b, 9);
0082       IntrVec_t c_21 = MUL(a_21, b_9);
0083       IntrVec_t b_13 = LD(b, 13);
0084       IntrVec_t c_22 = MUL(a_21, b_13);
0085       IntrVec_t b_18 = LD(b, 18);
0086       IntrVec_t c_23 = MUL(a_21, b_18);
0087 
0088       IntrVec_t a_22 = LD(a, 22);
0089       c_18 = FMA(a_22, b_10, c_18);
0090       c_19 = FMA(a_22, b_11, c_19);
0091       c_20 = FMA(a_22, b_12, c_20);
0092       c_21 = FMA(a_22, b_13, c_21);
0093       IntrVec_t b_14 = LD(b, 14);
0094       c_22 = FMA(a_22, b_14, c_22);
0095       IntrVec_t b_19 = LD(b, 19);
0096       c_23 = FMA(a_22, b_19, c_23);
0097       // c_24..c_29: same b operand pairs as c_18..c_23, with coefficients a_27
0098       // and a_28 in place of a_21 and a_22. All b values needed here were
0099       // already loaded above, so this group issues no further LD(b, ...).
0100 
0101 
0102       IntrVec_t a_27 = LD(a, 27);
0103       IntrVec_t c_24 = MUL(a_27, b_6);
0104       IntrVec_t c_25 = MUL(a_27, b_7);
0105       IntrVec_t c_26 = MUL(a_27, b_8);
0106       ST(c, 18, c_18);
0107       ST(c, 19, c_19);
0108       ST(c, 20, c_20);
0109       ST(c, 21, c_21);
0110       ST(c, 22, c_22);
0111       ST(c, 23, c_23);
0112       IntrVec_t c_27 = MUL(a_27, b_9);
0113       IntrVec_t c_28 = MUL(a_27, b_13);
0114       IntrVec_t c_29 = MUL(a_27, b_18);
0115 
0116       IntrVec_t a_28 = LD(a, 28);
0117       c_24 = FMA(a_28, b_10, c_24);
0118       c_25 = FMA(a_28, b_11, c_25);
0119       c_26 = FMA(a_28, b_12, c_26);
0120       c_27 = FMA(a_28, b_13, c_27);
0121       c_28 = FMA(a_28, b_14, c_28);
0122       c_29 = FMA(a_28, b_19, c_29);
0123       // c_30..c_35: three-term sums. The first two terms use a_33 and a_34
0124       // with the same b operand pairs as c_18..c_23; the third term uses a_35
0125       // with b_15, b_16, b_17, b_18, b_19, b_20 respectively. b_20 is loaded
0126       // only at the very end, just before the last FMA and store.
0127 
0128       IntrVec_t a_33 = LD(a, 33);
0129       IntrVec_t c_30 = MUL(a_33, b_6);
0130       IntrVec_t c_31 = MUL(a_33, b_7);
0131       IntrVec_t c_32 = MUL(a_33, b_8);
0132       ST(c, 24, c_24);
0133       ST(c, 25, c_25);
0134       ST(c, 26, c_26);
0135       ST(c, 27, c_27);
0136       ST(c, 28, c_28);
0137       ST(c, 29, c_29);
0138       IntrVec_t c_33 = MUL(a_33, b_9);
0139       IntrVec_t c_34 = MUL(a_33, b_13);
0140       IntrVec_t c_35 = MUL(a_33, b_18);
0141 
0142       IntrVec_t a_34 = LD(a, 34);
0143       c_30 = FMA(a_34, b_10, c_30);
0144       c_31 = FMA(a_34, b_11, c_31);
0145       c_32 = FMA(a_34, b_12, c_32);
0146       c_33 = FMA(a_34, b_13, c_33);
0147       c_34 = FMA(a_34, b_14, c_34);
0148       c_35 = FMA(a_34, b_19, c_35);
0149 
0150       IntrVec_t a_35 = LD(a, 35);
0151       c_30 = FMA(a_35, b_15, c_30);
0152       c_31 = FMA(a_35, b_16, c_31);
0153       c_32 = FMA(a_35, b_17, c_32);
0154       c_33 = FMA(a_35, b_18, c_33);
0155       c_34 = FMA(a_35, b_19, c_34);
0156       ST(c, 30, c_30);
0157       ST(c, 31, c_31);
0158       ST(c, 32, c_32);
0159       ST(c, 33, c_33);
0160       ST(c, 34, c_34);
0161       IntrVec_t b_20 = LD(b, 20);
0162       c_35 = FMA(a_35, b_20, c_35);
0163       ST(c, 35, c_35);
0164    }
0165 
0166 #else
0167 // Scalar fallback: for each item n, element k of a, b and c is accessed at index k*N + n.
0168 #pragma omp simd
0169    for (int n = 0; n < N; ++n)
0170    {  // NOTE(review): indices look like c = a*b with row-major 6x6 c/a and packed lower-triangular symmetric b -- confirm.
0171       c[ 0*N+n] = b[ 0*N+n];  // c[0..17]: straight copies of b elements; a is not read for these
0172       c[ 1*N+n] = b[ 1*N+n];
0173       c[ 2*N+n] = b[ 3*N+n];
0174       c[ 3*N+n] = b[ 6*N+n];
0175       c[ 4*N+n] = b[10*N+n];
0176       c[ 5*N+n] = b[15*N+n];
0177       c[ 6*N+n] = b[ 1*N+n];
0178       c[ 7*N+n] = b[ 2*N+n];
0179       c[ 8*N+n] = b[ 4*N+n];
0180       c[ 9*N+n] = b[ 7*N+n];
0181       c[10*N+n] = b[11*N+n];
0182       c[11*N+n] = b[16*N+n];
0183       c[12*N+n] = b[ 3*N+n];
0184       c[13*N+n] = b[ 4*N+n];
0185       c[14*N+n] = b[ 5*N+n];
0186       c[15*N+n] = b[ 8*N+n];
0187       c[16*N+n] = b[12*N+n];
0188       c[17*N+n] = b[17*N+n];
0189       c[18*N+n] = a[21*N+n]*b[ 6*N+n] + a[22*N+n]*b[10*N+n];  // c[18..23]: two-term sums with a[21], a[22]
0190       c[19*N+n] = a[21*N+n]*b[ 7*N+n] + a[22*N+n]*b[11*N+n];
0191       c[20*N+n] = a[21*N+n]*b[ 8*N+n] + a[22*N+n]*b[12*N+n];
0192       c[21*N+n] = a[21*N+n]*b[ 9*N+n] + a[22*N+n]*b[13*N+n];
0193       c[22*N+n] = a[21*N+n]*b[13*N+n] + a[22*N+n]*b[14*N+n];
0194       c[23*N+n] = a[21*N+n]*b[18*N+n] + a[22*N+n]*b[19*N+n];
0195       c[24*N+n] = a[27*N+n]*b[ 6*N+n] + a[28*N+n]*b[10*N+n];  // c[24..29]: same b operands, with a[27], a[28]
0196       c[25*N+n] = a[27*N+n]*b[ 7*N+n] + a[28*N+n]*b[11*N+n];
0197       c[26*N+n] = a[27*N+n]*b[ 8*N+n] + a[28*N+n]*b[12*N+n];
0198       c[27*N+n] = a[27*N+n]*b[ 9*N+n] + a[28*N+n]*b[13*N+n];
0199       c[28*N+n] = a[27*N+n]*b[13*N+n] + a[28*N+n]*b[14*N+n];
0200       c[29*N+n] = a[27*N+n]*b[18*N+n] + a[28*N+n]*b[19*N+n];
0201       c[30*N+n] = a[33*N+n]*b[ 6*N+n] + a[34*N+n]*b[10*N+n] + a[35*N+n]*b[15*N+n];  // c[30..35]: three-term sums with a[33], a[34], a[35]
0202       c[31*N+n] = a[33*N+n]*b[ 7*N+n] + a[34*N+n]*b[11*N+n] + a[35*N+n]*b[16*N+n];
0203       c[32*N+n] = a[33*N+n]*b[ 8*N+n] + a[34*N+n]*b[12*N+n] + a[35*N+n]*b[17*N+n];
0204       c[33*N+n] = a[33*N+n]*b[ 9*N+n] + a[34*N+n]*b[13*N+n] + a[35*N+n]*b[18*N+n];
0205       c[34*N+n] = a[33*N+n]*b[13*N+n] + a[34*N+n]*b[14*N+n] + a[35*N+n]*b[19*N+n];
0206       c[35*N+n] = a[33*N+n]*b[18*N+n] + a[34*N+n]*b[19*N+n] + a[35*N+n]*b[20*N+n];
0207    }
0208 #endif