Warning, /RecoTracker/MkFitCore/src/CCSErr.ah is written in an unsupported language. File is not indexed.
0001 #ifdef MPLEX_INTRINSICS
0002 // Intrinsics branch: per pass, builds 36 c values from b slots 0..20 and a slots 21, 22, 27, 28, 33, 34, 35 via the LD/ST/MUL/FMA macros (defined outside this fragment).
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) // stride presumably = number of T values per IntrVec_t -- TODO confirm
0004 {
0005 IntrVec_t b_0 = LD(b, 0); // c slots 0..5 are straight copies of b slots 0, 1, 3, 6, 10, 15
0006 IntrVec_t c_0 = b_0;
0007 IntrVec_t b_1 = LD(b, 1);
0008 IntrVec_t c_1 = b_1;
0009 IntrVec_t b_3 = LD(b, 3);
0010 IntrVec_t c_2 = b_3;
0011 IntrVec_t b_6 = LD(b, 6);
0012 IntrVec_t c_3 = b_6;
0013 IntrVec_t b_10 = LD(b, 10);
0014 IntrVec_t c_4 = b_10;
0015 IntrVec_t b_15 = LD(b, 15);
0016 IntrVec_t c_5 = b_15;
0017
0018 // c slots 6..11: copies of b slots 1, 2, 4, 7, 11, 16.
0019 // b_1 is reused from the load above; the stores for c slots 0..5 are issued in the middle of this group,
0020 // after the first new loads. NOTE(review): the load/store interleaving looks deliberate (scheduling?) -- keep the order.
0021
0022
0023
0024 IntrVec_t c_6 = b_1;
0025 IntrVec_t b_2 = LD(b, 2);
0026 IntrVec_t c_7 = b_2;
0027 IntrVec_t b_4 = LD(b, 4);
0028 IntrVec_t c_8 = b_4;
0029 ST(c, 0, c_0);
0030 ST(c, 1, c_1);
0031 ST(c, 2, c_2);
0032 ST(c, 3, c_3);
0033 ST(c, 4, c_4);
0034 ST(c, 5, c_5);
0035 IntrVec_t b_7 = LD(b, 7);
0036 IntrVec_t c_9 = b_7;
0037 IntrVec_t b_11 = LD(b, 11);
0038 IntrVec_t c_10 = b_11;
0039 IntrVec_t b_16 = LD(b, 16);
0040 IntrVec_t c_11 = b_16;
0041
0042 // c slots 12..17: copies of b slots 3, 4, 5, 8, 12, 17 (b_3 and b_4 reused; b_5, b_8, b_12, b_17 loaded here).
0043
0044
0045
0046
0047
0048 IntrVec_t c_12 = b_3;
0049 IntrVec_t c_13 = b_4;
0050 IntrVec_t b_5 = LD(b, 5);
0051 IntrVec_t c_14 = b_5;
0052 ST(c, 6, c_6);
0053 ST(c, 7, c_7);
0054 ST(c, 8, c_8);
0055 ST(c, 9, c_9);
0056 ST(c, 10, c_10);
0057 ST(c, 11, c_11);
0058 IntrVec_t b_8 = LD(b, 8);
0059 IntrVec_t c_15 = b_8;
0060 IntrVec_t b_12 = LD(b, 12);
0061 IntrVec_t c_16 = b_12;
0062 IntrVec_t b_17 = LD(b, 17);
0063 IntrVec_t c_17 = b_17;
0064
0065 // c slots 18..23: a_21 * b{6,7,8,9,13,18} + a_22 * b{10,11,12,13,14,19}.
0066 // MUL starts each accumulator and FMA adds the second product (presumably FMA(x, y, z) = x*y + z, matching the scalar branch).
0067
0068
0069
0070
0071 IntrVec_t a_21 = LD(a, 21);
0072 IntrVec_t c_18 = MUL(a_21, b_6);
0073 IntrVec_t c_19 = MUL(a_21, b_7);
0074 IntrVec_t c_20 = MUL(a_21, b_8);
0075 ST(c, 12, c_12);
0076 ST(c, 13, c_13);
0077 ST(c, 14, c_14);
0078 ST(c, 15, c_15);
0079 ST(c, 16, c_16);
0080 ST(c, 17, c_17);
0081 IntrVec_t b_9 = LD(b, 9);
0082 IntrVec_t c_21 = MUL(a_21, b_9);
0083 IntrVec_t b_13 = LD(b, 13);
0084 IntrVec_t c_22 = MUL(a_21, b_13);
0085 IntrVec_t b_18 = LD(b, 18);
0086 IntrVec_t c_23 = MUL(a_21, b_18);
0087
0088 IntrVec_t a_22 = LD(a, 22);
0089 c_18 = FMA(a_22, b_10, c_18);
0090 c_19 = FMA(a_22, b_11, c_19);
0091 c_20 = FMA(a_22, b_12, c_20);
0092 c_21 = FMA(a_22, b_13, c_21);
0093 IntrVec_t b_14 = LD(b, 14);
0094 c_22 = FMA(a_22, b_14, c_22);
0095 IntrVec_t b_19 = LD(b, 19);
0096 c_23 = FMA(a_22, b_19, c_23);
0097
0098 // c slots 24..29: same b operands as slots 18..23, with a_27 and a_28 as the multipliers.
0099
0100
0101
0102 IntrVec_t a_27 = LD(a, 27);
0103 IntrVec_t c_24 = MUL(a_27, b_6);
0104 IntrVec_t c_25 = MUL(a_27, b_7);
0105 IntrVec_t c_26 = MUL(a_27, b_8);
0106 ST(c, 18, c_18);
0107 ST(c, 19, c_19);
0108 ST(c, 20, c_20);
0109 ST(c, 21, c_21);
0110 ST(c, 22, c_22);
0111 ST(c, 23, c_23);
0112 IntrVec_t c_27 = MUL(a_27, b_9);
0113 IntrVec_t c_28 = MUL(a_27, b_13);
0114 IntrVec_t c_29 = MUL(a_27, b_18);
0115
0116 IntrVec_t a_28 = LD(a, 28);
0117 c_24 = FMA(a_28, b_10, c_24);
0118 c_25 = FMA(a_28, b_11, c_25);
0119 c_26 = FMA(a_28, b_12, c_26);
0120 c_27 = FMA(a_28, b_13, c_27);
0121 c_28 = FMA(a_28, b_14, c_28);
0122 c_29 = FMA(a_28, b_19, c_29);
0123
0124 // c slots 30..35: a_33 * b{6,7,8,9,13,18} + a_34 * b{10,11,12,13,14,19} + a_35 * b{15,16,17,18,19,20}.
0125
0126
0127
0128 IntrVec_t a_33 = LD(a, 33);
0129 IntrVec_t c_30 = MUL(a_33, b_6);
0130 IntrVec_t c_31 = MUL(a_33, b_7);
0131 IntrVec_t c_32 = MUL(a_33, b_8);
0132 ST(c, 24, c_24);
0133 ST(c, 25, c_25);
0134 ST(c, 26, c_26);
0135 ST(c, 27, c_27);
0136 ST(c, 28, c_28);
0137 ST(c, 29, c_29);
0138 IntrVec_t c_33 = MUL(a_33, b_9);
0139 IntrVec_t c_34 = MUL(a_33, b_13);
0140 IntrVec_t c_35 = MUL(a_33, b_18);
0141
0142 IntrVec_t a_34 = LD(a, 34);
0143 c_30 = FMA(a_34, b_10, c_30);
0144 c_31 = FMA(a_34, b_11, c_31);
0145 c_32 = FMA(a_34, b_12, c_32);
0146 c_33 = FMA(a_34, b_13, c_33);
0147 c_34 = FMA(a_34, b_14, c_34);
0148 c_35 = FMA(a_34, b_19, c_35);
0149
0150 IntrVec_t a_35 = LD(a, 35);
0151 c_30 = FMA(a_35, b_15, c_30);
0152 c_31 = FMA(a_35, b_16, c_31);
0153 c_32 = FMA(a_35, b_17, c_32);
0154 c_33 = FMA(a_35, b_18, c_33);
0155 c_34 = FMA(a_35, b_19, c_34);
0156 ST(c, 30, c_30);
0157 ST(c, 31, c_31);
0158 ST(c, 32, c_32);
0159 ST(c, 33, c_33);
0160 ST(c, 34, c_34);
0161 IntrVec_t b_20 = LD(b, 20); // b slot 20 is only needed for c slot 35
0162 c_35 = FMA(a_35, b_20, c_35);
0163 ST(c, 35, c_35);
0164 }
0165 // NOTE(review): the slot pattern is consistent with c = A*B for 6x6 matrices (c and a row-major, b a packed lower-triangular symmetric matrix, only 7 entries of A read) -- confirm against the caller/generator.
0166 #else
0167 // Scalar branch: slot k of entry n is addressed as [k*N + n]; the same 36 results are written out one c slot per line.
0168 #pragma omp simd
0169 for (int n = 0; n < N; ++n)
0170 {
0171 c[ 0*N+n] = b[ 0*N+n]; // c slots 0..5 <- b slots 0, 1, 3, 6, 10, 15
0172 c[ 1*N+n] = b[ 1*N+n];
0173 c[ 2*N+n] = b[ 3*N+n];
0174 c[ 3*N+n] = b[ 6*N+n];
0175 c[ 4*N+n] = b[10*N+n];
0176 c[ 5*N+n] = b[15*N+n];
0177 c[ 6*N+n] = b[ 1*N+n]; // c slots 6..11 <- b slots 1, 2, 4, 7, 11, 16
0178 c[ 7*N+n] = b[ 2*N+n];
0179 c[ 8*N+n] = b[ 4*N+n];
0180 c[ 9*N+n] = b[ 7*N+n];
0181 c[10*N+n] = b[11*N+n];
0182 c[11*N+n] = b[16*N+n];
0183 c[12*N+n] = b[ 3*N+n]; // c slots 12..17 <- b slots 3, 4, 5, 8, 12, 17
0184 c[13*N+n] = b[ 4*N+n];
0185 c[14*N+n] = b[ 5*N+n];
0186 c[15*N+n] = b[ 8*N+n];
0187 c[16*N+n] = b[12*N+n];
0188 c[17*N+n] = b[17*N+n];
0189 c[18*N+n] = a[21*N+n]*b[ 6*N+n] + a[22*N+n]*b[10*N+n]; // c slots 18..23: two-term sums with a slots 21, 22
0190 c[19*N+n] = a[21*N+n]*b[ 7*N+n] + a[22*N+n]*b[11*N+n];
0191 c[20*N+n] = a[21*N+n]*b[ 8*N+n] + a[22*N+n]*b[12*N+n];
0192 c[21*N+n] = a[21*N+n]*b[ 9*N+n] + a[22*N+n]*b[13*N+n];
0193 c[22*N+n] = a[21*N+n]*b[13*N+n] + a[22*N+n]*b[14*N+n];
0194 c[23*N+n] = a[21*N+n]*b[18*N+n] + a[22*N+n]*b[19*N+n];
0195 c[24*N+n] = a[27*N+n]*b[ 6*N+n] + a[28*N+n]*b[10*N+n]; // c slots 24..29: two-term sums with a slots 27, 28
0196 c[25*N+n] = a[27*N+n]*b[ 7*N+n] + a[28*N+n]*b[11*N+n];
0197 c[26*N+n] = a[27*N+n]*b[ 8*N+n] + a[28*N+n]*b[12*N+n];
0198 c[27*N+n] = a[27*N+n]*b[ 9*N+n] + a[28*N+n]*b[13*N+n];
0199 c[28*N+n] = a[27*N+n]*b[13*N+n] + a[28*N+n]*b[14*N+n];
0200 c[29*N+n] = a[27*N+n]*b[18*N+n] + a[28*N+n]*b[19*N+n];
0201 c[30*N+n] = a[33*N+n]*b[ 6*N+n] + a[34*N+n]*b[10*N+n] + a[35*N+n]*b[15*N+n]; // c slots 30..35: three-term sums with a slots 33, 34, 35
0202 c[31*N+n] = a[33*N+n]*b[ 7*N+n] + a[34*N+n]*b[11*N+n] + a[35*N+n]*b[16*N+n];
0203 c[32*N+n] = a[33*N+n]*b[ 8*N+n] + a[34*N+n]*b[12*N+n] + a[35*N+n]*b[17*N+n];
0204 c[33*N+n] = a[33*N+n]*b[ 9*N+n] + a[34*N+n]*b[13*N+n] + a[35*N+n]*b[18*N+n];
0205 c[34*N+n] = a[33*N+n]*b[13*N+n] + a[34*N+n]*b[14*N+n] + a[35*N+n]*b[19*N+n];
0206 c[35*N+n] = a[33*N+n]*b[18*N+n] + a[34*N+n]*b[19*N+n] + a[35*N+n]*b[20*N+n];
0207 }
0208 #endif // MPLEX_INTRINSICS