Warning, /RecoTracker/MkFitCore/src/JacErrPropCurv1.ah is written in an unsupported language. File is not indexed.
0001 #ifdef MPLEX_INTRINSICS
0002
// Vectorized path: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per
// iteration, and every entry k of c (k = 0..29) is written exactly once via
// ST(c, k, ...). The #else branch of this file lists the scalar formula for each
// of those 30 entries; this branch follows the same index pattern.
// LD / ST / MUL / FMA and IntrVec_t are defined outside this file. The notes
// below read them as vector load, store, x * y and x * y + z -- TODO confirm.
// In the notes, a[k] / b[k] / c[k] mean "entry k as accessed by LD / ST".
// NOTE(review): n is never passed to LD / ST explicitly, so they presumably pick
// up n (and N) from the enclosing scope -- confirm before renaming either.
// NOTE(review): stores are interleaved with later loads and arithmetic instead of
// being grouped; this looks like deliberate scheduling, so keep the order as is.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
// Zero vector for the entries of c that are always 0. The initializer has 16
// entries when AVX512_INTRINSICS is defined and 8 otherwise.
0004 #ifdef AVX512_INTRINSICS
0005 IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0006 #else
0007 IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0};
0008 #endif
0009
// First term of c[0..4]: a[3] * b[15..19]. b_15..b_19 are reused for c[5..9].
0010 IntrVec_t a_3 = LD(a, 3);
0011 IntrVec_t b_15 = LD(b, 15);
0012 IntrVec_t c_0 = MUL(a_3, b_15);
0013 IntrVec_t b_16 = LD(b, 16);
0014 IntrVec_t c_1 = MUL(a_3, b_16);
0015 IntrVec_t b_17 = LD(b, 17);
0016 IntrVec_t c_2 = MUL(a_3, b_17);
0017 IntrVec_t b_18 = LD(b, 18);
0018 IntrVec_t c_3 = MUL(a_3, b_18);
0019 IntrVec_t b_19 = LD(b, 19);
0020 IntrVec_t c_4 = MUL(a_3, b_19);
0021
// Second term of c[0..4]: += a[4] * b[20..24]. c[0..3] are stored here; the
// store of c[4] is issued inside the next group. b_20..b_24 are reused for
// c[5..9] and c[10..14].
0022 IntrVec_t a_4 = LD(a, 4);
0023 IntrVec_t b_20 = LD(b, 20);
0024 c_0 = FMA(a_4, b_20, c_0);
0025 IntrVec_t b_21 = LD(b, 21);
0026 c_1 = FMA(a_4, b_21, c_1);
0027 IntrVec_t b_22 = LD(b, 22);
0028 c_2 = FMA(a_4, b_22, c_2);
0029 ST(c, 0, c_0);
0030 IntrVec_t b_23 = LD(b, 23);
0031 c_3 = FMA(a_4, b_23, c_3);
0032 ST(c, 1, c_1);
0033 ST(c, 2, c_2);
0034 IntrVec_t b_24 = LD(b, 24);
0035 c_4 = FMA(a_4, b_24, c_4);
0036 ST(c, 3, c_3);
0037
// First term of c[5..9]: a[8] * b[15..19], using the b vectors loaded above.
// The pending store of c[4] is issued in the middle of this group.
0038 IntrVec_t a_8 = LD(a, 8);
0039 IntrVec_t c_5 = MUL(a_8, b_15);
0040 IntrVec_t c_6 = MUL(a_8, b_16);
0041 ST(c, 4, c_4);
0042 IntrVec_t c_7 = MUL(a_8, b_17);
0043 IntrVec_t c_8 = MUL(a_8, b_18);
0044 IntrVec_t c_9 = MUL(a_8, b_19);
0045
// Second term of c[5..9]: += a[9] * b[20..24], then store c[5..9].
0046 IntrVec_t a_9 = LD(a, 9);
0047 c_5 = FMA(a_9, b_20, c_5);
0048 c_6 = FMA(a_9, b_21, c_6);
0049 c_7 = FMA(a_9, b_22, c_7);
0050 c_8 = FMA(a_9, b_23, c_8);
0051 c_9 = FMA(a_9, b_24, c_9);
0052 ST(c, 5, c_5);
0053 ST(c, 6, c_6);
0054 ST(c, 7, c_7);
0055 ST(c, 8, c_8);
0056 ST(c, 9, c_9);
0057
// c[10..14] = a[14] * b[20..24] (single product, no second term).
0058 IntrVec_t a_14 = LD(a, 14);
0059 IntrVec_t c_10 = MUL(a_14, b_20);
0060 IntrVec_t c_11 = MUL(a_14, b_21);
0061 IntrVec_t c_12 = MUL(a_14, b_22);
0062 IntrVec_t c_13 = MUL(a_14, b_23);
0063 IntrVec_t c_14 = MUL(a_14, b_24);
0064 ST(c, 10, c_10);
0065 ST(c, 11, c_11);
0066 ST(c, 12, c_12);
0067 ST(c, 13, c_13);
0068 ST(c, 14, c_14);
0069
// c[15] is a plain copy of a[15]; its store is issued further down.
0070 IntrVec_t a_15 = LD(a, 15);
0071 IntrVec_t c_15 = a_15;
0072
// c[16] = a[16] * b[6], c[17] = a[16] * b[7]. b_6 and b_7 are reused for
// c[26] and c[27]; the stores of c[16], c[17] are issued further down.
0073 IntrVec_t a_16 = LD(a, 16);
0074 IntrVec_t b_6 = LD(b, 6);
0075 IntrVec_t c_16 = MUL(a_16, b_6);
0076 IntrVec_t b_7 = LD(b, 7);
0077 IntrVec_t c_17 = MUL(a_16, b_7);
0078
// c[18] and c[19] are always zero.
0079 ST(c, 18, all_zeros);
0080 ST(c, 19, all_zeros);
0081
// c[20..24] are plain copies of b[10..14]. The pending stores of c[15..17]
// are issued in the middle of this group.
0082 IntrVec_t b_10 = LD(b, 10);
0083 IntrVec_t c_20 = b_10;
0084 IntrVec_t b_11 = LD(b, 11);
0085 IntrVec_t c_21 = b_11;
0086 ST(c, 15, c_15);
0087 ST(c, 16, c_16);
0088 ST(c, 17, c_17);
0089 IntrVec_t b_12 = LD(b, 12);
0090 IntrVec_t c_22 = b_12;
0091 IntrVec_t b_13 = LD(b, 13);
0092 IntrVec_t c_23 = b_13;
0093 IntrVec_t b_14 = LD(b, 14);
0094 IntrVec_t c_24 = b_14;
0095
// c[26] = a[26] * b[6], c[27] = a[26] * b[7], using b_6 / b_7 loaded above.
0096 IntrVec_t a_26 = LD(a, 26);
0097 IntrVec_t c_26 = MUL(a_26, b_6);
0098 IntrVec_t c_27 = MUL(a_26, b_7);
0099
// c[25], c[28], c[29] are always zero. Then issue the remaining pending
// stores: c[20..24], c[26], c[27].
0100 ST(c, 25, all_zeros);
0101 ST(c, 28, all_zeros);
0102 ST(c, 29, all_zeros);
0103 ST(c, 20, c_20);
0104 ST(c, 21, c_21);
0105 ST(c, 22, c_22);
0106 ST(c, 23, c_23);
0107 ST(c, 24, c_24);
0108 ST(c, 26, c_26);
0109 ST(c, 27, c_27);
0110 }
0111
0112 #else
0113
0114 #pragma omp simd
0115 for (int n = 0; n < N; ++n) {
0116 c[0 * N + n] = a[3 * N + n] * b[15 * N + n] + a[4 * N + n] * b[20 * N + n];
0117 c[1 * N + n] = a[3 * N + n] * b[16 * N + n] + a[4 * N + n] * b[21 * N + n];
0118 c[2 * N + n] = a[3 * N + n] * b[17 * N + n] + a[4 * N + n] * b[22 * N + n];
0119 c[3 * N + n] = a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[23 * N + n];
0120 c[4 * N + n] = a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[24 * N + n];
0121 c[5 * N + n] = a[8 * N + n] * b[15 * N + n] + a[9 * N + n] * b[20 * N + n];
0122 c[6 * N + n] = a[8 * N + n] * b[16 * N + n] + a[9 * N + n] * b[21 * N + n];
0123 c[7 * N + n] = a[8 * N + n] * b[17 * N + n] + a[9 * N + n] * b[22 * N + n];
0124 c[8 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[23 * N + n];
0125 c[9 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[24 * N + n];
0126 c[10 * N + n] = a[14 * N + n] * b[20 * N + n];
0127 c[11 * N + n] = a[14 * N + n] * b[21 * N + n];
0128 c[12 * N + n] = a[14 * N + n] * b[22 * N + n];
0129 c[13 * N + n] = a[14 * N + n] * b[23 * N + n];
0130 c[14 * N + n] = a[14 * N + n] * b[24 * N + n];
0131 c[15 * N + n] = a[15 * N + n];
0132 c[16 * N + n] = a[16 * N + n] * b[6 * N + n];
0133 c[17 * N + n] = a[16 * N + n] * b[7 * N + n];
0134 c[18 * N + n] = 0;
0135 c[19 * N + n] = 0;
0136 c[20 * N + n] = b[10 * N + n];
0137 c[21 * N + n] = b[11 * N + n];
0138 c[22 * N + n] = b[12 * N + n];
0139 c[23 * N + n] = b[13 * N + n];
0140 c[24 * N + n] = b[14 * N + n];
0141 c[25 * N + n] = 0;
0142 c[26 * N + n] = a[26 * N + n] * b[6 * N + n];
0143 c[27 * N + n] = a[26 * N + n] * b[7 * N + n];
0144 c[28 * N + n] = 0;
0145 c[29 * N + n] = 0;
0146 }
0147 #endif