Warning, /RecoTracker/MkFitCore/src/JacLoc2CCS.ah is written in an unsupported language. File is not indexed.
0001 #ifdef MPLEX_INTRINSICS
0002
// Vector branch (compiled when MPLEX_INTRINSICS is defined): writes all 30
// planes c[0..29] from selected planes of a and b, one vector of lanes per
// iteration. Plane by plane it stores the same values as the scalar loop in
// the #else branch of this file.
//
// IntrVec_t, LD, ST, MUL, FMA, T and N are defined outside this file. Judging
// by the scalar branch, LD(m, k) reads plane k of m at lane offset n,
// ST(m, k, v) writes it, and FMA(x, y, z) yields x * y + z -- TODO confirm
// against the macro definitions.
//
// NOTE(review): the index pattern is consistent with a row-major product
// c(6x5) = a(6x5) * b(5x5) in which entries known to be 0 or 1 were folded
// away; confirm against whatever produced this file.
// NOTE(review): loads, arithmetic and stores are deliberately left in their
// original interleaved order; reordering is only safe if c never overlaps a
// or b, which cannot be verified from this file.
//
// Loop step: vector width in bytes (going by the macro name) divided by the
// element size, i.e. the number of T elements handled per iteration.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
// Zero vector used for every plane of c that is stored as 0:
// 16 initializers when AVX512_INTRINSICS is defined, 8 otherwise.
0004 #ifdef AVX512_INTRINSICS
0005 IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0006 #else
0007 IntrVec_t all_zeros = {0, 0, 0, 0, 0, 0, 0, 0};
0008 #endif
0009
// First term of planes 3 and 4: a[3] * b[18] and a[3] * b[19].
0010 IntrVec_t a_3 = LD(a, 3);
0011 IntrVec_t b_18 = LD(b, 18);
0012 IntrVec_t c_3 = MUL(a_3, b_18);
0013 IntrVec_t b_19 = LD(b, 19);
0014 IntrVec_t c_4 = MUL(a_3, b_19);
0015
// Planes 0-2 are zero. Then add the second term of planes 3 and 4:
// a[4] * b[23] and a[4] * b[24].
0016 ST(c, 0, all_zeros);
0017 ST(c, 1, all_zeros);
0018 ST(c, 2, all_zeros);
0019 IntrVec_t a_4 = LD(a, 4);
0020 IntrVec_t b_23 = LD(b, 23);
0021 c_3 = FMA(a_4, b_23, c_3);
0022 IntrVec_t b_24 = LD(b, 24);
0023 c_4 = FMA(a_4, b_24, c_4);
0024
// Start planes 8 and 9 (reusing b_18 / b_19) and store the finished 3 and 4.
0025 IntrVec_t a_8 = LD(a, 8);
0026 IntrVec_t c_8 = MUL(a_8, b_18);
0027 ST(c, 3, c_3);
0028 IntrVec_t c_9 = MUL(a_8, b_19);
0029 ST(c, 4, c_4);
0030
// Planes 5-7 are zero. Then add the second term of planes 8 and 9:
// a[9] * b[23] and a[9] * b[24].
0031 ST(c, 5, all_zeros);
0032 ST(c, 6, all_zeros);
0033 ST(c, 7, all_zeros);
0034 IntrVec_t a_9 = LD(a, 9);
0035 c_8 = FMA(a_9, b_23, c_8);
0036 c_9 = FMA(a_9, b_24, c_9);
0037
// Planes 10-12 are zero. Planes 13 and 14 are single products with a[14].
0038 ST(c, 10, all_zeros);
0039 ST(c, 11, all_zeros);
0040 ST(c, 12, all_zeros);
0041 IntrVec_t a_14 = LD(a, 14);
0042 IntrVec_t c_13 = MUL(a_14, b_23);
0043 IntrVec_t c_14 = MUL(a_14, b_24);
0044 ST(c, 8, c_8);
0045 ST(c, 9, c_9);
0046
// Plane 15 is a plain copy of a[15].
0047 IntrVec_t a_15 = LD(a, 15);
0048 IntrVec_t c_15 = a_15;
0049 ST(c, 13, c_13);
0050 ST(c, 14, c_14);
0051
// Planes 16 and 17: a[16] * b[6] and a[16] * b[7].
0052 IntrVec_t a_16 = LD(a, 16);
0053 IntrVec_t b_6 = LD(b, 6);
0054 IntrVec_t c_16 = MUL(a_16, b_6);
0055 IntrVec_t b_7 = LD(b, 7);
0056 IntrVec_t c_17 = MUL(a_16, b_7);
0057
// Planes 18 and 19 are zero.
0058 ST(c, 18, all_zeros);
0059 ST(c, 19, all_zeros);
0060
// Planes 21-24 are plain copies of b[11]..b[14].
0061 IntrVec_t b_11 = LD(b, 11);
0062 IntrVec_t c_21 = b_11;
0063 IntrVec_t b_12 = LD(b, 12);
0064 IntrVec_t c_22 = b_12;
0065 ST(c, 15, c_15);
0066 ST(c, 16, c_16);
0067 ST(c, 17, c_17);
0068 IntrVec_t b_13 = LD(b, 13);
0069 IntrVec_t c_23 = b_13;
0070 IntrVec_t b_14 = LD(b, 14);
0071 IntrVec_t c_24 = b_14;
0072
// Plane 20 is zero.
0073 ST(c, 20, all_zeros);
0074
// Planes 26 and 27: a[26] * b[6] and a[26] * b[7] (b_6 / b_7 reused).
0075 IntrVec_t a_26 = LD(a, 26);
0076 IntrVec_t c_26 = MUL(a_26, b_6);
0077 IntrVec_t c_27 = MUL(a_26, b_7);
0078
// Planes 25, 28 and 29 are zero; then flush the remaining computed planes.
0079 ST(c, 25, all_zeros);
0080 ST(c, 28, all_zeros);
0081 ST(c, 29, all_zeros);
0082 ST(c, 21, c_21);
0083 ST(c, 22, c_22);
0084 ST(c, 23, c_23);
0085 ST(c, 24, c_24);
0086 ST(c, 26, c_26);
0087 ST(c, 27, c_27);
0088 }
0089
0090 #else
0091
// Scalar fallback (compiled when MPLEX_INTRINSICS is not defined): for every
// lane n in [0, N) it writes all 30 entries c[k * N + n], k = 0..29, from
// entries of a and b taken at the same lane n. Entry k of a matrix lives at
// offset k * N + n, so consecutive lanes of one entry are adjacent in memory.
// The pragma asks the compiler to vectorize the loop across n.
//
// NOTE(review): the index pattern is consistent with a row-major product
// c(6x5) = a(6x5) * b(5x5) in which entries known to be 0 or 1 were folded
// away (hence the plain zeros and plain copies below); confirm against
// whatever produced this file.
// NOTE(review): a, b, c and N are declared outside this file; the loop reads
// a and b after writing earlier entries of c, so it presumably relies on c
// not overlapping a or b -- verify at the include site.
0092 #pragma omp simd
0093 for (int n = 0; n < N; ++n) {
// Entries 0-2: zero.
0094 c[0 * N + n] = 0;
0095 c[1 * N + n] = 0;
0096 c[2 * N + n] = 0;
// Entries 3-4: two-term sums using a[3], a[4] with b[18], b[19], b[23], b[24].
0097 c[3 * N + n] = a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[23 * N + n];
0098 c[4 * N + n] = a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[24 * N + n];
// Entries 5-7: zero.
0099 c[5 * N + n] = 0;
0100 c[6 * N + n] = 0;
0101 c[7 * N + n] = 0;
// Entries 8-9: same b entries as 3-4, combined with a[8], a[9].
0102 c[8 * N + n] = a[8 * N + n] * b[18 * N + n] + a[9 * N + n] * b[23 * N + n];
0103 c[9 * N + n] = a[8 * N + n] * b[19 * N + n] + a[9 * N + n] * b[24 * N + n];
// Entries 10-12: zero.
0104 c[10 * N + n] = 0;
0105 c[11 * N + n] = 0;
0106 c[12 * N + n] = 0;
// Entries 13-14: single products with a[14].
0107 c[13 * N + n] = a[14 * N + n] * b[23 * N + n];
0108 c[14 * N + n] = a[14 * N + n] * b[24 * N + n];
// Entry 15: plain copy of a[15]. Entries 16-17: a[16] times b[6], b[7].
0109 c[15 * N + n] = a[15 * N + n];
0110 c[16 * N + n] = a[16 * N + n] * b[6 * N + n];
0111 c[17 * N + n] = a[16 * N + n] * b[7 * N + n];
// Entries 18-20: zero.
0112 c[18 * N + n] = 0;
0113 c[19 * N + n] = 0;
0114 c[20 * N + n] = 0;
// Entries 21-24: plain copies of b[11]..b[14].
0115 c[21 * N + n] = b[11 * N + n];
0116 c[22 * N + n] = b[12 * N + n];
0117 c[23 * N + n] = b[13 * N + n];
0118 c[24 * N + n] = b[14 * N + n];
// Entry 25: zero. Entries 26-27: a[26] times b[6], b[7]. Entries 28-29: zero.
0119 c[25 * N + n] = 0;
0120 c[26 * N + n] = a[26 * N + n] * b[6 * N + n];
0121 c[27 * N + n] = a[26 * N + n] * b[7 * N + n];
0122 c[28 * N + n] = 0;
0123 c[29 * N + n] = 0;
0124 }
0125 #endif