Warning, /RecoTracker/MkFitCore/src/KH.ah is written in an unsupported language. File is not indexed.
// Kernel fragment: fills 36 output planes c[0..35] from planes of a and b for
// every lane n in [0, N). Nothing except locals is declared here, so a, b, c,
// N and T (and, for the intrinsics branch, IntrVec_t / LD / MUL / ST) must be
// provided by whatever context includes this fragment.
//
// For each group k = 0..5 (the scalar branch below is the reference form):
//   c[6k+0] = a[3k] * b[0]
//   c[6k+1] = a[3k] * b[1]
//   c[6k+2] = a[3k+1]
//   c[6k+3] = c[6k+4] = c[6k+5] = 0
// Only a[3k], a[3k+1], b[0] and b[1] are read; a[3k+2] and b[2..] are never
// touched.
//
// NOTE(review): going by the file name (KH) this presumably is a 6x6 = 6x3 * 3x6
// product with a sparse right-hand operand whose known 0/1 entries were folded
// away — confirm against the caller/generator before relying on that reading.
// NOTE(review): the code looks fully unrolled and index-named as if generated;
// confirm whether it is produced by a script before hand-editing.
// NOTE(review): stores to c are interleaved with later loads from a (e.g. c[3]
// is written before a[3] is read, in both branches). If c could alias a or b
// the result would depend on this exact order — confirm callers pass distinct
// buffers.
0001 #ifdef MPLEX_INTRINSICS
0002
// Explicit-vector path. Each iteration advances n by
// MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) elements. LD / MUL / ST are defined
// elsewhere; presumably LD(x, i) / ST(x, i, v) access plane i of x at the
// current n — TODO confirm against the macro definitions.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004 {
// Zero vector used for every c[6k+3..6k+5] store: 16 initializers when
// AVX512_INTRINSICS is defined, 8 otherwise.
0005 #ifdef AVX512_INTRINSICS
0006 IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
0007 #else
0008 IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };
0009 #endif
0010
// Group 0. b_0 and b_1 are loaded once here and reused by all six groups.
0011 IntrVec_t a_0 = LD(a, 0);
0012 IntrVec_t b_0 = LD(b, 0);
0013 IntrVec_t c_0 = MUL(a_0, b_0);
0014 IntrVec_t b_1 = LD(b, 1);
0015 IntrVec_t c_1 = MUL(a_0, b_1);
0016
// c_2 is a plain copy of a[1]; no multiply is issued for it.
0017 IntrVec_t a_1 = LD(a, 1);
0018 IntrVec_t c_2 = a_1;
0019
0020 ST(c, 3, all_zeros);
0021 ST(c, 4, all_zeros);
0022 ST(c, 5, all_zeros);
0023
// Group 1. The stores of c_0..c_2 are deferred until after this group's loads
// and multiplies; the same pattern repeats for every following group
// (presumably to overlap load/multiply latency with stores — not verifiable
// from this fragment).
0024 IntrVec_t a_3 = LD(a, 3);
0025 IntrVec_t c_6 = MUL(a_3, b_0);
0026 IntrVec_t c_7 = MUL(a_3, b_1);
0027
0028 IntrVec_t a_4 = LD(a, 4);
0029 IntrVec_t c_8 = a_4;
0030 ST(c, 0, c_0);
0031 ST(c, 1, c_1);
0032 ST(c, 2, c_2);
0033
0034 ST(c, 9, all_zeros);
0035 ST(c, 10, all_zeros);
0036 ST(c, 11, all_zeros);
0037
// Group 2 (inputs a[6], a[7]); stores group 1 results.
0038 IntrVec_t a_6 = LD(a, 6);
0039 IntrVec_t c_12 = MUL(a_6, b_0);
0040 IntrVec_t c_13 = MUL(a_6, b_1);
0041
0042 IntrVec_t a_7 = LD(a, 7);
0043 IntrVec_t c_14 = a_7;
0044 ST(c, 6, c_6);
0045 ST(c, 7, c_7);
0046 ST(c, 8, c_8);
0047
0048 ST(c, 15, all_zeros);
0049 ST(c, 16, all_zeros);
0050 ST(c, 17, all_zeros);
0051
// Group 3 (inputs a[9], a[10]); stores group 2 results.
0052 IntrVec_t a_9 = LD(a, 9);
0053 IntrVec_t c_18 = MUL(a_9, b_0);
0054 IntrVec_t c_19 = MUL(a_9, b_1);
0055
0056 IntrVec_t a_10 = LD(a, 10);
0057 IntrVec_t c_20 = a_10;
0058 ST(c, 12, c_12);
0059 ST(c, 13, c_13);
0060 ST(c, 14, c_14);
0061
0062 ST(c, 21, all_zeros);
0063 ST(c, 22, all_zeros);
0064 ST(c, 23, all_zeros);
0065
// Group 4 (inputs a[12], a[13]); stores group 3 results.
0066 IntrVec_t a_12 = LD(a, 12);
0067 IntrVec_t c_24 = MUL(a_12, b_0);
0068 IntrVec_t c_25 = MUL(a_12, b_1);
0069
0070 IntrVec_t a_13 = LD(a, 13);
0071 IntrVec_t c_26 = a_13;
0072 ST(c, 18, c_18);
0073 ST(c, 19, c_19);
0074 ST(c, 20, c_20);
0075
0076 ST(c, 27, all_zeros);
0077 ST(c, 28, all_zeros);
0078 ST(c, 29, all_zeros);
0079
// Group 5 (inputs a[15], a[16]); stores group 4 results.
0080 IntrVec_t a_15 = LD(a, 15);
0081 IntrVec_t c_30 = MUL(a_15, b_0);
0082 IntrVec_t c_31 = MUL(a_15, b_1);
0083
0084 IntrVec_t a_16 = LD(a, 16);
0085 IntrVec_t c_32 = a_16;
0086 ST(c, 24, c_24);
0087 ST(c, 25, c_25);
0088 ST(c, 26, c_26);
0089
// Tail: zero the last three planes, then flush the group 5 results.
0090 ST(c, 33, all_zeros);
0091 ST(c, 34, all_zeros);
0092 ST(c, 35, all_zeros);
0093 ST(c, 30, c_30);
0094 ST(c, 31, c_31);
0095 ST(c, 32, c_32);
0096 }
0097
0098 #else
0099
// Scalar reference path, one lane n per iteration; the OpenMP simd pragma
// requests vectorization of the loop over n. Planes are stored with stride N:
// plane i of lane n lives at index i*N + n. Output planes are written in
// ascending order 0..35.
0100 #pragma omp simd
0101 for (int n = 0; n < N; ++n)
0102 {
// Group 0: a[0], a[1] -> c[0..5].
0103 c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n];
0104 c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n];
0105 c[ 2*N+n] = a[ 1*N+n];
0106 c[ 3*N+n] = 0;
0107 c[ 4*N+n] = 0;
0108 c[ 5*N+n] = 0;
// Group 1: a[3], a[4] -> c[6..11].
0109 c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n];
0110 c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n];
0111 c[ 8*N+n] = a[ 4*N+n];
0112 c[ 9*N+n] = 0;
0113 c[10*N+n] = 0;
0114 c[11*N+n] = 0;
// Group 2: a[6], a[7] -> c[12..17].
0115 c[12*N+n] = a[ 6*N+n]*b[ 0*N+n];
0116 c[13*N+n] = a[ 6*N+n]*b[ 1*N+n];
0117 c[14*N+n] = a[ 7*N+n];
0118 c[15*N+n] = 0;
0119 c[16*N+n] = 0;
0120 c[17*N+n] = 0;
// Group 3: a[9], a[10] -> c[18..23].
0121 c[18*N+n] = a[ 9*N+n]*b[ 0*N+n];
0122 c[19*N+n] = a[ 9*N+n]*b[ 1*N+n];
0123 c[20*N+n] = a[10*N+n];
0124 c[21*N+n] = 0;
0125 c[22*N+n] = 0;
0126 c[23*N+n] = 0;
// Group 4: a[12], a[13] -> c[24..29].
0127 c[24*N+n] = a[12*N+n]*b[ 0*N+n];
0128 c[25*N+n] = a[12*N+n]*b[ 1*N+n];
0129 c[26*N+n] = a[13*N+n];
0130 c[27*N+n] = 0;
0131 c[28*N+n] = 0;
0132 c[29*N+n] = 0;
// Group 5: a[15], a[16] -> c[30..35].
0133 c[30*N+n] = a[15*N+n]*b[ 0*N+n];
0134 c[31*N+n] = a[15*N+n]*b[ 1*N+n];
0135 c[32*N+n] = a[16*N+n];
0136 c[33*N+n] = 0;
0137 c[34*N+n] = 0;
0138 c[35*N+n] = 0;
0139 }
0140 #endif