Warning, /RecoTracker/MkFitCore/src/upParam_MultKalmanGain.ah is written in an unsupported language. File is not indexed.
0001 #ifdef MPLEX_INTRINSICS
0002
// Vectorized path (compiled when MPLEX_INTRINSICS is defined).
// Each iteration advances n by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) and
// writes the 18 output slots 0..17 of c, each being a sum of three a*b products.
// The #else branch of this file spells out the same 18 sums in scalar form.
//
// IntrVec_t, LD, MUL, FMA and ST are defined outside this file. By comparison
// with the scalar branch, LD(x, k) presumably loads slot k of x for the current
// n, MUL(x, y) is x*y, FMA(x, y, z) is x*y + z, and ST(c, k, v) writes v to
// slot k of c -- TODO confirm against the macro definitions.
// n is never named in the loop body, so those macros presumably pick it up
// implicitly -- verify.
// NOTE(review): no remainder handling is visible here, so N is presumably a
// multiple of the loop step -- confirm at the include site.
// NOTE(review): the slot pattern (a: 0-8, 10-12, 15-17; b: 0-5) is consistent
// with a being a packed symmetric 6x6 matrix, b a packed symmetric 3x3 matrix
// and c a dense 6x3 row-major result -- inferred from the indices only.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
0004 {
// Output slots 0-2: a slots (0, 1, 3) combined with b slots (0,1,3), (1,2,4), (3,4,5).
// Each b slot is loaded once, on first use, and reused by all later groups.
0005 IntrVec_t a_0 = LD(a, 0);
0006 IntrVec_t b_0 = LD(b, 0);
0007 IntrVec_t c_0 = MUL(a_0, b_0);
0008 IntrVec_t b_1 = LD(b, 1);
0009 IntrVec_t c_1 = MUL(a_0, b_1);
0010 IntrVec_t b_3 = LD(b, 3);
0011 IntrVec_t c_2 = MUL(a_0, b_3);
0012
0013 IntrVec_t a_1 = LD(a, 1);
0014 c_0 = FMA(a_1, b_1, c_0);
0015 IntrVec_t b_2 = LD(b, 2);
0016 c_1 = FMA(a_1, b_2, c_1);
0017 IntrVec_t b_4 = LD(b, 4);
0018 c_2 = FMA(a_1, b_4, c_2);
0019
0020 IntrVec_t a_3 = LD(a, 3);
0021 c_0 = FMA(a_3, b_3, c_0);
0022 c_1 = FMA(a_3, b_4, c_1);
0023 IntrVec_t b_5 = LD(b, 5);
0024 c_2 = FMA(a_3, b_5, c_2);
0025
// Output slots 3-5: a slots (1, 2, 4). The stores of slots 0-2 are interleaved
// with the first multiplies of this group; the same store/multiply interleaving
// repeats in every following group.
0026 IntrVec_t c_3 = MUL(a_1, b_0);
0027 ST(c, 0, c_0);
0028 ST(c, 1, c_1);
0029 IntrVec_t c_4 = MUL(a_1, b_1);
0030 ST(c, 2, c_2);
0031 IntrVec_t c_5 = MUL(a_1, b_3);
0032
0033 IntrVec_t a_2 = LD(a, 2);
0034 c_3 = FMA(a_2, b_1, c_3);
0035 c_4 = FMA(a_2, b_2, c_4);
0036 c_5 = FMA(a_2, b_4, c_5);
0037
0038 IntrVec_t a_4 = LD(a, 4);
0039 c_3 = FMA(a_4, b_3, c_3);
0040 c_4 = FMA(a_4, b_4, c_4);
0041 c_5 = FMA(a_4, b_5, c_5);
0042
// Output slots 6-8: a slots (3, 4, 5); a_3 and a_4 are reused from above.
0043 IntrVec_t c_6 = MUL(a_3, b_0);
0044 IntrVec_t c_7 = MUL(a_3, b_1);
0045 ST(c, 3, c_3);
0046 ST(c, 4, c_4);
0047 ST(c, 5, c_5);
0048 IntrVec_t c_8 = MUL(a_3, b_3);
0049
0050 c_6 = FMA(a_4, b_1, c_6);
0051 c_7 = FMA(a_4, b_2, c_7);
0052 c_8 = FMA(a_4, b_4, c_8);
0053
0054 IntrVec_t a_5 = LD(a, 5);
0055 c_6 = FMA(a_5, b_3, c_6);
0056 c_7 = FMA(a_5, b_4, c_7);
0057 c_8 = FMA(a_5, b_5, c_8);
0058
// Output slots 9-11: a slots (6, 7, 8). Slot 9 of a is never loaded.
0059 IntrVec_t a_6 = LD(a, 6);
0060 IntrVec_t c_9 = MUL(a_6, b_0);
0061 ST(c, 6, c_6);
0062 ST(c, 7, c_7);
0063 ST(c, 8, c_8);
0064 IntrVec_t c_10 = MUL(a_6, b_1);
0065 IntrVec_t c_11 = MUL(a_6, b_3);
0066
0067 IntrVec_t a_7 = LD(a, 7);
0068 c_9 = FMA(a_7, b_1, c_9);
0069 c_10 = FMA(a_7, b_2, c_10);
0070 c_11 = FMA(a_7, b_4, c_11);
0071
0072 IntrVec_t a_8 = LD(a, 8);
0073 c_9 = FMA(a_8, b_3, c_9);
0074 c_10 = FMA(a_8, b_4, c_10);
0075 c_11 = FMA(a_8, b_5, c_11);
0076
// Output slots 12-14: a slots (10, 11, 12). Slots 13 and 14 of a are never loaded.
0077 IntrVec_t a_10 = LD(a, 10);
0078 IntrVec_t c_12 = MUL(a_10, b_0);
0079 ST(c, 9, c_9);
0080 ST(c, 10, c_10);
0081 ST(c, 11, c_11);
0082 IntrVec_t c_13 = MUL(a_10, b_1);
0083 IntrVec_t c_14 = MUL(a_10, b_3);
0084
0085 IntrVec_t a_11 = LD(a, 11);
0086 c_12 = FMA(a_11, b_1, c_12);
0087 c_13 = FMA(a_11, b_2, c_13);
0088 c_14 = FMA(a_11, b_4, c_14);
0089
0090 IntrVec_t a_12 = LD(a, 12);
0091 c_12 = FMA(a_12, b_3, c_12);
0092 c_13 = FMA(a_12, b_4, c_13);
0093 c_14 = FMA(a_12, b_5, c_14);
0094
// Output slots 15-17: a slots (15, 16, 17).
0095 IntrVec_t a_15 = LD(a, 15);
0096 IntrVec_t c_15 = MUL(a_15, b_0);
0097 ST(c, 12, c_12);
0098 ST(c, 13, c_13);
0099 ST(c, 14, c_14);
0100 IntrVec_t c_16 = MUL(a_15, b_1);
0101 IntrVec_t c_17 = MUL(a_15, b_3);
0102
0103 IntrVec_t a_16 = LD(a, 16);
0104 c_15 = FMA(a_16, b_1, c_15);
0105 c_16 = FMA(a_16, b_2, c_16);
0106 c_17 = FMA(a_16, b_4, c_17);
0107
0108 IntrVec_t a_17 = LD(a, 17);
0109 c_15 = FMA(a_17, b_3, c_15);
0110 c_16 = FMA(a_17, b_4, c_16);
0111 c_17 = FMA(a_17, b_5, c_17);
0112
// Final stores for the last group.
0113 ST(c, 15, c_15);
0114 ST(c, 16, c_16);
0115 ST(c, 17, c_17);
0116 }
0117
0118 #else
0119
// Fallback path (compiled when MPLEX_INTRINSICS is not defined).
// For every n in [0, N) this writes 18 outputs c[k*N+n], k = 0..17, each the
// sum of three products a[i*N+n]*b[j*N+n]. Slot k of a, b or c for a given n
// is addressed as k*N+n, i.e. consecutive n of one slot are adjacent in memory.
// Slots read: a 0-8, 10-12 and 15-17 (9, 13, 14 are never read); b 0-5.
// NOTE(review): this slot pattern is consistent with a being a packed symmetric
// 6x6 matrix, b a packed symmetric 3x3 matrix and c a dense 6x3 row-major
// result -- inferred from the indices only, confirm at the include site.
// NOTE(review): `omp simd` requests vectorization across n; that presumes c
// does not overlap a or b -- TODO confirm with the caller.
0120 #pragma omp simd
0121 for (int n = 0; n < N; ++n)
0122 {
// Outputs 0-2: a slots (0, 1, 3) against b slot triples (0,1,3), (1,2,4), (3,4,5).
0123 c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 3*N+n];
0124 c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 4*N+n];
0125 c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 5*N+n];
// Outputs 3-5: a slots (1, 2, 4), same three b triples.
0126 c[ 3*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 3*N+n];
0127 c[ 4*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 4*N+n];
0128 c[ 5*N+n] = a[ 1*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 5*N+n];
// Outputs 6-8: a slots (3, 4, 5).
0129 c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
0130 c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
0131 c[ 8*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 5*N+n];
// Outputs 9-11: a slots (6, 7, 8).
0132 c[ 9*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n];
0133 c[10*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n];
0134 c[11*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n];
// Outputs 12-14: a slots (10, 11, 12).
0135 c[12*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 1*N+n] + a[12*N+n]*b[ 3*N+n];
0136 c[13*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 2*N+n] + a[12*N+n]*b[ 4*N+n];
0137 c[14*N+n] = a[10*N+n]*b[ 3*N+n] + a[11*N+n]*b[ 4*N+n] + a[12*N+n]*b[ 5*N+n];
// Outputs 15-17: a slots (15, 16, 17).
0138 c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n] + a[17*N+n]*b[ 3*N+n];
0139 c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n] + a[17*N+n]*b[ 4*N+n];
0140 c[17*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 5*N+n];
0141 }
0142 #endif