Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/MkFitCore/src/MultHelixPlaneProp.ah is written in an unsupported language. File is not indexed.

0001 #ifdef MPLEX_INTRINSICS
0002 
// Vectorised path: each iteration covers MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)
// consecutive values of n. The loop body never names n itself, so the LD/ST macros
// presumably pick it up from the enclosing scope -- confirm against their definitions.
//
// Reads elements 0..17, 22..29, 34 and 35 of a and elements 0..20 of b; writes
// elements 0..35 of c. The products and accumulations mirror, term for term, the
// scalar expressions in the #else branch of this file, which suggests that
// FMA(x, y, z) evaluates x * y + z (TODO confirm against the macro definition).
//
// Each b element is loaded exactly once into a local (b_0 .. b_20) and reused by
// the later output groups. Stores are interleaved with arithmetic; in particular
// the last element of each group of six is stored only after the next group has
// been started.
// NOTE(review): that ordering looks like deliberate scheduling by a code
// generator -- keep the statement order as is unless this is confirmed otherwise.
0003 for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
  // c[0..5]: built from a[0..5]; this group also performs the loads of b.
0004   IntrVec_t a_0 = LD(a, 0);
0005   IntrVec_t b_0 = LD(b, 0);
0006   IntrVec_t c_0 = MUL(a_0, b_0);
0007   IntrVec_t b_1 = LD(b, 1);
0008   IntrVec_t c_1 = MUL(a_0, b_1);
0009   IntrVec_t b_3 = LD(b, 3);
0010   IntrVec_t c_2 = MUL(a_0, b_3);
0011   IntrVec_t b_6 = LD(b, 6);
0012   IntrVec_t c_3 = MUL(a_0, b_6);
0013   IntrVec_t b_10 = LD(b, 10);
0014   IntrVec_t c_4 = MUL(a_0, b_10);
0015   IntrVec_t b_15 = LD(b, 15);
0016   IntrVec_t c_5 = MUL(a_0, b_15);
0017 
0018   IntrVec_t a_1 = LD(a, 1);
0019   c_0 = FMA(a_1, b_1, c_0);
0020   IntrVec_t b_2 = LD(b, 2);
0021   c_1 = FMA(a_1, b_2, c_1);
0022   IntrVec_t b_4 = LD(b, 4);
0023   c_2 = FMA(a_1, b_4, c_2);
0024   IntrVec_t b_7 = LD(b, 7);
0025   c_3 = FMA(a_1, b_7, c_3);
0026   IntrVec_t b_11 = LD(b, 11);
0027   c_4 = FMA(a_1, b_11, c_4);
0028   IntrVec_t b_16 = LD(b, 16);
0029   c_5 = FMA(a_1, b_16, c_5);
0030 
0031   IntrVec_t a_2 = LD(a, 2);
0032   c_0 = FMA(a_2, b_3, c_0);
0033   c_1 = FMA(a_2, b_4, c_1);
0034   IntrVec_t b_5 = LD(b, 5);
0035   c_2 = FMA(a_2, b_5, c_2);
0036   IntrVec_t b_8 = LD(b, 8);
0037   c_3 = FMA(a_2, b_8, c_3);
0038   IntrVec_t b_12 = LD(b, 12);
0039   c_4 = FMA(a_2, b_12, c_4);
0040   IntrVec_t b_17 = LD(b, 17);
0041   c_5 = FMA(a_2, b_17, c_5);
0042 
0043   IntrVec_t a_3 = LD(a, 3);
0044   c_0 = FMA(a_3, b_6, c_0);
0045   c_1 = FMA(a_3, b_7, c_1);
0046   c_2 = FMA(a_3, b_8, c_2);
0047   IntrVec_t b_9 = LD(b, 9);
0048   c_3 = FMA(a_3, b_9, c_3);
0049   IntrVec_t b_13 = LD(b, 13);
0050   c_4 = FMA(a_3, b_13, c_4);
0051   IntrVec_t b_18 = LD(b, 18);
0052   c_5 = FMA(a_3, b_18, c_5);
0053 
0054   IntrVec_t a_4 = LD(a, 4);
0055   c_0 = FMA(a_4, b_10, c_0);
0056   c_1 = FMA(a_4, b_11, c_1);
0057   c_2 = FMA(a_4, b_12, c_2);
0058   c_3 = FMA(a_4, b_13, c_3);
0059   IntrVec_t b_14 = LD(b, 14);
0060   c_4 = FMA(a_4, b_14, c_4);
0061   IntrVec_t b_19 = LD(b, 19);
0062   c_5 = FMA(a_4, b_19, c_5);
0063 
0064   IntrVec_t a_5 = LD(a, 5);
0065   c_0 = FMA(a_5, b_15, c_0);
0066   c_1 = FMA(a_5, b_16, c_1);
0067   c_2 = FMA(a_5, b_17, c_2);
0068   c_3 = FMA(a_5, b_18, c_3);
0069   c_4 = FMA(a_5, b_19, c_4);
0070   ST(c, 0, c_0);
0071   ST(c, 1, c_1);
0072   ST(c, 2, c_2);
0073   ST(c, 3, c_3);
0074   ST(c, 4, c_4);
0075   IntrVec_t b_20 = LD(b, 20);
0076   c_5 = FMA(a_5, b_20, c_5);
0077 
  // c[6..11]: built from a[6..11]; all b values come from the locals loaded above.
0078   IntrVec_t a_6 = LD(a, 6);
0079   IntrVec_t c_6 = MUL(a_6, b_0);
0080   IntrVec_t c_7 = MUL(a_6, b_1);
0081   IntrVec_t c_8 = MUL(a_6, b_3);
0082   ST(c, 5, c_5);
0083   IntrVec_t c_9 = MUL(a_6, b_6);
0084   IntrVec_t c_10 = MUL(a_6, b_10);
0085   IntrVec_t c_11 = MUL(a_6, b_15);
0086 
0087   IntrVec_t a_7 = LD(a, 7);
0088   c_6 = FMA(a_7, b_1, c_6);
0089   c_7 = FMA(a_7, b_2, c_7);
0090   c_8 = FMA(a_7, b_4, c_8);
0091   c_9 = FMA(a_7, b_7, c_9);
0092   c_10 = FMA(a_7, b_11, c_10);
0093   c_11 = FMA(a_7, b_16, c_11);
0094 
0095   IntrVec_t a_8 = LD(a, 8);
0096   c_6 = FMA(a_8, b_3, c_6);
0097   c_7 = FMA(a_8, b_4, c_7);
0098   c_8 = FMA(a_8, b_5, c_8);
0099   c_9 = FMA(a_8, b_8, c_9);
0100   c_10 = FMA(a_8, b_12, c_10);
0101   c_11 = FMA(a_8, b_17, c_11);
0102 
0103   IntrVec_t a_9 = LD(a, 9);
0104   c_6 = FMA(a_9, b_6, c_6);
0105   c_7 = FMA(a_9, b_7, c_7);
0106   c_8 = FMA(a_9, b_8, c_8);
0107   c_9 = FMA(a_9, b_9, c_9);
0108   c_10 = FMA(a_9, b_13, c_10);
0109   c_11 = FMA(a_9, b_18, c_11);
0110 
0111   IntrVec_t a_10 = LD(a, 10);
0112   c_6 = FMA(a_10, b_10, c_6);
0113   c_7 = FMA(a_10, b_11, c_7);
0114   c_8 = FMA(a_10, b_12, c_8);
0115   c_9 = FMA(a_10, b_13, c_9);
0116   c_10 = FMA(a_10, b_14, c_10);
0117   c_11 = FMA(a_10, b_19, c_11);
0118 
0119   IntrVec_t a_11 = LD(a, 11);
0120   c_6 = FMA(a_11, b_15, c_6);
0121   c_7 = FMA(a_11, b_16, c_7);
0122   c_8 = FMA(a_11, b_17, c_8);
0123   c_9 = FMA(a_11, b_18, c_9);
0124   c_10 = FMA(a_11, b_19, c_10);
0125   ST(c, 6, c_6);
0126   ST(c, 7, c_7);
0127   ST(c, 8, c_8);
0128   ST(c, 9, c_9);
0129   ST(c, 10, c_10);
0130   c_11 = FMA(a_11, b_20, c_11);
0131 
  // c[12..17]: built from a[12..17].
0132   IntrVec_t a_12 = LD(a, 12);
0133   IntrVec_t c_12 = MUL(a_12, b_0);
0134   IntrVec_t c_13 = MUL(a_12, b_1);
0135   IntrVec_t c_14 = MUL(a_12, b_3);
0136   ST(c, 11, c_11);
0137   IntrVec_t c_15 = MUL(a_12, b_6);
0138   IntrVec_t c_16 = MUL(a_12, b_10);
0139   IntrVec_t c_17 = MUL(a_12, b_15);
0140 
0141   IntrVec_t a_13 = LD(a, 13);
0142   c_12 = FMA(a_13, b_1, c_12);
0143   c_13 = FMA(a_13, b_2, c_13);
0144   c_14 = FMA(a_13, b_4, c_14);
0145   c_15 = FMA(a_13, b_7, c_15);
0146   c_16 = FMA(a_13, b_11, c_16);
0147   c_17 = FMA(a_13, b_16, c_17);
0148 
0149   IntrVec_t a_14 = LD(a, 14);
0150   c_12 = FMA(a_14, b_3, c_12);
0151   c_13 = FMA(a_14, b_4, c_13);
0152   c_14 = FMA(a_14, b_5, c_14);
0153   c_15 = FMA(a_14, b_8, c_15);
0154   c_16 = FMA(a_14, b_12, c_16);
0155   c_17 = FMA(a_14, b_17, c_17);
0156 
0157   IntrVec_t a_15 = LD(a, 15);
0158   c_12 = FMA(a_15, b_6, c_12);
0159   c_13 = FMA(a_15, b_7, c_13);
0160   c_14 = FMA(a_15, b_8, c_14);
0161   c_15 = FMA(a_15, b_9, c_15);
0162   c_16 = FMA(a_15, b_13, c_16);
0163   c_17 = FMA(a_15, b_18, c_17);
0164 
0165   IntrVec_t a_16 = LD(a, 16);
0166   c_12 = FMA(a_16, b_10, c_12);
0167   c_13 = FMA(a_16, b_11, c_13);
0168   c_14 = FMA(a_16, b_12, c_14);
0169   c_15 = FMA(a_16, b_13, c_15);
0170   c_16 = FMA(a_16, b_14, c_16);
0171   c_17 = FMA(a_16, b_19, c_17);
0172 
0173   IntrVec_t a_17 = LD(a, 17);
0174   c_12 = FMA(a_17, b_15, c_12);
0175   c_13 = FMA(a_17, b_16, c_13);
0176   c_14 = FMA(a_17, b_17, c_14);
0177   c_15 = FMA(a_17, b_18, c_15);
0178   c_16 = FMA(a_17, b_19, c_16);
0179   ST(c, 12, c_12);
0180   ST(c, 13, c_13);
0181   ST(c, 14, c_14);
0182   ST(c, 15, c_15);
0183   ST(c, 16, c_16);
0184   c_17 = FMA(a_17, b_20, c_17);
0185 
  // c[18..23]: the accumulators start as plain copies of b values (no MUL) and
  // a[18..21] are never loaded; only a[22] and a[23] contribute products.
  // NOTE(review): presumably those a entries are known constants in the
  // caller's matrix -- verify before relying on it.
0186   IntrVec_t c_18 = b_6;
0187   IntrVec_t c_19 = b_7;
0188   IntrVec_t c_20 = b_8;
0189   IntrVec_t c_21 = b_9;
0190   ST(c, 17, c_17);
0191   IntrVec_t c_22 = b_13;
0192   IntrVec_t c_23 = b_18;
0193 
0194   IntrVec_t a_22 = LD(a, 22);
0195   c_18 = FMA(a_22, b_10, c_18);
0196   c_19 = FMA(a_22, b_11, c_19);
0197   c_20 = FMA(a_22, b_12, c_20);
0198   c_21 = FMA(a_22, b_13, c_21);
0199   c_22 = FMA(a_22, b_14, c_22);
0200   c_23 = FMA(a_22, b_19, c_23);
0201 
0202   IntrVec_t a_23 = LD(a, 23);
0203   c_18 = FMA(a_23, b_15, c_18);
0204   c_19 = FMA(a_23, b_16, c_19);
0205   c_20 = FMA(a_23, b_17, c_20);
0206   c_21 = FMA(a_23, b_18, c_21);
0207   c_22 = FMA(a_23, b_19, c_22);
0208   ST(c, 18, c_18);
0209   ST(c, 19, c_19);
0210   ST(c, 20, c_20);
0211   ST(c, 21, c_21);
0212   ST(c, 22, c_22);
0213   c_23 = FMA(a_23, b_20, c_23);
0214 
  // c[24..29]: built from a[24..29].
0215   IntrVec_t a_24 = LD(a, 24);
0216   IntrVec_t c_24 = MUL(a_24, b_0);
0217   IntrVec_t c_25 = MUL(a_24, b_1);
0218   IntrVec_t c_26 = MUL(a_24, b_3);
0219   ST(c, 23, c_23);
0220   IntrVec_t c_27 = MUL(a_24, b_6);
0221   IntrVec_t c_28 = MUL(a_24, b_10);
0222   IntrVec_t c_29 = MUL(a_24, b_15);
0223 
0224   IntrVec_t a_25 = LD(a, 25);
0225   c_24 = FMA(a_25, b_1, c_24);
0226   c_25 = FMA(a_25, b_2, c_25);
0227   c_26 = FMA(a_25, b_4, c_26);
0228   c_27 = FMA(a_25, b_7, c_27);
0229   c_28 = FMA(a_25, b_11, c_28);
0230   c_29 = FMA(a_25, b_16, c_29);
0231 
0232   IntrVec_t a_26 = LD(a, 26);
0233   c_24 = FMA(a_26, b_3, c_24);
0234   c_25 = FMA(a_26, b_4, c_25);
0235   c_26 = FMA(a_26, b_5, c_26);
0236   c_27 = FMA(a_26, b_8, c_27);
0237   c_28 = FMA(a_26, b_12, c_28);
0238   c_29 = FMA(a_26, b_17, c_29);
0239 
0240   IntrVec_t a_27 = LD(a, 27);
0241   c_24 = FMA(a_27, b_6, c_24);
0242   c_25 = FMA(a_27, b_7, c_25);
0243   c_26 = FMA(a_27, b_8, c_26);
0244   c_27 = FMA(a_27, b_9, c_27);
0245   c_28 = FMA(a_27, b_13, c_28);
0246   c_29 = FMA(a_27, b_18, c_29);
0247 
0248   IntrVec_t a_28 = LD(a, 28);
0249   c_24 = FMA(a_28, b_10, c_24);
0250   c_25 = FMA(a_28, b_11, c_25);
0251   c_26 = FMA(a_28, b_12, c_26);
0252   c_27 = FMA(a_28, b_13, c_27);
0253   c_28 = FMA(a_28, b_14, c_28);
0254   c_29 = FMA(a_28, b_19, c_29);
0255 
0256   IntrVec_t a_29 = LD(a, 29);
0257   c_24 = FMA(a_29, b_15, c_24);
0258   c_25 = FMA(a_29, b_16, c_25);
0259   c_26 = FMA(a_29, b_17, c_26);
0260   c_27 = FMA(a_29, b_18, c_27);
0261   c_28 = FMA(a_29, b_19, c_28);
0262   ST(c, 24, c_24);
0263   ST(c, 25, c_25);
0264   ST(c, 26, c_26);
0265   ST(c, 27, c_27);
0266   ST(c, 28, c_28);
0267   c_29 = FMA(a_29, b_20, c_29);
0268 
  // c[30..35]: only a[34] and a[35] are loaded; a[30..33] are never read.
0269   IntrVec_t a_34 = LD(a, 34);
0270   IntrVec_t c_30 = MUL(a_34, b_10);
0271   IntrVec_t c_31 = MUL(a_34, b_11);
0272   IntrVec_t c_32 = MUL(a_34, b_12);
0273   ST(c, 29, c_29);
0274   IntrVec_t c_33 = MUL(a_34, b_13);
0275   IntrVec_t c_34 = MUL(a_34, b_14);
0276   IntrVec_t c_35 = MUL(a_34, b_19);
0277 
0278   IntrVec_t a_35 = LD(a, 35);
0279   c_30 = FMA(a_35, b_15, c_30);
0280   c_31 = FMA(a_35, b_16, c_31);
0281   c_32 = FMA(a_35, b_17, c_32);
0282   c_33 = FMA(a_35, b_18, c_33);
0283   c_34 = FMA(a_35, b_19, c_34);
0284   ST(c, 30, c_30);
0285   ST(c, 31, c_31);
0286   ST(c, 32, c_32);
0287   ST(c, 33, c_33);
0288   ST(c, 34, c_34);
0289   c_35 = FMA(a_35, b_20, c_35);
0290   ST(c, 35, c_35);
0291 }
0292 
0293 #else
0294 
0295 #pragma omp simd
0296 for (int n = 0; n < N; ++n) {
0297   c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] +
0298                  a[3 * N + n] * b[6 * N + n] + a[4 * N + n] * b[10 * N + n] + a[5 * N + n] * b[15 * N + n];
0299   c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n] +
0300                  a[3 * N + n] * b[7 * N + n] + a[4 * N + n] * b[11 * N + n] + a[5 * N + n] * b[16 * N + n];
0301   c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n] +
0302                  a[3 * N + n] * b[8 * N + n] + a[4 * N + n] * b[12 * N + n] + a[5 * N + n] * b[17 * N + n];
0303   c[3 * N + n] = a[0 * N + n] * b[6 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[8 * N + n] +
0304                  a[3 * N + n] * b[9 * N + n] + a[4 * N + n] * b[13 * N + n] + a[5 * N + n] * b[18 * N + n];
0305   c[4 * N + n] = a[0 * N + n] * b[10 * N + n] + a[1 * N + n] * b[11 * N + n] + a[2 * N + n] * b[12 * N + n] +
0306                  a[3 * N + n] * b[13 * N + n] + a[4 * N + n] * b[14 * N + n] + a[5 * N + n] * b[19 * N + n];
0307   c[5 * N + n] = a[0 * N + n] * b[15 * N + n] + a[1 * N + n] * b[16 * N + n] + a[2 * N + n] * b[17 * N + n] +
0308                  a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[19 * N + n] + a[5 * N + n] * b[20 * N + n];
0309   c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[1 * N + n] + a[8 * N + n] * b[3 * N + n] +
0310                  a[9 * N + n] * b[6 * N + n] + a[10 * N + n] * b[10 * N + n] + a[11 * N + n] * b[15 * N + n];
0311   c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n] +
0312                  a[9 * N + n] * b[7 * N + n] + a[10 * N + n] * b[11 * N + n] + a[11 * N + n] * b[16 * N + n];
0313   c[8 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[5 * N + n] +
0314                  a[9 * N + n] * b[8 * N + n] + a[10 * N + n] * b[12 * N + n] + a[11 * N + n] * b[17 * N + n];
0315   c[9 * N + n] = a[6 * N + n] * b[6 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[8 * N + n] +
0316                  a[9 * N + n] * b[9 * N + n] + a[10 * N + n] * b[13 * N + n] + a[11 * N + n] * b[18 * N + n];
0317   c[10 * N + n] = a[6 * N + n] * b[10 * N + n] + a[7 * N + n] * b[11 * N + n] + a[8 * N + n] * b[12 * N + n] +
0318                   a[9 * N + n] * b[13 * N + n] + a[10 * N + n] * b[14 * N + n] + a[11 * N + n] * b[19 * N + n];
0319   c[11 * N + n] = a[6 * N + n] * b[15 * N + n] + a[7 * N + n] * b[16 * N + n] + a[8 * N + n] * b[17 * N + n] +
0320                   a[9 * N + n] * b[18 * N + n] + a[10 * N + n] * b[19 * N + n] + a[11 * N + n] * b[20 * N + n];
0321   c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[1 * N + n] + a[14 * N + n] * b[3 * N + n] +
0322                   a[15 * N + n] * b[6 * N + n] + a[16 * N + n] * b[10 * N + n] + a[17 * N + n] * b[15 * N + n];
0323   c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[2 * N + n] + a[14 * N + n] * b[4 * N + n] +
0324                   a[15 * N + n] * b[7 * N + n] + a[16 * N + n] * b[11 * N + n] + a[17 * N + n] * b[16 * N + n];
0325   c[14 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[4 * N + n] + a[14 * N + n] * b[5 * N + n] +
0326                   a[15 * N + n] * b[8 * N + n] + a[16 * N + n] * b[12 * N + n] + a[17 * N + n] * b[17 * N + n];
0327   c[15 * N + n] = a[12 * N + n] * b[6 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[8 * N + n] +
0328                   a[15 * N + n] * b[9 * N + n] + a[16 * N + n] * b[13 * N + n] + a[17 * N + n] * b[18 * N + n];
0329   c[16 * N + n] = a[12 * N + n] * b[10 * N + n] + a[13 * N + n] * b[11 * N + n] + a[14 * N + n] * b[12 * N + n] +
0330                   a[15 * N + n] * b[13 * N + n] + a[16 * N + n] * b[14 * N + n] + a[17 * N + n] * b[19 * N + n];
0331   c[17 * N + n] = a[12 * N + n] * b[15 * N + n] + a[13 * N + n] * b[16 * N + n] + a[14 * N + n] * b[17 * N + n] +
0332                   a[15 * N + n] * b[18 * N + n] + a[16 * N + n] * b[19 * N + n] + a[17 * N + n] * b[20 * N + n];
0333   c[18 * N + n] = b[6 * N + n] + a[22 * N + n] * b[10 * N + n] + a[23 * N + n] * b[15 * N + n];
0334   c[19 * N + n] = b[7 * N + n] + a[22 * N + n] * b[11 * N + n] + a[23 * N + n] * b[16 * N + n];
0335   c[20 * N + n] = b[8 * N + n] + a[22 * N + n] * b[12 * N + n] + a[23 * N + n] * b[17 * N + n];
0336   c[21 * N + n] = b[9 * N + n] + a[22 * N + n] * b[13 * N + n] + a[23 * N + n] * b[18 * N + n];
0337   c[22 * N + n] = b[13 * N + n] + a[22 * N + n] * b[14 * N + n] + a[23 * N + n] * b[19 * N + n];
0338   c[23 * N + n] = b[18 * N + n] + a[22 * N + n] * b[19 * N + n] + a[23 * N + n] * b[20 * N + n];
0339   c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[1 * N + n] + a[26 * N + n] * b[3 * N + n] +
0340                   a[27 * N + n] * b[6 * N + n] + a[28 * N + n] * b[10 * N + n] + a[29 * N + n] * b[15 * N + n];
0341   c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[2 * N + n] + a[26 * N + n] * b[4 * N + n] +
0342                   a[27 * N + n] * b[7 * N + n] + a[28 * N + n] * b[11 * N + n] + a[29 * N + n] * b[16 * N + n];
0343   c[26 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[4 * N + n] + a[26 * N + n] * b[5 * N + n] +
0344                   a[27 * N + n] * b[8 * N + n] + a[28 * N + n] * b[12 * N + n] + a[29 * N + n] * b[17 * N + n];
0345   c[27 * N + n] = a[24 * N + n] * b[6 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[8 * N + n] +
0346                   a[27 * N + n] * b[9 * N + n] + a[28 * N + n] * b[13 * N + n] + a[29 * N + n] * b[18 * N + n];
0347   c[28 * N + n] = a[24 * N + n] * b[10 * N + n] + a[25 * N + n] * b[11 * N + n] + a[26 * N + n] * b[12 * N + n] +
0348                   a[27 * N + n] * b[13 * N + n] + a[28 * N + n] * b[14 * N + n] + a[29 * N + n] * b[19 * N + n];
0349   c[29 * N + n] = a[24 * N + n] * b[15 * N + n] + a[25 * N + n] * b[16 * N + n] + a[26 * N + n] * b[17 * N + n] +
0350                   a[27 * N + n] * b[18 * N + n] + a[28 * N + n] * b[19 * N + n] + a[29 * N + n] * b[20 * N + n];
0351   c[30 * N + n] = a[34 * N + n] * b[10 * N + n] + a[35 * N + n] * b[15 * N + n];
0352   c[31 * N + n] = a[34 * N + n] * b[11 * N + n] + a[35 * N + n] * b[16 * N + n];
0353   c[32 * N + n] = a[34 * N + n] * b[12 * N + n] + a[35 * N + n] * b[17 * N + n];
0354   c[33 * N + n] = a[34 * N + n] * b[13 * N + n] + a[35 * N + n] * b[18 * N + n];
0355   c[34 * N + n] = a[34 * N + n] * b[14 * N + n] + a[35 * N + n] * b[19 * N + n];
0356   c[35 * N + n] = a[34 * N + n] * b[19 * N + n] + a[35 * N + n] * b[20 * N + n];
0357 }
0358 #endif