Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:22

0001 ///////////////////////////////////////////////////////////////////////////////
0002 /// helixAtRFromIterativeCCS_impl
0003 ///////////////////////////////////////////////////////////////////////////////
0004 
0005 //#define DEBUG
0006 //#include "Debug.h"
0007 
0008 template <typename Tf, typename TfLL1, typename Tf1>
0009 static inline void parsFromPathL_impl(const Tf& __restrict__ inPar,
0010                                       TfLL1& __restrict__ outPar,
0011                                       const float* kinv,
0012                                       const Tf1& __restrict__ s,
0013                                       const int nmin,
0014                                       const int nmax) {
0015   float alpha[nmax - nmin];
0016   for (int n = nmin; n < nmax; ++n) {
0017     alpha[n - nmin] = s[n - nmin] * std::sin(inPar(n, 5, 0)) * inPar(n, 3, 0) * kinv[n - nmin];
0018   }
0019 
0020   float cosah[nmax - nmin];
0021   float sinah[nmax - nmin];
0022   if constexpr (Config::useTrigApprox) {
0023 #if !defined(__INTEL_COMPILER)
0024 #pragma omp simd
0025 #endif
0026     for (int n = nmin; n < nmax; ++n) {
0027       sincos4(alpha[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]);
0028     }
0029   } else {
0030 #if !defined(__INTEL_COMPILER)
0031 #pragma omp simd
0032 #endif
0033     for (int n = nmin; n < nmax; ++n) {
0034       cosah[n - nmin] = std::cos(alpha[n - nmin] * 0.5f);
0035       sinah[n - nmin] = std::sin(alpha[n - nmin] * 0.5f);
0036     }
0037   }
0038 
0039   for (int n = nmin; n < nmax; ++n) {
0040     outPar(n, 0, 0) =
0041         inPar(n, 0, 0) + 2.f * sinah[n - nmin] *
0042                              (std::cos(inPar(n, 4, 0)) * cosah[n - nmin] - std::sin(inPar(n, 4, 0)) * sinah[n - nmin]) /
0043                              (inPar(n, 3, 0) * kinv[n - nmin]);
0044     outPar(n, 1, 0) =
0045         inPar(n, 1, 0) + 2.f * sinah[n - nmin] *
0046                              (std::sin(inPar(n, 4, 0)) * cosah[n - nmin] + std::cos(inPar(n, 4, 0)) * sinah[n - nmin]) /
0047                              (inPar(n, 3, 0) * kinv[n - nmin]);
0048     outPar(n, 2, 0) = inPar(n, 2, 0) + alpha[n - nmin] / kinv[n - nmin] * std::cos(inPar(n, 5, 0)) /
0049                                            (inPar(n, 3, 0) * std::sin(inPar(n, 5, 0)));
0050     outPar(n, 3, 0) = inPar(n, 3, 0);
0051     outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[n - nmin];
0052     outPar(n, 5, 0) = inPar(n, 5, 0);
0053   }
0054 }
0055 
0056 //should kinv and D be templated???
0057 template <typename Tf, typename Ti, typename TfLL1, typename TfLLL, typename Tf1>
0058 inline void parsAndErrPropFromPathL_impl(const Tf& __restrict__ inPar,
0059                                          const Ti& __restrict__ inChg,
0060                                          TfLL1& __restrict__ outPar,
0061                                          const float* kinv,
0062                                          const Tf1& __restrict__ s,
0063                                          TfLLL& __restrict__ errorProp,
0064                                          const int nmin,
0065                                          const int nmax,
0066                                          const int N_proc,
0067                                          const PropagationFlags& pf) {
0068   //iteration should return the path length s, then update parameters and compute errors
0069 
0070   parsFromPathL_impl(inPar, outPar, kinv, s, nmin, nmax);
0071 
0072   float cosPin[nmax - nmin];
0073   float sinPin[nmax - nmin];
0074   float cosPout[nmax - nmin];
0075   float sinPout[nmax - nmin];
0076   float cosT[nmax - nmin];
0077   float sinT[nmax - nmin];
0078 
0079 #pragma omp simd
0080   for (int n = nmin; n < nmax; ++n) {
0081     cosPin[n - nmin] = std::cos(inPar(n, 4, 0));
0082     sinPin[n - nmin] = std::sin(inPar(n, 4, 0));
0083     cosPout[n - nmin] = std::cos(outPar(n, 4, 0));
0084     sinPout[n - nmin] = std::sin(outPar(n, 4, 0));
0085     cosT[n - nmin] = std::cos(inPar(n, 5, 0));
0086     sinT[n - nmin] = std::sin(inPar(n, 5, 0));
0087   }
0088 
0089   // use code from AnalyticalCurvilinearJacobian::computeFullJacobian for error propagation in curvilinear coordinates, then convert to CCS
0090   // main difference from the above function is that we assume that the magnetic field is purely along z (which also implies that there is no change in pz)
0091   // this simplifies significantly the code
0092 
0093   MPlex55 errorPropCurv;
0094   for (int n = nmin; n < nmax; ++n) {
0095     const float qbp = inChg(n, 0, 0) * sinT[n - nmin] * inPar(n, 3, 0);
0096     // calculate transport matrix
0097     // Origin: TRPRFN
0098     const float t11 = cosPin[n - nmin] * sinT[n - nmin];
0099     const float t12 = sinPin[n - nmin] * sinT[n - nmin];
0100     const float t21 = cosPout[n - nmin] * sinT[n - nmin];
0101     const float t22 = sinPout[n - nmin] * sinT[n - nmin];
0102     const float cosl1 = 1.f / sinT[n - nmin];
0103     // define average magnetic field and gradient
0104     // at initial point - inlike TRPRFN
0105     const float bF =
0106         (pf.use_param_b_field
0107              ? 0.01f * Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), hipo(inPar(n, 0, 0), inPar(n, 1, 0)))
0108              : 0.01f * Const::sol * Config::Bfield);
0109     const float q = -bF * qbp;
0110     const float theta = q * s[n - nmin];
0111     //float sint, cost;
0112     //vdt::fast_sincos(theta, sint, cost);
0113     const float sint = std::sin(theta);
0114     const float cost = std::cos(theta);
0115     const float dx1 = inPar(n, 0, 0) - outPar(n, 0, 0);
0116     const float dx2 = inPar(n, 1, 0) - outPar(n, 1, 0);
0117     const float dx3 = inPar(n, 2, 0) - outPar(n, 2, 0);
0118     float au = 1.f / sqrt(t11 * t11 + t12 * t12);
0119     const float u11 = -au * t12;
0120     const float u12 = au * t11;
0121     const float v11 = -cosT[n - nmin] * u12;
0122     const float v12 = cosT[n - nmin] * u11;
0123     const float v13 = t11 * u12 - t12 * u11;
0124     au = 1.f / sqrt(t21 * t21 + t22 * t22);
0125     const float u21 = -au * t22;
0126     const float u22 = au * t21;
0127     const float v21 = -cosT[n - nmin] * u22;
0128     const float v22 = cosT[n - nmin] * u21;
0129     const float v23 = t21 * u22 - t22 * u21;
0130     // now prepare the transport matrix
0131     const float omcost = 1.f - cost;
0132     const float tmsint = theta - sint;
0133     //   1/p - doesn't change since |p1| = |p2|
0134     errorPropCurv(n, 0, 0) = 1.f;
0135     for (auto i = 1; i < 5; ++i)
0136       errorPropCurv(n, 0, i) = 0.f;
0137     //   lambda
0138     errorPropCurv(n, 1, 0) = 0.f;
0139     errorPropCurv(n, 1, 1) =
0140         cost * (v11 * v21 + v12 * v22 + v13 * v23) + sint * (-v12 * v21 + v11 * v22) + omcost * v13 * v23;
0141     errorPropCurv(n, 1, 2) = (cost * (u11 * v21 + u12 * v22) + sint * (-u12 * v21 + u11 * v22)) * sinT[n - nmin];
0142     errorPropCurv(n, 1, 3) = 0.f;
0143     errorPropCurv(n, 1, 4) = 0.f;
0144     //   phi
0145     errorPropCurv(n, 2, 0) = bF * v23 * (t21 * dx1 + t22 * dx2 + cosT[n - nmin] * dx3) * cosl1;
0146     errorPropCurv(n, 2, 1) = (cost * (v11 * u21 + v12 * u22) + sint * (-v12 * u21 + v11 * u22) +
0147                               v23 * (-sint * (v11 * t21 + v12 * t22 + v13 * cosT[n - nmin]) +
0148                                      omcost * (-v11 * t22 + v12 * t21) - tmsint * cosT[n - nmin] * v13)) *
0149                              cosl1;
0150     errorPropCurv(n, 2, 2) = (cost * (u11 * u21 + u12 * u22) + sint * (-u12 * u21 + u11 * u22) +
0151                               v23 * (-sint * (u11 * t21 + u12 * t22) + omcost * (-u11 * t22 + u12 * t21))) *
0152                              cosl1 * sinT[n - nmin];
0153     errorPropCurv(n, 2, 3) = -q * v23 * (u11 * t21 + u12 * t22) * cosl1;
0154     errorPropCurv(n, 2, 4) = -q * v23 * (v11 * t21 + v12 * t22 + v13 * cosT[n - nmin]) * cosl1;
0155     //   yt
0156     float cutCriterion = fabs(s[n - nmin] * sinT[n - nmin] * inPar(n, 3, 0));
0157     const float limit = 5.f;  // valid for propagations with effectively float precision
0158     if (cutCriterion > limit) {
0159       const float pp = 1.f / qbp;
0160       errorPropCurv(n, 3, 0) = pp * (u21 * dx1 + u22 * dx2);
0161       errorPropCurv(n, 4, 0) = pp * (v21 * dx1 + v22 * dx2 + v23 * dx3);
0162     } else {
0163       const float temp1 = -t12 * u21 + t11 * u22;
0164       const float s2 = s[n - nmin] * s[n - nmin];
0165       const float secondOrder41 = -0.5f * bF * temp1 * s2;
0166       const float temp2 = -t11 * u21 - t12 * u22;
0167       const float s3 = s2 * s[n - nmin];
0168       const float s4 = s3 * s[n - nmin];
0169       const float h2 = bF * bF;
0170       const float h3 = h2 * bF;
0171       const float qbp2 = qbp * qbp;
0172       const float thirdOrder41 = 1.f / 3 * h2 * s3 * qbp * temp2;
0173       const float fourthOrder41 = 1.f / 8 * h3 * s4 * qbp2 * temp1;
0174       errorPropCurv(n, 3, 0) = secondOrder41 + (thirdOrder41 + fourthOrder41);
0175       const float temp3 = -t12 * v21 + t11 * v22;
0176       const float secondOrder51 = -0.5f * bF * temp3 * s2;
0177       const float temp4 = -t11 * v21 - t12 * v22 - cosT[n - nmin] * v23;
0178       const float thirdOrder51 = 1.f / 3 * h2 * s3 * qbp * temp4;
0179       const float fourthOrder51 = 1.f / 8 * h3 * s4 * qbp2 * temp3;
0180       errorPropCurv(n, 4, 0) = secondOrder51 + (thirdOrder51 + fourthOrder51);
0181     }
0182     errorPropCurv(n, 3, 1) = (sint * (v11 * u21 + v12 * u22) + omcost * (-v12 * u21 + v11 * u22)) / q;
0183     errorPropCurv(n, 3, 2) = (sint * (u11 * u21 + u12 * u22) + omcost * (-u12 * u21 + u11 * u22)) * sinT[n - nmin] / q;
0184     errorPropCurv(n, 3, 3) = (u11 * u21 + u12 * u22);
0185     errorPropCurv(n, 3, 4) = (v11 * u21 + v12 * u22);
0186     //   zt
0187     errorPropCurv(n, 4, 1) =
0188         (sint * (v11 * v21 + v12 * v22 + v13 * v23) + omcost * (-v12 * v21 + v11 * v22) + tmsint * v23 * v13) / q;
0189     errorPropCurv(n, 4, 2) = (sint * (u11 * v21 + u12 * v22) + omcost * (-u12 * v21 + u11 * v22)) * sinT[n - nmin] / q;
0190     errorPropCurv(n, 4, 3) = (u11 * v21 + u12 * v22);
0191     errorPropCurv(n, 4, 4) = (v11 * v21 + v12 * v22 + v13 * v23);
0192 
0193   }  //end loop over n
0194 
0195 //debug = true;
0196 #ifdef DEBUG
0197   for (int n = nmin; n < nmax; ++n) {
0198     if (debug && g_debug && n < N_proc) {
0199       dmutex_guard;
0200       std::cout << n << ": errorPropCurv" << std::endl;
0201       printf("%5f %5f %5f %5f %5f\n",
0202              errorPropCurv(n, 0, 0),
0203              errorPropCurv(n, 0, 1),
0204              errorPropCurv(n, 0, 2),
0205              errorPropCurv(n, 0, 3),
0206              errorPropCurv(n, 0, 4));
0207       printf("%5f %5f %5f %5f %5f\n",
0208              errorPropCurv(n, 1, 0),
0209              errorPropCurv(n, 1, 1),
0210              errorPropCurv(n, 1, 2),
0211              errorPropCurv(n, 1, 3),
0212              errorPropCurv(n, 1, 4));
0213       printf("%5f %5f %5f %5f %5f\n",
0214              errorPropCurv(n, 2, 0),
0215              errorPropCurv(n, 2, 1),
0216              errorPropCurv(n, 2, 2),
0217              errorPropCurv(n, 2, 3),
0218              errorPropCurv(n, 2, 4));
0219       printf("%5f %5f %5f %5f %5f\n",
0220              errorPropCurv(n, 3, 0),
0221              errorPropCurv(n, 3, 1),
0222              errorPropCurv(n, 3, 2),
0223              errorPropCurv(n, 3, 3),
0224              errorPropCurv(n, 3, 4));
0225       printf("%5f %5f %5f %5f %5f\n",
0226              errorPropCurv(n, 4, 0),
0227              errorPropCurv(n, 4, 1),
0228              errorPropCurv(n, 4, 2),
0229              errorPropCurv(n, 4, 3),
0230              errorPropCurv(n, 4, 4));
0231       printf("\n");
0232     }
0233   }
0234 #endif
0235 
0236   //now we need jacobians to convert to/from curvilinear and CCS
0237   // code from TrackState::jacobianCCSToCurvilinear
0238   MPlex56 jacCCS2Curv;
0239   for (int n = nmin; n < nmax; ++n) {
0240     for (int ii = 0; ii < 5; ii++) {
0241       for (int jj = 0; jj < 6; jj++) {
0242         jacCCS2Curv(n, ii, jj) = 0.f;
0243       }
0244     }
0245     jacCCS2Curv(n, 0, 3) = inChg(n, 0, 0) * sinT[n - nmin];
0246     jacCCS2Curv(n, 0, 5) = inChg(n, 0, 0) * cosT[n - nmin] * inPar(n, 3, 0);
0247     jacCCS2Curv(n, 1, 5) = -1.f;
0248     jacCCS2Curv(n, 2, 4) = 1.f;
0249     jacCCS2Curv(n, 3, 0) = -sinPin[n - nmin];
0250     jacCCS2Curv(n, 3, 1) = cosPin[n - nmin];
0251     jacCCS2Curv(n, 4, 0) = -cosPin[n - nmin] * cosT[n - nmin];
0252     jacCCS2Curv(n, 4, 1) = -sinPin[n - nmin] * cosT[n - nmin];
0253     jacCCS2Curv(n, 4, 2) = sinT[n - nmin];
0254   }
0255 
0256   // code from TrackState::jacobianCurvilinearToCCS
0257   MPlex65 jacCurv2CCS;
0258   for (int n = nmin; n < nmax; ++n) {
0259     for (int ii = 0; ii < 6; ii++) {
0260       for (int jj = 0; jj < 5; jj++) {
0261         jacCurv2CCS(n, ii, jj) = 0.f;
0262       }
0263     }
0264 
0265     jacCurv2CCS(n, 0, 3) = -sinPout[n - nmin];
0266     jacCurv2CCS(n, 0, 4) = -cosT[n - nmin] * cosPout[n - nmin];
0267     jacCurv2CCS(n, 1, 3) = cosPout[n - nmin];
0268     jacCurv2CCS(n, 1, 4) = -cosT[n - nmin] * sinPout[n - nmin];
0269     jacCurv2CCS(n, 2, 4) = sinT[n - nmin];
0270     jacCurv2CCS(n, 3, 0) = inChg(n, 0, 0) / sinT[n - nmin];
0271     jacCurv2CCS(n, 3, 1) = outPar(n, 3, 0) * cosT[n - nmin] / sinT[n - nmin];
0272     jacCurv2CCS(n, 4, 2) = 1.f;
0273     jacCurv2CCS(n, 5, 1) = -1.f;
0274   }
0275 
0276   //need to compute errorProp = jacCurv2CCS*errorPropCurv*jacCCS2Curv
0277   Matriplex::MPlex<float, 6, 5, NN> tmp;
0278   Matriplex::multiplyGeneral(jacCurv2CCS, errorPropCurv, tmp);
0279   Matriplex::multiplyGeneral(tmp, jacCCS2Curv, errorProp);
0280 }
0281 
0282 // from P.Avery's notes (http://www.phys.ufl.edu/~avery/fitting/transport.pdf eq. 5)
0283 inline float getS(float delta0,
0284                   float delta1,
0285                   float delta2,
0286                   float eta0,
0287                   float eta1,
0288                   float eta2,
0289                   float sinP,
0290                   float cosP,
0291                   float sinT,
0292                   float cosT,
0293                   float pt,
0294                   int q,
0295                   float kinv) {
0296   float A = delta0 * eta0 + delta1 * eta1 + delta2 * eta2;
0297   float ip = sinT / pt;
0298   float p0[3] = {pt * cosP, pt * sinP, cosT / ip};
0299   float B = (p0[0] * eta0 + p0[1] * eta1 + p0[2] * eta2) * ip;
0300   float rho = kinv * ip;
0301   float C = (eta0 * p0[1] - eta1 * p0[0]) * rho * 0.5f * ip;
0302   float sqb2m4ac = std::sqrt(B * B - 4.f * A * C);
0303   float s1 = (-B + sqb2m4ac) * 0.5f / C;
0304   float s2 = (-B - sqb2m4ac) * 0.5f / C;
0305 #ifdef DEBUG
0306   if (debug)
0307     std::cout << "A=" << A << " B=" << B << " C=" << C << " s1=" << s1 << " s2=" << s2 << std::endl;
0308 #endif
0309   //take the closest
0310   return (std::abs(s1) > std::abs(s2) ? s2 : s1);
0311 }
0312 
0313 template <typename Tf, typename Ti, typename TfLL1, typename Tf11, typename TfLLL, typename Tf1>
0314 static inline void helixAtPlane_impl(const Tf& __restrict__ inPar,
0315                                      const Ti& __restrict__ inChg,
0316                                      const Tf11& __restrict__ plPnt,
0317                                      const Tf11& __restrict__ plNrm,
0318                                      Tf1& __restrict__ s,
0319                                      TfLL1& __restrict__ outPar,
0320                                      TfLLL& __restrict__ errorProp,
0321                                      Ti& __restrict__ outFailFlag,  // expected to be initialized to 0
0322                                      const int nmin,
0323                                      const int nmax,
0324                                      const int N_proc,
0325                                      const PropagationFlags& pf) {
0326   for (int n = nmin; n < nmax; ++n) {
0327     dprint_np(n,
0328               "input parameters"
0329                   << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
0330                   << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9)
0331                   << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
0332                   << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
0333                   << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
0334   }
0335 
0336   float kinv[nmax - nmin];
0337   if (pf.use_param_b_field) {
0338 #pragma omp simd
0339     for (int n = nmin; n < nmax; ++n) {
0340       kinv[n - nmin] = inChg(n, 0, 0) * 0.01f *
0341                        (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), hipo(inPar(n, 0, 0), inPar(n, 1, 0))));
0342     }
0343   } else {
0344 #pragma omp simd
0345     for (int n = nmin; n < nmax; ++n) {
0346       kinv[n - nmin] = inChg(n, 0, 0) * 0.01f * (-Const::sol * Config::Bfield);
0347     }
0348   }
0349 
0350   float delta0[nmax - nmin];
0351   float delta1[nmax - nmin];
0352   float delta2[nmax - nmin];
0353 #pragma omp simd
0354   for (int n = nmin; n < nmax; ++n) {
0355     delta0[n - nmin] = inPar(n, 0, 0) - plPnt(n, 0, 0);
0356     delta1[n - nmin] = inPar(n, 1, 0) - plPnt(n, 1, 0);
0357     delta2[n - nmin] = inPar(n, 2, 0) - plPnt(n, 2, 0);
0358   }
0359 
0360   float sinP[nmax - nmin];
0361   float cosP[nmax - nmin];
0362 #pragma omp simd
0363   for (int n = nmin; n < nmax; ++n) {
0364     sinP[n - nmin] = std::sin(inPar(n, 4, 0));
0365     cosP[n - nmin] = std::cos(inPar(n, 4, 0));
0366   }
0367 
0368   // determine solution for straight line
0369   float sl[nmax - nmin];
0370 #pragma omp simd
0371   for (int n = nmin; n < nmax; ++n) {
0372     //sl[n - nmin] = - ( plNrm(n, 0, 0)*delta0[n - nmin] + plNrm(n, 1, 0)*delta1[n - nmin] + plNrm(n, 2, 0)*delta2[n - nmin] ) / ( plNrm(n, 0, 0)*cosP[n - nmin]/inPar(n,3,0) + plNrm(n, 1, 0)*sinP[n - nmin]/inPar(n,3,0) + plNrm(n, 2, 0)*std::cos(inPar(n,5,0))/std::sin(inPar(n,5,0))/inPar(n,3,0) );
0373     sl[n - nmin] =
0374         -(plNrm(n, 0, 0) * delta0[n - nmin] + plNrm(n, 1, 0) * delta1[n - nmin] + plNrm(n, 2, 0) * delta2[n - nmin]) /
0375         (plNrm(n, 0, 0) * cosP[n - nmin] * std::sin(inPar(n, 5, 0)) +
0376          plNrm(n, 1, 0) * sinP[n - nmin] * std::sin(inPar(n, 5, 0)) + plNrm(n, 2, 0) * std::cos(inPar(n, 5, 0)));
0377   }
0378 
0379   //float s[nmax - nmin];
0380   //first iteration outside the loop
0381 #pragma omp simd
0382   for (int n = nmin; n < nmax; ++n) {
0383     s[n - nmin] = (std::abs(plNrm(n, 2, 0)) < 1.f
0384                        ? getS(delta0[n - nmin],
0385                               delta1[n - nmin],
0386                               delta2[n - nmin],
0387                               plNrm(n, 0, 0),
0388                               plNrm(n, 1, 0),
0389                               plNrm(n, 2, 0),
0390                               sinP[n - nmin],
0391                               cosP[n - nmin],
0392                               std::sin(inPar(n, 5, 0)),
0393                               std::cos(inPar(n, 5, 0)),
0394                               inPar(n, 3, 0),
0395                               inChg(n, 0, 0),
0396                               kinv[n - nmin])
0397                        : (plPnt.constAt(n, 2, 0) - inPar.constAt(n, 2, 0)) / std::cos(inPar.constAt(n, 5, 0)));
0398   }
0399 
0400   MPlexLV outParTmp;
0401 
0402   CMS_UNROLL_LOOP_COUNT(Config::Niter - 1)
0403   for (int i = 0; i < Config::Niter - 1; ++i) {
0404     parsFromPathL_impl(inPar, outParTmp, kinv, s, nmin, nmax);
0405 
0406 #pragma omp simd
0407     for (int n = nmin; n < nmax; ++n) {
0408       delta0[n - nmin] = outParTmp(n, 0, 0) - plPnt(n, 0, 0);
0409       delta1[n - nmin] = outParTmp(n, 1, 0) - plPnt(n, 1, 0);
0410       delta2[n - nmin] = outParTmp(n, 2, 0) - plPnt(n, 2, 0);
0411     }
0412 
0413 #pragma omp simd
0414     for (int n = nmin; n < nmax; ++n) {
0415       sinP[n - nmin] = std::sin(outParTmp(n, 4, 0));
0416       cosP[n - nmin] = std::cos(outParTmp(n, 4, 0));
0417     }
0418 
0419 #pragma omp simd
0420     for (int n = nmin; n < nmax; ++n) {
0421       s[n - nmin] += (std::abs(plNrm(n, 2, 0)) < 1.f ? getS(delta0[n - nmin],
0422                                                             delta1[n - nmin],
0423                                                             delta2[n - nmin],
0424                                                             plNrm(n, 0, 0),
0425                                                             plNrm(n, 1, 0),
0426                                                             plNrm(n, 2, 0),
0427                                                             sinP[n - nmin],
0428                                                             cosP[n - nmin],
0429                                                             std::sin(inPar(n, 5, 0)),
0430                                                             std::cos(inPar(n, 5, 0)),
0431                                                             inPar(n, 3, 0),
0432                                                             inChg(n, 0, 0),
0433                                                             kinv[n - nmin])
0434                                                      : (plPnt.constAt(n, 2, 0) - outParTmp.constAt(n, 2, 0)) /
0435                                                            std::cos(outParTmp.constAt(n, 5, 0)));
0436     }
0437   }  //end Niter-1
0438 
0439   // use linear approximation if s did not converge (for very high pT tracks)
0440   for (int n = nmin; n < nmax; ++n) {
0441 #ifdef DEBUG
0442     if (debug)
0443       std::cout << "s[n - nmin]=" << s[n - nmin] << " sl[n - nmin]=" << sl[n - nmin]
0444                 << " std::isnan(s[n - nmin])=" << std::isnan(s[n - nmin])
0445                 << " std::isfinite(s[n - nmin])=" << std::isfinite(s[n - nmin])
0446                 << " std::isnormal(s[n - nmin])=" << std::isnormal(s[n - nmin]) << std::endl;
0447 #endif
0448     if ((std::abs(sl[n - nmin]) > std::abs(s[n - nmin])) || std::isnormal(s[n - nmin]) == false)
0449       s[n - nmin] = sl[n - nmin];
0450   }
0451 
0452 #ifdef DEBUG
0453   if (debug)
0454     std::cout << "s=" << s[0] << std::endl;
0455 #endif
0456   parsAndErrPropFromPathL_impl(inPar, inChg, outPar, kinv, s, errorProp, nmin, nmax, N_proc, pf);
0457 }
0458 
0459 template <typename Tf, typename Ti, typename TfLL1, typename Tf11, typename TfLLL>
0460 static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
0461                                                  const Ti& __restrict__ inChg,
0462                                                  const Tf11& __restrict__ msRad,
0463                                                  TfLL1& __restrict__ outPar,
0464                                                  TfLLL& __restrict__ errorProp,
0465                                                  Ti& __restrict__ outFailFlag,  // expected to be initialized to 0
0466                                                  const int nmin,
0467                                                  const int nmax,
0468                                                  const int N_proc,
0469                                                  const PropagationFlags& pf) {
0470   // bool debug = true;
0471 
0472 #pragma omp simd
0473   for (int n = nmin; n < nmax; ++n) {
0474     //initialize erroProp to identity matrix
0475     errorProp(n, 0, 0) = 1.f;
0476     errorProp(n, 1, 1) = 1.f;
0477     errorProp(n, 2, 2) = 1.f;
0478     errorProp(n, 3, 3) = 1.f;
0479     errorProp(n, 4, 4) = 1.f;
0480     errorProp(n, 5, 5) = 1.f;
0481   }
0482   float r0[nmax - nmin];
0483 #pragma omp simd
0484   for (int n = nmin; n < nmax; ++n) {
0485     //initialize erroProp to identity matrix
0486     r0[n - nmin] = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
0487   }
0488   float k[nmax - nmin];
0489   if (pf.use_param_b_field) {
0490 #pragma omp simd
0491     for (int n = nmin; n < nmax; ++n) {
0492       k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), r0[n - nmin]));
0493     }
0494   } else {
0495 #pragma omp simd
0496     for (int n = nmin; n < nmax; ++n) {
0497       k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
0498     }
0499   }
0500   float r[nmax - nmin];
0501 #pragma omp simd
0502   for (int n = nmin; n < nmax; ++n) {
0503     r[n - nmin] = msRad(n, 0, 0);
0504   }
0505   float xin[nmax - nmin];
0506   float yin[nmax - nmin];
0507   float ipt[nmax - nmin];
0508   float phiin[nmax - nmin];
0509   float theta[nmax - nmin];
0510 #pragma omp simd
0511   for (int n = nmin; n < nmax; ++n) {
0512     // if (std::abs(r-r0)<0.0001f) {
0513     //  dprint("distance less than 1mum, skip");
0514     //  continue;
0515     // }
0516 
0517     xin[n - nmin] = inPar(n, 0, 0);
0518     yin[n - nmin] = inPar(n, 1, 0);
0519     ipt[n - nmin] = inPar(n, 3, 0);
0520     phiin[n - nmin] = inPar(n, 4, 0);
0521     theta[n - nmin] = inPar(n, 5, 0);
0522 
0523     //dprint(std::endl);
0524   }
0525 
0526   //debug = true;
0527   for (int n = nmin; n < nmax; ++n) {
0528     dprint_np(n,
0529               "input parameters"
0530                   << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
0531                   << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9)
0532                   << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
0533                   << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
0534                   << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
0535   }
0536 
0537   float kinv[nmax - nmin];
0538   float pt[nmax - nmin];
0539 #pragma omp simd
0540   for (int n = nmin; n < nmax; ++n) {
0541     kinv[n - nmin] = 1.f / k[n - nmin];
0542     pt[n - nmin] = 1.f / ipt[n - nmin];
0543   }
0544   float D[nmax - nmin];
0545   float cosa[nmax - nmin];
0546   float sina[nmax - nmin];
0547   float cosah[nmax - nmin];
0548   float sinah[nmax - nmin];
0549   float id[nmax - nmin];
0550 
0551 #pragma omp simd
0552   for (int n = nmin; n < nmax; ++n) {
0553     D[n - nmin] = 0.;
0554   }
0555 
0556   //no trig approx here, phi can be large
0557   float cosPorT[nmax - nmin];
0558   float sinPorT[nmax - nmin];
0559 #pragma omp simd
0560   for (int n = nmin; n < nmax; ++n) {
0561     cosPorT[n - nmin] = std::cos(phiin[n - nmin]);
0562   }
0563 #pragma omp simd
0564   for (int n = nmin; n < nmax; ++n) {
0565     sinPorT[n - nmin] = std::sin(phiin[n - nmin]);
0566   }
0567 
0568   float pxin[nmax - nmin];
0569   float pyin[nmax - nmin];
0570 #pragma omp simd
0571   for (int n = nmin; n < nmax; ++n) {
0572     pxin[n - nmin] = cosPorT[n - nmin] * pt[n - nmin];
0573     pyin[n - nmin] = sinPorT[n - nmin] * pt[n - nmin];
0574   }
0575 
0576   for (int n = nmin; n < nmax; ++n) {
0577     dprint_np(n,
0578               "k=" << std::setprecision(9) << k[n - nmin] << " pxin=" << std::setprecision(9) << pxin[n - nmin]
0579                    << " pyin=" << std::setprecision(9) << pyin[n - nmin] << " cosPorT=" << std::setprecision(9)
0580                    << cosPorT[n - nmin] << " sinPorT=" << std::setprecision(9) << sinPorT[n - nmin]
0581                    << " pt=" << std::setprecision(9) << pt[n - nmin]);
0582   }
0583 
0584   float dDdx[nmax - nmin];
0585   float dDdy[nmax - nmin];
0586   float dDdipt[nmax - nmin];
0587   float dDdphi[nmax - nmin];
0588 
0589 #pragma omp simd
0590   for (int n = nmin; n < nmax; ++n) {
0591     dDdipt[n - nmin] = 0.;
0592     dDdphi[n - nmin] = 0.;
0593   }
0594 #pragma omp simd
0595   for (int n = nmin; n < nmax; ++n) {
0596     //derivatives initialized to value for first iteration, i.e. distance = r-r0in
0597     dDdx[n - nmin] = r0[n - nmin] > 0.f ? -xin[n - nmin] / r0[n - nmin] : 0.f;
0598   }
0599 
0600 #pragma omp simd
0601   for (int n = nmin; n < nmax; ++n) {
0602     dDdy[n - nmin] = r0[n - nmin] > 0.f ? -yin[n - nmin] / r0[n - nmin] : 0.f;
0603   }
0604 
0605   float oodotp[nmax - nmin];
0606   float x[nmax - nmin];
0607   float y[nmax - nmin];
0608   float oor0[nmax - nmin];
0609   float dadipt[nmax - nmin];
0610   float dadx[nmax - nmin];
0611   float dady[nmax - nmin];
0612   float pxca[nmax - nmin];
0613   float pxsa[nmax - nmin];
0614   float pyca[nmax - nmin];
0615   float pysa[nmax - nmin];
0616   float tmp[nmax - nmin];
0617   float tmpx[nmax - nmin];
0618   float tmpy[nmax - nmin];
0619   float pxinold[nmax - nmin];
0620 
0621   CMS_UNROLL_LOOP_COUNT(Config::Niter)
0622   for (int i = 0; i < Config::Niter; ++i) {
0623 #pragma omp simd
0624     for (int n = nmin; n < nmax; ++n) {
0625       //compute distance and path for the current iteration
0626       r0[n - nmin] = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
0627     }
0628 
0629     // Use one over dot product of transverse momentum and radial
0630     // direction to scale the step. Propagation is prevented from reaching
0631     // too close to the apex (dotp > 0.2).
0632     // - Can / should we come up with a better approximation?
0633     // - Can / should take +/- curvature into account?
0634 
0635 #pragma omp simd
0636     for (int n = nmin; n < nmax; ++n) {
0637       oodotp[n - nmin] =
0638           r0[n - nmin] * pt[n - nmin] / (pxin[n - nmin] * outPar(n, 0, 0) + pyin[n - nmin] * outPar(n, 1, 0));
0639     }
0640 
0641 #pragma omp simd
0642     for (int n = nmin; n < nmax; ++n) {
0643       if (oodotp[n - nmin] > 5.0f || oodotp[n - nmin] < 0)  // 0.2 is 78.5 deg
0644       {
0645         outFailFlag(n, 0, 0) = 1;
0646         oodotp[n - nmin] = 0.0f;
0647       } else if (r[n - nmin] - r0[n - nmin] < 0.0f && pt[n - nmin] < 1.0f) {
0648         // Scale down the correction for low-pT ingoing tracks.
0649         oodotp[n - nmin] = 1.0f + (oodotp[n - nmin] - 1.0f) * pt[n - nmin];
0650       }
0651     }
0652 
0653 #pragma omp simd
0654     for (int n = nmin; n < nmax; ++n) {
0655       // Can we come up with a better approximation?
0656       // Should take +/- curvature into account.
0657       id[n - nmin] = (r[n - nmin] - r0[n - nmin]) * oodotp[n - nmin];
0658     }
0659 
0660 #pragma omp simd
0661     for (int n = nmin; n < nmax; ++n) {
0662       D[n - nmin] += id[n - nmin];
0663     }
0664 
0665     if constexpr (Config::useTrigApprox) {
0666 #if !defined(__INTEL_COMPILER)
0667 #pragma omp simd
0668 #endif
0669       for (int n = nmin; n < nmax; ++n) {
0670         sincos4(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]);
0671       }
0672     } else {
0673 #if !defined(__INTEL_COMPILER)
0674 #pragma omp simd
0675 #endif
0676       for (int n = nmin; n < nmax; ++n) {
0677         cosah[n - nmin] = std::cos(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f);
0678         sinah[n - nmin] = std::sin(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f);
0679       }
0680     }
0681 
0682 #pragma omp simd
0683     for (int n = nmin; n < nmax; ++n) {
0684       cosa[n - nmin] = 1.f - 2.f * sinah[n - nmin] * sinah[n - nmin];
0685       sina[n - nmin] = 2.f * sinah[n - nmin] * cosah[n - nmin];
0686     }
0687 
0688     for (int n = nmin; n < nmax; ++n) {
0689       dprint_np(n,
0690                 "Attempt propagation from r="
0691                     << r0[n - nmin] << " to r=" << r[n - nmin] << std::endl
0692                     << "   x=" << xin[n - nmin] << " y=" << yin[n - nmin] << " z=" << inPar(n, 2, 0)
0693                     << " px=" << pxin[n - nmin] << " py=" << pyin[n - nmin]
0694                     << " pz=" << pt[n - nmin] * std::tan(theta[n - nmin]) << " q=" << inChg(n, 0, 0) << std::endl
0695                     << "   r=" << std::setprecision(9) << r[n - nmin] << " r0=" << std::setprecision(9) << r0[n - nmin]
0696                     << " id=" << std::setprecision(9) << id[n - nmin] << " dr=" << std::setprecision(9)
0697                     << r[n - nmin] - r0[n - nmin] << " cosa=" << cosa[n - nmin] << " sina=" << sina[n - nmin]
0698                     << " dir_cos(rad,pT)=" << 1.0f / oodotp[n - nmin]);
0699     }
0700 
0701     //update derivatives on total distance
0702     if (i + 1 != Config::Niter) {
0703 #pragma omp simd
0704       for (int n = nmin; n < nmax; ++n) {
0705         x[n - nmin] = outPar(n, 0, 0);
0706         y[n - nmin] = outPar(n, 1, 0);
0707       }
0708 #pragma omp simd
0709       for (int n = nmin; n < nmax; ++n) {
0710         oor0[n - nmin] =
0711             (r0[n - nmin] > 0.f && std::abs(r[n - nmin] - r0[n - nmin]) > 0.0001f) ? 1.f / r0[n - nmin] : 0.f;
0712       }
0713 #pragma omp simd
0714       for (int n = nmin; n < nmax; ++n) {
0715         dadipt[n - nmin] = id[n - nmin] * kinv[n - nmin];
0716         dadx[n - nmin] = -x[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin];
0717         dady[n - nmin] = -y[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin];
0718         pxca[n - nmin] = pxin[n - nmin] * cosa[n - nmin];
0719         pxsa[n - nmin] = pxin[n - nmin] * sina[n - nmin];
0720         pyca[n - nmin] = pyin[n - nmin] * cosa[n - nmin];
0721         pysa[n - nmin] = pyin[n - nmin] * sina[n - nmin];
0722         tmpx[n - nmin] = k[n - nmin] * dadx[n - nmin];
0723       }
0724 
0725 #pragma omp simd
0726       for (int n = nmin; n < nmax; ++n) {
0727         dDdx[n - nmin] -= (x[n - nmin] * (1.f + tmpx[n - nmin] * (pxca[n - nmin] - pysa[n - nmin])) +
0728                            y[n - nmin] * tmpx[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin])) *
0729                           oor0[n - nmin];
0730       }
0731 
0732 #pragma omp simd
0733       for (int n = nmin; n < nmax; ++n) {
0734         tmpy[n - nmin] = k[n - nmin] * dady[n - nmin];
0735       }
0736 #pragma omp simd
0737       for (int n = nmin; n < nmax; ++n) {
0738         dDdy[n - nmin] -= (x[n - nmin] * tmpy[n - nmin] * (pxca[n - nmin] - pysa[n - nmin]) +
0739                            y[n - nmin] * (1.f + tmpy[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin]))) *
0740                           oor0[n - nmin];
0741       }
0742 #pragma omp simd
0743       for (int n = nmin; n < nmax; ++n) {
0744         //now r0 depends on ipt and phi as well
0745         tmp[n - nmin] = dadipt[n - nmin] * ipt[n - nmin];
0746       }
0747 #pragma omp simd
0748       for (int n = nmin; n < nmax; ++n) {
0749         dDdipt[n - nmin] -= k[n - nmin] *
0750                             (x[n - nmin] * (pxca[n - nmin] * tmp[n - nmin] - pysa[n - nmin] * tmp[n - nmin] -
0751                                             pyca[n - nmin] - pxsa[n - nmin] + pyin[n - nmin]) +
0752                              y[n - nmin] * (pyca[n - nmin] * tmp[n - nmin] + pxsa[n - nmin] * tmp[n - nmin] -
0753                                             pysa[n - nmin] + pxca[n - nmin] - pxin[n - nmin])) *
0754                             pt[n - nmin] * oor0[n - nmin];
0755       }
0756 #pragma omp simd
0757       for (int n = nmin; n < nmax; ++n) {
0758         dDdphi[n - nmin] += k[n - nmin] *
0759                             (x[n - nmin] * (pysa[n - nmin] - pxin[n - nmin] + pxca[n - nmin]) -
0760                              y[n - nmin] * (pxsa[n - nmin] - pyin[n - nmin] + pyca[n - nmin])) *
0761                             oor0[n - nmin];
0762       }
0763     }
0764 
0765 #pragma omp simd
0766     for (int n = nmin; n < nmax; ++n) {
0767       //update parameters
0768       outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[n - nmin] * sinah[n - nmin] *
0769                                               (pxin[n - nmin] * cosah[n - nmin] - pyin[n - nmin] * sinah[n - nmin]);
0770       outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[n - nmin] * sinah[n - nmin] *
0771                                               (pyin[n - nmin] * cosah[n - nmin] + pxin[n - nmin] * sinah[n - nmin]);
0772       pxinold[n - nmin] = pxin[n - nmin];  //copy before overwriting
0773       pxin[n - nmin] = pxin[n - nmin] * cosa[n - nmin] - pyin[n - nmin] * sina[n - nmin];
0774       pyin[n - nmin] = pyin[n - nmin] * cosa[n - nmin] + pxinold[n - nmin] * sina[n - nmin];
0775     }
0776     for (int n = nmin; n < nmax; ++n) {
0777       dprint_np(n,
0778                 "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0)
0779                                    << " pxin=" << pxin[n - nmin] << " pyin=" << pyin[n - nmin]);
0780     }
0781   }  // iteration loop
0782 
0783   float alpha[nmax - nmin];
0784   float dadphi[nmax - nmin];
0785 
0786 #pragma omp simd
0787   for (int n = nmin; n < nmax; ++n) {
0788     alpha[n - nmin] = D[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0789     dadx[n - nmin] = dDdx[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0790     dady[n - nmin] = dDdy[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0791     dadipt[n - nmin] = (ipt[n - nmin] * dDdipt[n - nmin] + D[n - nmin]) * kinv[n - nmin];
0792     dadphi[n - nmin] = dDdphi[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0793   }
0794 
0795   if constexpr (Config::useTrigApprox) {
0796 #pragma omp simd
0797     for (int n = nmin; n < nmax; ++n) {
0798       sincos4(alpha[n - nmin], sina[n - nmin], cosa[n - nmin]);
0799     }
0800   } else {
0801 #pragma omp simd
0802     for (int n = nmin; n < nmax; ++n) {
0803       cosa[n - nmin] = std::cos(alpha[n - nmin]);
0804       sina[n - nmin] = std::sin(alpha[n - nmin]);
0805     }
0806   }
0807 #pragma omp simd
0808   for (int n = nmin; n < nmax; ++n) {
0809     errorProp(n, 0, 0) = 1.f + k[n - nmin] * dadx[n - nmin] *
0810                                    (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) *
0811                                    pt[n - nmin];
0812     errorProp(n, 0, 1) = k[n - nmin] * dady[n - nmin] *
0813                          (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin];
0814     errorProp(n, 0, 2) = 0.f;
0815     errorProp(n, 0, 3) =
0816         k[n - nmin] *
0817         (cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) +
0818          sinPorT[n - nmin] * ((1.f - cosa[n - nmin]) - ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin])) *
0819         pt[n - nmin] * pt[n - nmin];
0820     errorProp(n, 0, 4) =
0821         k[n - nmin] *
0822         (cosPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] -
0823          sinPorT[n - nmin] * sina[n - nmin] + cosPorT[n - nmin] * cosa[n - nmin] - cosPorT[n - nmin]) *
0824         pt[n - nmin];
0825     errorProp(n, 0, 5) = 0.f;
0826   }
0827 
0828 #pragma omp simd
0829   for (int n = nmin; n < nmax; ++n) {
0830     errorProp(n, 1, 0) = k[n - nmin] * dadx[n - nmin] *
0831                          (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin];
0832     errorProp(n, 1, 1) = 1.f + k[n - nmin] * dady[n - nmin] *
0833                                    (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) *
0834                                    pt[n - nmin];
0835     errorProp(n, 1, 2) = 0.f;
0836     errorProp(n, 1, 3) =
0837         k[n - nmin] *
0838         (sinPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) +
0839          cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin] - (1.f - cosa[n - nmin]))) *
0840         pt[n - nmin] * pt[n - nmin];
0841     errorProp(n, 1, 4) =
0842         k[n - nmin] *
0843         (sinPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] +
0844          sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin] - sinPorT[n - nmin]) *
0845         pt[n - nmin];
0846     errorProp(n, 1, 5) = 0.f;
0847   }
0848 
0849 #pragma omp simd
0850   for (int n = nmin; n < nmax; ++n) {
0851     //no trig approx here, theta can be large
0852     cosPorT[n - nmin] = std::cos(theta[n - nmin]);
0853   }
0854 
0855 #pragma omp simd
0856   for (int n = nmin; n < nmax; ++n) {
0857     sinPorT[n - nmin] = std::sin(theta[n - nmin]);
0858   }
0859 
0860 #pragma omp simd
0861   for (int n = nmin; n < nmax; ++n) {
0862     //redefine sinPorT as 1./sinPorT to reduce the number of temporaries
0863     sinPorT[n - nmin] = 1.f / sinPorT[n - nmin];
0864   }
0865 
0866 #pragma omp simd
0867   for (int n = nmin; n < nmax; ++n) {
0868     outPar(n, 2, 0) =
0869         inPar(n, 2, 0) + k[n - nmin] * alpha[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0870     errorProp(n, 2, 0) = k[n - nmin] * cosPorT[n - nmin] * dadx[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0871     errorProp(n, 2, 1) = k[n - nmin] * cosPorT[n - nmin] * dady[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0872     errorProp(n, 2, 2) = 1.f;
0873     errorProp(n, 2, 3) = k[n - nmin] * cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] - alpha[n - nmin]) *
0874                          pt[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0875     errorProp(n, 2, 4) = k[n - nmin] * dadphi[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0876     errorProp(n, 2, 5) = -k[n - nmin] * alpha[n - nmin] * pt[n - nmin] * sinPorT[n - nmin] * sinPorT[n - nmin];
0877   }
0878 
0879 #pragma omp simd
0880   for (int n = nmin; n < nmax; ++n) {
0881     outPar(n, 3, 0) = ipt[n - nmin];
0882     errorProp(n, 3, 0) = 0.f;
0883     errorProp(n, 3, 1) = 0.f;
0884     errorProp(n, 3, 2) = 0.f;
0885     errorProp(n, 3, 3) = 1.f;
0886     errorProp(n, 3, 4) = 0.f;
0887     errorProp(n, 3, 5) = 0.f;
0888   }
0889 
0890 #pragma omp simd
0891   for (int n = nmin; n < nmax; ++n) {
0892     outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[n - nmin];
0893     errorProp(n, 4, 0) = dadx[n - nmin];
0894     errorProp(n, 4, 1) = dady[n - nmin];
0895     errorProp(n, 4, 2) = 0.f;
0896     errorProp(n, 4, 3) = dadipt[n - nmin];
0897     errorProp(n, 4, 4) = 1.f + dadphi[n - nmin];
0898     errorProp(n, 4, 5) = 0.f;
0899   }
0900 
0901 #pragma omp simd
0902   for (int n = nmin; n < nmax; ++n) {
0903     outPar(n, 5, 0) = theta[n - nmin];
0904     errorProp(n, 5, 0) = 0.f;
0905     errorProp(n, 5, 1) = 0.f;
0906     errorProp(n, 5, 2) = 0.f;
0907     errorProp(n, 5, 3) = 0.f;
0908     errorProp(n, 5, 4) = 0.f;
0909     errorProp(n, 5, 5) = 1.f;
0910   }
0911 
0912   for (int n = nmin; n < nmax; ++n) {
0913     dprint_np(n,
0914               "propagation end, dump parameters\n"
0915                   << "   D = " << D[n - nmin] << " alpha = " << alpha[n - nmin] << " kinv = " << kinv[n - nmin]
0916                   << std::endl
0917                   << "   pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r="
0918                   << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl
0919                   << "   mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl
0920                   << "   cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
0921                   << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
0922                   << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
0923   }
0924 
0925 #ifdef DEBUG
0926   for (int n = nmin; n < nmax; ++n) {
0927     if (debug && g_debug && n < N_proc) {
0928       dmutex_guard;
0929       std::cout << n << ": jacobian" << std::endl;
0930       printf("%5f %5f %5f %5f %5f %5f\n",
0931              errorProp(n, 0, 0),
0932              errorProp(n, 0, 1),
0933              errorProp(n, 0, 2),
0934              errorProp(n, 0, 3),
0935              errorProp(n, 0, 4),
0936              errorProp(n, 0, 5));
0937       printf("%5f %5f %5f %5f %5f %5f\n",
0938              errorProp(n, 1, 0),
0939              errorProp(n, 1, 1),
0940              errorProp(n, 1, 2),
0941              errorProp(n, 1, 3),
0942              errorProp(n, 1, 4),
0943              errorProp(n, 1, 5));
0944       printf("%5f %5f %5f %5f %5f %5f\n",
0945              errorProp(n, 2, 0),
0946              errorProp(n, 2, 1),
0947              errorProp(n, 2, 2),
0948              errorProp(n, 2, 3),
0949              errorProp(n, 2, 4),
0950              errorProp(n, 2, 5));
0951       printf("%5f %5f %5f %5f %5f %5f\n",
0952              errorProp(n, 3, 0),
0953              errorProp(n, 3, 1),
0954              errorProp(n, 3, 2),
0955              errorProp(n, 3, 3),
0956              errorProp(n, 3, 4),
0957              errorProp(n, 3, 5));
0958       printf("%5f %5f %5f %5f %5f %5f\n",
0959              errorProp(n, 4, 0),
0960              errorProp(n, 4, 1),
0961              errorProp(n, 4, 2),
0962              errorProp(n, 4, 3),
0963              errorProp(n, 4, 4),
0964              errorProp(n, 4, 5));
0965       printf("%5f %5f %5f %5f %5f %5f\n",
0966              errorProp(n, 5, 0),
0967              errorProp(n, 5, 1),
0968              errorProp(n, 5, 2),
0969              errorProp(n, 5, 3),
0970              errorProp(n, 5, 4),
0971              errorProp(n, 5, 5));
0972       printf("\n");
0973     }
0974   }
0975 #endif
0976 }