MkFitCore/src/PropagationMPlex.cc

0001 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0002 #include "RecoTracker/MkFitCore/interface/PropagationConfig.h"
0003 #include "RecoTracker/MkFitCore/interface/Config.h"
0004 #include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
0005
0006 #include "PropagationMPlex.h"
0007
0008 //#define DEBUG
0009 #include "Debug.h"
0010
0011 namespace mkfit {
0012
0013   //==============================================================================
0014   // propagateLineToRMPlex
0015   //==============================================================================
0016
0017   void propagateLineToRMPlex(const MPlexLS& psErr,
0018                              const MPlexLV& psPar,
0019                              const MPlexHS& msErr,
0020                              const MPlexHV& msPar,
0021                              MPlexLS& outErr,
0022                              MPlexLV& outPar,
0023                              const int N_proc) {
0024     // XXX Regenerate parts below with a script.
0025
0026     const Matriplex::idx_t N = NN;
0027
0028 #pragma omp simd
0029     for (int n = 0; n < NN; ++n) {
0030       const float cosA = (psPar[0 * N + n] * psPar[3 * N + n] + psPar[1 * N + n] * psPar[4 * N + n]) /
0031                          (std::sqrt((psPar[0 * N + n] * psPar[0 * N + n] + psPar[1 * N + n] * psPar[1 * N + n]) *
0032                                     (psPar[3 * N + n] * psPar[3 * N + n] + psPar[4 * N + n] * psPar[4 * N + n])));
0033       const float dr = (hipo(msPar[0 * N + n], msPar[1 * N + n]) - hipo(psPar[0 * N + n], psPar[1 * N + n])) / cosA;
0034
0035       dprint_np(n, "propagateLineToRMPlex dr=" << dr);
0036
0037       const float pt = hipo(psPar[3 * N + n], psPar[4 * N + n]);
0038       const float p = dr / pt;  // path
0039       const float psq = p * p;
0040
0041       outPar[0 * N + n] = psPar[0 * N + n] + p * psPar[3 * N + n];
0042       outPar[1 * N + n] = psPar[1 * N + n] + p * psPar[4 * N + n];
0043       outPar[2 * N + n] = psPar[2 * N + n] + p * psPar[5 * N + n];
0044       outPar[3 * N + n] = psPar[3 * N + n];
0045       outPar[4 * N + n] = psPar[4 * N + n];
0046       outPar[5 * N + n] = psPar[5 * N + n];
0047
0048       {
0049         const MPlexLS& A = psErr;
0050         MPlexLS& B = outErr;
0051
0052         B.fArray[0 * N + n] = A.fArray[0 * N + n];
0053         B.fArray[1 * N + n] = A.fArray[1 * N + n];
0054         B.fArray[2 * N + n] = A.fArray[2 * N + n];
0055         B.fArray[3 * N + n] = A.fArray[3 * N + n];
0056         B.fArray[4 * N + n] = A.fArray[4 * N + n];
0057         B.fArray[5 * N + n] = A.fArray[5 * N + n];
0058         B.fArray[6 * N + n] = A.fArray[6 * N + n] + p * A.fArray[0 * N + n];
0059         B.fArray[7 * N + n] = A.fArray[7 * N + n] + p * A.fArray[1 * N + n];
0060         B.fArray[8 * N + n] = A.fArray[8 * N + n] + p * A.fArray[3 * N + n];
0061         B.fArray[9 * N + n] =
0062             A.fArray[9 * N + n] + p * (A.fArray[6 * N + n] + A.fArray[6 * N + n]) + psq * A.fArray[0 * N + n];
0063         B.fArray[10 * N + n] = A.fArray[10 * N + n] + p * A.fArray[1 * N + n];
0064         B.fArray[11 * N + n] = A.fArray[11 * N + n] + p * A.fArray[2 * N + n];
0065         B.fArray[12 * N + n] = A.fArray[12 * N + n] + p * A.fArray[4 * N + n];
0066         B.fArray[13 * N + n] =
0067             A.fArray[13 * N + n] + p * (A.fArray[7 * N + n] + A.fArray[10 * N + n]) + psq * A.fArray[1 * N + n];
0068         B.fArray[14 * N + n] =
0069             A.fArray[14 * N + n] + p * (A.fArray[11 * N + n] + A.fArray[11 * N + n]) + psq * A.fArray[2 * N + n];
0070         B.fArray[15 * N + n] = A.fArray[15 * N + n] + p * A.fArray[3 * N + n];
0071         B.fArray[16 * N + n] = A.fArray[16 * N + n] + p * A.fArray[4 * N + n];
0072         B.fArray[17 * N + n] = A.fArray[17 * N + n] + p * A.fArray[5 * N + n];
0073         B.fArray[18 * N + n] =
0074             A.fArray[18 * N + n] + p * (A.fArray[8 * N + n] + A.fArray[15 * N + n]) + psq * A.fArray[3 * N + n];
0075         B.fArray[19 * N + n] =
0076             A.fArray[19 * N + n] + p * (A.fArray[12 * N + n] + A.fArray[16 * N + n]) + psq * A.fArray[4 * N + n];
0077         B.fArray[20 * N + n] =
0078             A.fArray[20 * N + n] + p * (A.fArray[17 * N + n] + A.fArray[17 * N + n]) + psq * A.fArray[5 * N + n];
0079       }
0080
0081       dprint_np(n, "propagateLineToRMPlex arrive at r=" << hipo(outPar[0 * N + n], outPar[1 * N + n]));
0082     }
0083   }
0084
0085 }  // end namespace mkfit
0086
0087 //==============================================================================
0088 // propagateHelixToRMPlex
0089 //==============================================================================
0090
0091 namespace {
0092   using namespace mkfit;
0093
0094   void MultHelixProp(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
0095     // C = A * B
0096
0097     typedef float T;
0098     const Matriplex::idx_t N = NN;
0099
0100     const T* a = A.fArray;
0101     ASSUME_ALIGNED(a, 64);
0102     const T* b = B.fArray;
0103     ASSUME_ALIGNED(b, 64);
0104     T* c = C.fArray;
0105     ASSUME_ALIGNED(c, 64);
0106
0107 #include "MultHelixProp.ah"
0108   }
0109
0110   void MultHelixPropTransp(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
0111     // C = B * AT;
0112
0113     typedef float T;
0114     const Matriplex::idx_t N = NN;
0115
0116     const T* a = A.fArray;
0117     ASSUME_ALIGNED(a, 64);
0118     const T* b = B.fArray;
0119     ASSUME_ALIGNED(b, 64);
0120     T* c = C.fArray;
0121     ASSUME_ALIGNED(c, 64);
0122
0123 #include "MultHelixPropTransp.ah"
0124   }
0125
0126   void MultHelixPropTemp(const MPlexLL& A, const MPlexLL& B, MPlexLL& C, int n) {
0127     // C = A * B
0128
0129     typedef float T;
0130     const Matriplex::idx_t N = NN;
0131
0132     const T* a = A.fArray;
0133     ASSUME_ALIGNED(a, 64);
0134     const T* b = B.fArray;
0135     ASSUME_ALIGNED(b, 64);
0136     T* c = C.fArray;
0137     ASSUME_ALIGNED(c, 64);
0138
0139     c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[6 * N + n] + a[2 * N + n] * b[12 * N + n] +
0140                    a[4 * N + n] * b[24 * N + n];
0141     c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[13 * N + n] +
0142                    a[4 * N + n] * b[25 * N + n];
0143     c[2 * N + n] = a[2 * N + n];
0144     c[3 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[9 * N + n] + a[2 * N + n] * b[15 * N + n] +
0145                    a[3 * N + n] + a[4 * N + n] * b[27 * N + n];
0146     c[4 * N + n] = a[0 * N + n] * b[4 * N + n] + a[1 * N + n] * b[10 * N + n] + a[4 * N + n];
0147     c[5 * N + n] = a[2 * N + n] * b[17 * N + n] + a[5 * N + n];
0148     c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[6 * N + n] + a[8 * N + n] * b[12 * N + n] +
0149                    a[10 * N + n] * b[24 * N + n];
0150     c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[13 * N + n] +
0151                    a[10 * N + n] * b[25 * N + n];
0152     c[8 * N + n] = a[8 * N + n];
0153     c[9 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[9 * N + n] + a[8 * N + n] * b[15 * N + n] +
0154                    a[9 * N + n] + a[10 * N + n] * b[27 * N + n];
0155     c[10 * N + n] = a[6 * N + n] * b[4 * N + n] + a[7 * N + n] * b[10 * N + n] + a[10 * N + n];
0156     c[11 * N + n] = a[8 * N + n] * b[17 * N + n] + a[11 * N + n];
0157     c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[6 * N + n] + a[14 * N + n] * b[12 * N + n] +
0158                     a[16 * N + n] * b[24 * N + n];
0159     c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[13 * N + n] +
0160                     a[16 * N + n] * b[25 * N + n];
0161     c[14 * N + n] = a[14 * N + n];
0162     c[15 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[9 * N + n] + a[14 * N + n] * b[15 * N + n] +
0163                     a[15 * N + n] + a[16 * N + n] * b[27 * N + n];
0164     c[16 * N + n] = a[12 * N + n] * b[4 * N + n] + a[13 * N + n] * b[10 * N + n] + a[16 * N + n];
0165     c[17 * N + n] = a[14 * N + n] * b[17 * N + n] + a[17 * N + n];
0166     c[18 * N + n] = a[18 * N + n] * b[0 * N + n] + a[19 * N + n] * b[6 * N + n] + a[20 * N + n] * b[12 * N + n] +
0167                     a[22 * N + n] * b[24 * N + n];
0168     c[19 * N + n] = a[18 * N + n] * b[1 * N + n] + a[19 * N + n] * b[7 * N + n] + a[20 * N + n] * b[13 * N + n] +
0169                     a[22 * N + n] * b[25 * N + n];
0170     c[20 * N + n] = a[20 * N + n];
0171     c[21 * N + n] = a[18 * N + n] * b[3 * N + n] + a[19 * N + n] * b[9 * N + n] + a[20 * N + n] * b[15 * N + n] +
0172                     a[21 * N + n] + a[22 * N + n] * b[27 * N + n];
0173     c[22 * N + n] = a[18 * N + n] * b[4 * N + n] + a[19 * N + n] * b[10 * N + n] + a[22 * N + n];
0174     c[23 * N + n] = a[20 * N + n] * b[17 * N + n] + a[23 * N + n];
0175     c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[6 * N + n] + a[26 * N + n] * b[12 * N + n] +
0176                     a[28 * N + n] * b[24 * N + n];
0177     c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[13 * N + n] +
0178                     a[28 * N + n] * b[25 * N + n];
0179     c[26 * N + n] = a[26 * N + n];
0180     c[27 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[9 * N + n] + a[26 * N + n] * b[15 * N + n] +
0181                     a[27 * N + n] + a[28 * N + n] * b[27 * N + n];
0182     c[28 * N + n] = a[24 * N + n] * b[4 * N + n] + a[25 * N + n] * b[10 * N + n] + a[28 * N + n];
0183     c[29 * N + n] = a[26 * N + n] * b[17 * N + n] + a[29 * N + n];
0184     c[30 * N + n] = a[30 * N + n] * b[0 * N + n] + a[31 * N + n] * b[6 * N + n] + a[32 * N + n] * b[12 * N + n] +
0185                     a[34 * N + n] * b[24 * N + n];
0186     c[31 * N + n] = a[30 * N + n] * b[1 * N + n] + a[31 * N + n] * b[7 * N + n] + a[32 * N + n] * b[13 * N + n] +
0187                     a[34 * N + n] * b[25 * N + n];
0188     c[32 * N + n] = a[32 * N + n];
0189     c[33 * N + n] = a[30 * N + n] * b[3 * N + n] + a[31 * N + n] * b[9 * N + n] + a[32 * N + n] * b[15 * N + n] +
0190                     a[33 * N + n] + a[34 * N + n] * b[27 * N + n];
0191     c[34 * N + n] = a[30 * N + n] * b[4 * N + n] + a[31 * N + n] * b[10 * N + n] + a[34 * N + n];
0192     c[35 * N + n] = a[32 * N + n] * b[17 * N + n] + a[35 * N + n];
0193   }
0194
0195 }  // end unnamed namespace
0196
0197 //==============================================================================
0198
0199 namespace mkfit {
0200
0201   void helixAtRFromIterativeCCSFullJac(const MPlexLV& inPar,
0202                                        const MPlexQI& inChg,
0203                                        const MPlexQF& msRad,
0204                                        MPlexLV& outPar,
0205                                        MPlexLL& errorProp,
0206                                        const int N_proc) {
0207     errorProp.setVal(0.f);
0208     MPlexLL errorPropTmp(0.f);   //initialize to zero
0209     MPlexLL errorPropSwap(0.f);  //initialize to zero
0210
0211     // loop does not vectorize with llvm16, and it issues a warning
0212     // that apparently can't be suppressed with a pragma.  Needs to
0213     // be rechecked if future llvm versions improve vectorization.
0214 #if !defined(__clang__)
0215 #pragma omp simd
0216 #endif
0217     for (int n = 0; n < NN; ++n) {
0218       //initialize erroProp to identity matrix
0219       errorProp(n, 0, 0) = 1.f;
0220       errorProp(n, 1, 1) = 1.f;
0221       errorProp(n, 2, 2) = 1.f;
0222       errorProp(n, 3, 3) = 1.f;
0223       errorProp(n, 4, 4) = 1.f;
0224       errorProp(n, 5, 5) = 1.f;
0225
0226       const float k = inChg.constAt(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
0227       const float r = msRad.constAt(n, 0, 0);
0228       float r0 = hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0));
0229
0230       if (std::abs(r - r0) < 0.0001f) {
0231         dprint_np(n, "distance less than 1mum, skip");
0232         continue;
0233       }
0234
0235       const float ipt = inPar.constAt(n, 3, 0);
0236       const float phiin = inPar.constAt(n, 4, 0);
0237       const float theta = inPar.constAt(n, 5, 0);
0238
0239       //set those that are 1. before iterations
0240       errorPropTmp(n, 2, 2) = 1.f;
0241       errorPropTmp(n, 3, 3) = 1.f;
0242       errorPropTmp(n, 4, 4) = 1.f;
0243       errorPropTmp(n, 5, 5) = 1.f;
0244
0245       float cosah = 0., sinah = 0.;
0246       //no trig approx here, phi and theta can be large
0247       float cosP = std::cos(phiin), sinP = std::sin(phiin);
0248       const float cosT = std::cos(theta), sinT = std::sin(theta);
0249       float pxin = cosP / ipt;
0250       float pyin = sinP / ipt;
0251
0252       CMS_UNROLL_LOOP_COUNT(Config::Niter)
0253       for (int i = 0; i < Config::Niter; ++i) {
0254         dprint_np(n,
0255                   std::endl
0256                       << "attempt propagation from r=" << r0 << " to r=" << r << std::endl
0257                       << "x=" << outPar.At(n, 0, 0) << " y=" << outPar.At(n, 1, 0) << " z=" << outPar.At(n, 2, 0)
0258                       << " px=" << std::cos(phiin) / ipt << " py=" << std::sin(phiin) / ipt
0259                       << " pz=" << 1.f / (ipt * tan(theta)) << " q=" << inChg.constAt(n, 0, 0) << std::endl);
0260
0261         r0 = hipo(outPar.constAt(n, 0, 0), outPar.constAt(n, 1, 0));
0262         const float ialpha = (r - r0) * ipt / k;
0263         //alpha+=ialpha;
0264
0265         if constexpr (Config::useTrigApprox) {
0266           sincos4(ialpha * 0.5f, sinah, cosah);
0267         } else {
0268           cosah = std::cos(ialpha * 0.5f);
0269           sinah = std::sin(ialpha * 0.5f);
0270         }
0271         const float cosa = 1.f - 2.f * sinah * sinah;
0272         const float sina = 2.f * sinah * cosah;
0273
0274         //derivatives of alpha
0275         const float dadx = -outPar.At(n, 0, 0) * ipt / (k * r0);
0276         const float dady = -outPar.At(n, 1, 0) * ipt / (k * r0);
0277         const float dadipt = (r - r0) / k;
0278
0279         outPar.At(n, 0, 0) = outPar.constAt(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
0280         outPar.At(n, 1, 0) = outPar.constAt(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
0281         const float pxinold = pxin;  //copy before overwriting
0282         pxin = pxin * cosa - pyin * sina;
0283         pyin = pyin * cosa + pxinold * sina;
0284
0285         //need phi at origin, so this goes before redefining phi
0286         //no trig approx here, phi can be large
0287         cosP = std::cos(outPar.At(n, 4, 0));
0288         sinP = std::sin(outPar.At(n, 4, 0));
0289
0290         outPar.At(n, 2, 0) = outPar.constAt(n, 2, 0) + k * ialpha * cosT / (ipt * sinT);
0291         outPar.At(n, 3, 0) = ipt;
0292         outPar.At(n, 4, 0) = outPar.constAt(n, 4, 0) + ialpha;
0293         outPar.At(n, 5, 0) = theta;
0294
0295         errorPropTmp(n, 0, 0) = 1.f + k * (cosP * dadx * cosa - sinP * dadx * sina) / ipt;
0296         errorPropTmp(n, 0, 1) = k * (cosP * dady * cosa - sinP * dady * sina) / ipt;
0297         errorPropTmp(n, 0, 3) =
0298             k * (cosP * (ipt * dadipt * cosa - sina) + sinP * ((1.f - cosa) - ipt * dadipt * sina)) / (ipt * ipt);
0299         errorPropTmp(n, 0, 4) = -k * (sinP * sina + cosP * (1.f - cosa)) / ipt;
0300
0301         errorPropTmp(n, 1, 0) = k * (sinP * dadx * cosa + cosP * dadx * sina) / ipt;
0302         errorPropTmp(n, 1, 1) = 1.f + k * (sinP * dady * cosa + cosP * dady * sina) / ipt;
0303         errorPropTmp(n, 1, 3) =
0304             k * (sinP * (ipt * dadipt * cosa - sina) + cosP * (ipt * dadipt * sina - (1.f - cosa))) / (ipt * ipt);
0305         errorPropTmp(n, 1, 4) = k * (cosP * sina - sinP * (1.f - cosa)) / ipt;
0306
0307         errorPropTmp(n, 2, 0) = k * cosT * dadx / (ipt * sinT);
0308         errorPropTmp(n, 2, 1) = k * cosT * dady / (ipt * sinT);
0309         errorPropTmp(n, 2, 3) = k * cosT * (ipt * dadipt - ialpha) / (ipt * ipt * sinT);
0310         errorPropTmp(n, 2, 5) = -k * ialpha / (ipt * sinT * sinT);
0311
0312         errorPropTmp(n, 4, 0) = dadx;
0313         errorPropTmp(n, 4, 1) = dady;
0314         errorPropTmp(n, 4, 3) = dadipt;
0315
0316         MultHelixPropTemp(errorProp, errorPropTmp, errorPropSwap, n);
0317         errorProp = errorPropSwap;
0318       }
0319
0320       dprint_np(
0321           n,
0322           "propagation end, dump parameters"
0323               << std::endl
0324               << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl
0325               << "mom = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0326               << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0327               << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0)))
0328               << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0))
0329               << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl);
0330
0331 #ifdef DEBUG
0332       if (debug && g_debug && n < N_proc) {
0333         dmutex_guard;
0334         std::cout << n << " jacobian" << std::endl;
0335         printf("%5f %5f %5f %5f %5f %5f\n",
0336                errorProp(n, 0, 0),
0337                errorProp(n, 0, 1),
0338                errorProp(n, 0, 2),
0339                errorProp(n, 0, 3),
0340                errorProp(n, 0, 4),
0341                errorProp(n, 0, 5));
0342         printf("%5f %5f %5f %5f %5f %5f\n",
0343                errorProp(n, 1, 0),
0344                errorProp(n, 1, 1),
0345                errorProp(n, 1, 2),
0346                errorProp(n, 1, 3),
0347                errorProp(n, 1, 4),
0348                errorProp(n, 1, 5));
0349         printf("%5f %5f %5f %5f %5f %5f\n",
0350                errorProp(n, 2, 0),
0351                errorProp(n, 2, 1),
0352                errorProp(n, 2, 2),
0353                errorProp(n, 2, 3),
0354                errorProp(n, 2, 4),
0355                errorProp(n, 2, 5));
0356         printf("%5f %5f %5f %5f %5f %5f\n",
0357                errorProp(n, 3, 0),
0358                errorProp(n, 3, 1),
0359                errorProp(n, 3, 2),
0360                errorProp(n, 3, 3),
0361                errorProp(n, 3, 4),
0362                errorProp(n, 3, 5));
0363         printf("%5f %5f %5f %5f %5f %5f\n",
0364                errorProp(n, 4, 0),
0365                errorProp(n, 4, 1),
0366                errorProp(n, 4, 2),
0367                errorProp(n, 4, 3),
0368                errorProp(n, 4, 4),
0369                errorProp(n, 4, 5));
0370         printf("%5f %5f %5f %5f %5f %5f\n",
0371                errorProp(n, 5, 0),
0372                errorProp(n, 5, 1),
0373                errorProp(n, 5, 2),
0374                errorProp(n, 5, 3),
0375                errorProp(n, 5, 4),
0376                errorProp(n, 5, 5));
0377       }
0378 #endif
0379     }
0380   }
0381
0382 }  // end namespace mkfit
0383
0384 // ============================================================================
0385 // BEGIN STUFF FROM PropagationMPlex.icc
0386
0387 namespace {
0388
0389   //========================================================================================
0390   // helixAtR
0391   //========================================================================================
0392
0393   void helixAtRFromIterativeCCS_impl(const MPlexLV& __restrict__ inPar,
0394                                      const MPlexQI& __restrict__ inChg,
0395                                      const MPlexQF& __restrict__ msRad,
0396                                      MPlexLV& __restrict__ outPar,
0397                                      MPlexLL& __restrict__ errorProp,
0398                                      MPlexQI& __restrict__ outFailFlag,  // expected to be initialized to 0
0399                                      const int nmin,
0400                                      const int nmax,
0401                                      const int N_proc,
0402                                      const PropagationFlags& pf) {
0403     // bool debug = true;
0404
0405 #pragma omp simd
0406     for (int n = nmin; n < nmax; ++n) {
0407       //initialize erroProp to identity matrix
0408       errorProp(n, 0, 0) = 1.f;
0409       errorProp(n, 1, 1) = 1.f;
0410       errorProp(n, 2, 2) = 1.f;
0411       errorProp(n, 3, 3) = 1.f;
0412       errorProp(n, 4, 4) = 1.f;
0413       errorProp(n, 5, 5) = 1.f;
0414     }
0415     float r0[nmax - nmin];
0416 #pragma omp simd
0417     for (int n = nmin; n < nmax; ++n) {
0418       //initialize erroProp to identity matrix
0419       r0[n - nmin] = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
0420     }
0421     float k[nmax - nmin];
0422     if (pf.use_param_b_field) {
0423 #pragma omp simd
0424       for (int n = nmin; n < nmax; ++n) {
0425         k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::bFieldFromZR(inPar(n, 2, 0), r0[n - nmin]));
0426       }
0427     } else {
0428 #pragma omp simd
0429       for (int n = nmin; n < nmax; ++n) {
0430         k[n - nmin] = inChg(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
0431       }
0432     }
0433     float r[nmax - nmin];
0434 #pragma omp simd
0435     for (int n = nmin; n < nmax; ++n) {
0436       r[n - nmin] = msRad(n, 0, 0);
0437     }
0438     float xin[nmax - nmin];
0439     float yin[nmax - nmin];
0440     float ipt[nmax - nmin];
0441     float phiin[nmax - nmin];
0442     float theta[nmax - nmin];
0443 #pragma omp simd
0444     for (int n = nmin; n < nmax; ++n) {
0445       // if (std::abs(r-r0)<0.0001f) {
0446       //    dprint("distance less than 1mum, skip");
0447       //    continue;
0448       // }
0449
0450       xin[n - nmin] = inPar(n, 0, 0);
0451       yin[n - nmin] = inPar(n, 1, 0);
0452       ipt[n - nmin] = inPar(n, 3, 0);
0453       phiin[n - nmin] = inPar(n, 4, 0);
0454       theta[n - nmin] = inPar(n, 5, 0);
0455
0456       //dprint(std::endl);
0457     }
0458
0459     //debug = true;
0460     for (int n = nmin; n < nmax; ++n) {
0461       dprint_np(n,
0462                 "input parameters"
0463                     << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
0464                     << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9)
0465                     << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
0466                     << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
0467                     << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
0468     }
0469
0470     float kinv[nmax - nmin];
0471     float pt[nmax - nmin];
0472 #pragma omp simd
0473     for (int n = nmin; n < nmax; ++n) {
0474       kinv[n - nmin] = 1.f / k[n - nmin];
0475       pt[n - nmin] = 1.f / ipt[n - nmin];
0476     }
0477     float D[nmax - nmin];
0478     float cosa[nmax - nmin];
0479     float sina[nmax - nmin];
0480     float cosah[nmax - nmin];
0481     float sinah[nmax - nmin];
0482     float id[nmax - nmin];
0483
0484 #pragma omp simd
0485     for (int n = nmin; n < nmax; ++n) {
0486       D[n - nmin] = 0.;
0487     }
0488
0489     //no trig approx here, phi can be large
0490     float cosPorT[nmax - nmin];
0491     float sinPorT[nmax - nmin];
0492 #pragma omp simd
0493     for (int n = nmin; n < nmax; ++n) {
0494       cosPorT[n - nmin] = std::cos(phiin[n - nmin]);
0495     }
0496 #pragma omp simd
0497     for (int n = nmin; n < nmax; ++n) {
0498       sinPorT[n - nmin] = std::sin(phiin[n - nmin]);
0499     }
0500
0501     float pxin[nmax - nmin];
0502     float pyin[nmax - nmin];
0503 #pragma omp simd
0504     for (int n = nmin; n < nmax; ++n) {
0505       pxin[n - nmin] = cosPorT[n - nmin] * pt[n - nmin];
0506       pyin[n - nmin] = sinPorT[n - nmin] * pt[n - nmin];
0507     }
0508
0509     for (int n = nmin; n < nmax; ++n) {
0510       dprint_np(n,
0511                 "k=" << std::setprecision(9) << k[n - nmin] << " pxin=" << std::setprecision(9) << pxin[n - nmin]
0512                      << " pyin=" << std::setprecision(9) << pyin[n - nmin] << " cosPorT=" << std::setprecision(9)
0513                      << cosPorT[n - nmin] << " sinPorT=" << std::setprecision(9) << sinPorT[n - nmin]
0514                      << " pt=" << std::setprecision(9) << pt[n - nmin]);
0515     }
0516
0517     float dDdx[nmax - nmin];
0518     float dDdy[nmax - nmin];
0519     float dDdipt[nmax - nmin];
0520     float dDdphi[nmax - nmin];
0521
0522 #pragma omp simd
0523     for (int n = nmin; n < nmax; ++n) {
0524       dDdipt[n - nmin] = 0.;
0525       dDdphi[n - nmin] = 0.;
0526     }
0527 #pragma omp simd
0528     for (int n = nmin; n < nmax; ++n) {
0529       //derivatives initialized to value for first iteration, i.e. distance = r-r0in
0530       dDdx[n - nmin] = r0[n - nmin] > 0.f ? -xin[n - nmin] / r0[n - nmin] : 0.f;
0531     }
0532
0533 #pragma omp simd
0534     for (int n = nmin; n < nmax; ++n) {
0535       dDdy[n - nmin] = r0[n - nmin] > 0.f ? -yin[n - nmin] / r0[n - nmin] : 0.f;
0536     }
0537
0538     float oodotp[nmax - nmin];
0539     float x[nmax - nmin];
0540     float y[nmax - nmin];
0541     float oor0[nmax - nmin];
0542     float dadipt[nmax - nmin];
0543     float dadx[nmax - nmin];
0544     float dady[nmax - nmin];
0545     float pxca[nmax - nmin];
0546     float pxsa[nmax - nmin];
0547     float pyca[nmax - nmin];
0548     float pysa[nmax - nmin];
0549     float tmp[nmax - nmin];
0550     float tmpx[nmax - nmin];
0551     float tmpy[nmax - nmin];
0552     float pxinold[nmax - nmin];
0553
0554     CMS_UNROLL_LOOP_COUNT(Config::Niter)
0555     for (int i = 0; i < Config::Niter; ++i) {
0556 #pragma omp simd
0557       for (int n = nmin; n < nmax; ++n) {
0558         //compute distance and path for the current iteration
0559         r0[n - nmin] = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
0560       }
0561
0562       // Use one over dot product of transverse momentum and radial
0563       // direction to scale the step. Propagation is prevented from reaching
0564       // too close to the apex (dotp > 0.2).
0565       // - Can / should we come up with a better approximation?
0566       // - Can / should take +/- curvature into account?
0567
0568 #pragma omp simd
0569       for (int n = nmin; n < nmax; ++n) {
0570         oodotp[n - nmin] =
0571             r0[n - nmin] * pt[n - nmin] / (pxin[n - nmin] * outPar(n, 0, 0) + pyin[n - nmin] * outPar(n, 1, 0));
0572       }
0573
0574 #pragma omp simd
0575       for (int n = nmin; n < nmax; ++n) {
0576         if (oodotp[n - nmin] > 5.0f || oodotp[n - nmin] < 0)  // 0.2 is 78.5 deg
0577         {
0578           outFailFlag(n, 0, 0) = 1;
0579           oodotp[n - nmin] = 0.0f;
0580         } else if (r[n - nmin] - r0[n - nmin] < 0.0f && pt[n - nmin] < 1.0f) {
0581           // Scale down the correction for low-pT ingoing tracks.
0582           oodotp[n - nmin] = 1.0f + (oodotp[n - nmin] - 1.0f) * pt[n - nmin];
0583         }
0584       }
0585
0586 #pragma omp simd
0587       for (int n = nmin; n < nmax; ++n) {
0588         // Can we come up with a better approximation?
0589         // Should take +/- curvature into account.
0590         id[n - nmin] = (r[n - nmin] - r0[n - nmin]) * oodotp[n - nmin];
0591       }
0592
0593 #pragma omp simd
0594       for (int n = nmin; n < nmax; ++n) {
0595         D[n - nmin] += id[n - nmin];
0596       }
0597
0598       if constexpr (Config::useTrigApprox) {
0599 #if !defined(__INTEL_COMPILER)
0600 #pragma omp simd
0601 #endif
0602         for (int n = nmin; n < nmax; ++n) {
0603           sincos4(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f, sinah[n - nmin], cosah[n - nmin]);
0604         }
0605       } else {
0606 #if !defined(__INTEL_COMPILER)
0607 #pragma omp simd
0608 #endif
0609         for (int n = nmin; n < nmax; ++n) {
0610           cosah[n - nmin] = std::cos(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f);
0611           sinah[n - nmin] = std::sin(id[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * 0.5f);
0612         }
0613       }
0614
0615 #pragma omp simd
0616       for (int n = nmin; n < nmax; ++n) {
0617         cosa[n - nmin] = 1.f - 2.f * sinah[n - nmin] * sinah[n - nmin];
0618         sina[n - nmin] = 2.f * sinah[n - nmin] * cosah[n - nmin];
0619       }
0620
0621       for (int n = nmin; n < nmax; ++n) {
0622         dprint_np(n,
0623                   "Attempt propagation from r="
0624                       << r0[n - nmin] << " to r=" << r[n - nmin] << std::endl
0625                       << "   x=" << xin[n - nmin] << " y=" << yin[n - nmin] << " z=" << inPar(n, 2, 0)
0626                       << " px=" << pxin[n - nmin] << " py=" << pyin[n - nmin]
0627                       << " pz=" << pt[n - nmin] * std::tan(theta[n - nmin]) << " q=" << inChg(n, 0, 0) << std::endl
0628                       << "   r=" << std::setprecision(9) << r[n - nmin] << " r0=" << std::setprecision(9)
0629                       << r0[n - nmin] << " id=" << std::setprecision(9) << id[n - nmin]
0630                       << " dr=" << std::setprecision(9) << r[n - nmin] - r0[n - nmin] << " cosa=" << cosa[n - nmin]
0631                       << " sina=" << sina[n - nmin] << " dir_cos(rad,pT)=" << 1.0f / oodotp[n - nmin]);
0632       }
0633
0634       //update derivatives on total distance
0635       if (i + 1 != Config::Niter) {
0636 #pragma omp simd
0637         for (int n = nmin; n < nmax; ++n) {
0638           x[n - nmin] = outPar(n, 0, 0);
0639           y[n - nmin] = outPar(n, 1, 0);
0640         }
0641 #pragma omp simd
0642         for (int n = nmin; n < nmax; ++n) {
0643           oor0[n - nmin] =
0644               (r0[n - nmin] > 0.f && std::abs(r[n - nmin] - r0[n - nmin]) > 0.0001f) ? 1.f / r0[n - nmin] : 0.f;
0645         }
0646 #pragma omp simd
0647         for (int n = nmin; n < nmax; ++n) {
0648           dadipt[n - nmin] = id[n - nmin] * kinv[n - nmin];
0649           dadx[n - nmin] = -x[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin];
0650           dady[n - nmin] = -y[n - nmin] * ipt[n - nmin] * kinv[n - nmin] * oor0[n - nmin];
0651           pxca[n - nmin] = pxin[n - nmin] * cosa[n - nmin];
0652           pxsa[n - nmin] = pxin[n - nmin] * sina[n - nmin];
0653           pyca[n - nmin] = pyin[n - nmin] * cosa[n - nmin];
0654           pysa[n - nmin] = pyin[n - nmin] * sina[n - nmin];
0655           tmpx[n - nmin] = k[n - nmin] * dadx[n - nmin];
0656         }
0657
0658 #pragma omp simd
0659         for (int n = nmin; n < nmax; ++n) {
0660           dDdx[n - nmin] -= (x[n - nmin] * (1.f + tmpx[n - nmin] * (pxca[n - nmin] - pysa[n - nmin])) +
0661                              y[n - nmin] * tmpx[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin])) *
0662                             oor0[n - nmin];
0663         }
0664
0665 #pragma omp simd
0666         for (int n = nmin; n < nmax; ++n) {
0667           tmpy[n - nmin] = k[n - nmin] * dady[n - nmin];
0668         }
0669 #pragma omp simd
0670         for (int n = nmin; n < nmax; ++n) {
0671           dDdy[n - nmin] -= (x[n - nmin] * tmpy[n - nmin] * (pxca[n - nmin] - pysa[n - nmin]) +
0672                              y[n - nmin] * (1.f + tmpy[n - nmin] * (pyca[n - nmin] + pxsa[n - nmin]))) *
0673                             oor0[n - nmin];
0674         }
0675 #pragma omp simd
0676         for (int n = nmin; n < nmax; ++n) {
0677           //now r0 depends on ipt and phi as well
0678           tmp[n - nmin] = dadipt[n - nmin] * ipt[n - nmin];
0679         }
0680 #pragma omp simd
0681         for (int n = nmin; n < nmax; ++n) {
0682           dDdipt[n - nmin] -= k[n - nmin] *
0683                               (x[n - nmin] * (pxca[n - nmin] * tmp[n - nmin] - pysa[n - nmin] * tmp[n - nmin] -
0684                                               pyca[n - nmin] - pxsa[n - nmin] + pyin[n - nmin]) +
0685                                y[n - nmin] * (pyca[n - nmin] * tmp[n - nmin] + pxsa[n - nmin] * tmp[n - nmin] -
0686                                               pysa[n - nmin] + pxca[n - nmin] - pxin[n - nmin])) *
0687                               pt[n - nmin] * oor0[n - nmin];
0688         }
0689 #pragma omp simd
0690         for (int n = nmin; n < nmax; ++n) {
0691           dDdphi[n - nmin] += k[n - nmin] *
0692                               (x[n - nmin] * (pysa[n - nmin] - pxin[n - nmin] + pxca[n - nmin]) -
0693                                y[n - nmin] * (pxsa[n - nmin] - pyin[n - nmin] + pyca[n - nmin])) *
0694                               oor0[n - nmin];
0695         }
0696       }
0697
0698 #pragma omp simd
0699       for (int n = nmin; n < nmax; ++n) {
0700         //update parameters
0701         outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k[n - nmin] * sinah[n - nmin] *
0702                                                 (pxin[n - nmin] * cosah[n - nmin] - pyin[n - nmin] * sinah[n - nmin]);
0703         outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k[n - nmin] * sinah[n - nmin] *
0704                                                 (pyin[n - nmin] * cosah[n - nmin] + pxin[n - nmin] * sinah[n - nmin]);
0705         pxinold[n - nmin] = pxin[n - nmin];  //copy before overwriting
0706         pxin[n - nmin] = pxin[n - nmin] * cosa[n - nmin] - pyin[n - nmin] * sina[n - nmin];
0707         pyin[n - nmin] = pyin[n - nmin] * cosa[n - nmin] + pxinold[n - nmin] * sina[n - nmin];
0708       }
0709       for (int n = nmin; n < nmax; ++n) {
0710         dprint_np(n,
0711                   "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0)
0712                                      << " pxin=" << pxin[n - nmin] << " pyin=" << pyin[n - nmin]);
0713       }
0714     }  // iteration loop
0715
0716     float alpha[nmax - nmin];
0717     float dadphi[nmax - nmin];
0718
0719 #pragma omp simd
0720     for (int n = nmin; n < nmax; ++n) {
0721       alpha[n - nmin] = D[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0722       dadx[n - nmin] = dDdx[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0723       dady[n - nmin] = dDdy[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0724       dadipt[n - nmin] = (ipt[n - nmin] * dDdipt[n - nmin] + D[n - nmin]) * kinv[n - nmin];
0725       dadphi[n - nmin] = dDdphi[n - nmin] * ipt[n - nmin] * kinv[n - nmin];
0726     }
0727
0728     if constexpr (Config::useTrigApprox) {
0729 #pragma omp simd
0730       for (int n = nmin; n < nmax; ++n) {
0731         sincos4(alpha[n - nmin], sina[n - nmin], cosa[n - nmin]);
0732       }
0733     } else {
0734 #pragma omp simd
0735       for (int n = nmin; n < nmax; ++n) {
0736         cosa[n - nmin] = std::cos(alpha[n - nmin]);
0737         sina[n - nmin] = std::sin(alpha[n - nmin]);
0738       }
0739     }
0740 #pragma omp simd
0741     for (int n = nmin; n < nmax; ++n) {
0742       errorProp(n, 0, 0) = 1.f + k[n - nmin] * dadx[n - nmin] *
0743                                      (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) *
0744                                      pt[n - nmin];
0745       errorProp(n, 0, 1) = k[n - nmin] * dady[n - nmin] *
0746                            (cosPorT[n - nmin] * cosa[n - nmin] - sinPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin];
0747       errorProp(n, 0, 2) = 0.f;
0748       errorProp(n, 0, 3) =
0749           k[n - nmin] *
0750           (cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) +
0751            sinPorT[n - nmin] * ((1.f - cosa[n - nmin]) - ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin])) *
0752           pt[n - nmin] * pt[n - nmin];
0753       errorProp(n, 0, 4) = k[n - nmin] *
0754                            (cosPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] -
0755                             sinPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] - sinPorT[n - nmin] * sina[n - nmin] +
0756                             cosPorT[n - nmin] * cosa[n - nmin] - cosPorT[n - nmin]) *
0757                            pt[n - nmin];
0758       errorProp(n, 0, 5) = 0.f;
0759     }
0760
0761 #pragma omp simd
0762     for (int n = nmin; n < nmax; ++n) {
0763       errorProp(n, 1, 0) = k[n - nmin] * dadx[n - nmin] *
0764                            (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) * pt[n - nmin];
0765       errorProp(n, 1, 1) = 1.f + k[n - nmin] * dady[n - nmin] *
0766                                      (sinPorT[n - nmin] * cosa[n - nmin] + cosPorT[n - nmin] * sina[n - nmin]) *
0767                                      pt[n - nmin];
0768       errorProp(n, 1, 2) = 0.f;
0769       errorProp(n, 1, 3) =
0770           k[n - nmin] *
0771           (sinPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * cosa[n - nmin] - sina[n - nmin]) +
0772            cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] * sina[n - nmin] - (1.f - cosa[n - nmin]))) *
0773           pt[n - nmin] * pt[n - nmin];
0774       errorProp(n, 1, 4) = k[n - nmin] *
0775                            (sinPorT[n - nmin] * dadphi[n - nmin] * cosa[n - nmin] +
0776                             cosPorT[n - nmin] * dadphi[n - nmin] * sina[n - nmin] + sinPorT[n - nmin] * cosa[n - nmin] +
0777                             cosPorT[n - nmin] * sina[n - nmin] - sinPorT[n - nmin]) *
0778                            pt[n - nmin];
0779       errorProp(n, 1, 5) = 0.f;
0780     }
0781
0782 #pragma omp simd
0783     for (int n = nmin; n < nmax; ++n) {
0784       //no trig approx here, theta can be large
0785       cosPorT[n - nmin] = std::cos(theta[n - nmin]);
0786     }
0787
0788 #pragma omp simd
0789     for (int n = nmin; n < nmax; ++n) {
0790       sinPorT[n - nmin] = std::sin(theta[n - nmin]);
0791     }
0792
0793 #pragma omp simd
0794     for (int n = nmin; n < nmax; ++n) {
0795       //redefine sinPorT as 1./sinPorT to reduce the number of temporaries
0796       sinPorT[n - nmin] = 1.f / sinPorT[n - nmin];
0797     }
0798
0799 #pragma omp simd
0800     for (int n = nmin; n < nmax; ++n) {
0801       outPar(n, 2, 0) =
0802           inPar(n, 2, 0) + k[n - nmin] * alpha[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0803       errorProp(n, 2, 0) = k[n - nmin] * cosPorT[n - nmin] * dadx[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0804       errorProp(n, 2, 1) = k[n - nmin] * cosPorT[n - nmin] * dady[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0805       errorProp(n, 2, 2) = 1.f;
0806       errorProp(n, 2, 3) = k[n - nmin] * cosPorT[n - nmin] * (ipt[n - nmin] * dadipt[n - nmin] - alpha[n - nmin]) *
0807                            pt[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0808       errorProp(n, 2, 4) = k[n - nmin] * dadphi[n - nmin] * cosPorT[n - nmin] * pt[n - nmin] * sinPorT[n - nmin];
0809       errorProp(n, 2, 5) = -k[n - nmin] * alpha[n - nmin] * pt[n - nmin] * sinPorT[n - nmin] * sinPorT[n - nmin];
0810     }
0811
0812 #pragma omp simd
0813     for (int n = nmin; n < nmax; ++n) {
0814       outPar(n, 3, 0) = ipt[n - nmin];
0815       errorProp(n, 3, 0) = 0.f;
0816       errorProp(n, 3, 1) = 0.f;
0817       errorProp(n, 3, 2) = 0.f;
0818       errorProp(n, 3, 3) = 1.f;
0819       errorProp(n, 3, 4) = 0.f;
0820       errorProp(n, 3, 5) = 0.f;
0821     }
0822
0823 #pragma omp simd
0824     for (int n = nmin; n < nmax; ++n) {
0825       outPar(n, 4, 0) = inPar(n, 4, 0) + alpha[n - nmin];
0826       errorProp(n, 4, 0) = dadx[n - nmin];
0827       errorProp(n, 4, 1) = dady[n - nmin];
0828       errorProp(n, 4, 2) = 0.f;
0829       errorProp(n, 4, 3) = dadipt[n - nmin];
0830       errorProp(n, 4, 4) = 1.f + dadphi[n - nmin];
0831       errorProp(n, 4, 5) = 0.f;
0832     }
0833
0834 #pragma omp simd
0835     for (int n = nmin; n < nmax; ++n) {
0836       outPar(n, 5, 0) = theta[n - nmin];
0837       errorProp(n, 5, 0) = 0.f;
0838       errorProp(n, 5, 1) = 0.f;
0839       errorProp(n, 5, 2) = 0.f;
0840       errorProp(n, 5, 3) = 0.f;
0841       errorProp(n, 5, 4) = 0.f;
0842       errorProp(n, 5, 5) = 1.f;
0843     }
0844
0845     for (int n = nmin; n < nmax; ++n) {
0846       dprint_np(
0847           n,
0848           "propagation end, dump parameters\n"
0849               << "   D = " << D[n - nmin] << " alpha = " << alpha[n - nmin] << " kinv = " << kinv[n - nmin] << std::endl
0850               << "   pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r="
0851               << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl
0852               << "   mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl
0853               << "   cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
0854               << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
0855               << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
0856     }
0857
0858 #ifdef DEBUG
0859     for (int n = nmin; n < nmax; ++n) {
0860       if (debug && g_debug && n < N_proc) {
0861         dmutex_guard;
0862         std::cout << n << ": jacobian" << std::endl;
0863         printf("%5f %5f %5f %5f %5f %5f\n",
0864                errorProp(n, 0, 0),
0865                errorProp(n, 0, 1),
0866                errorProp(n, 0, 2),
0867                errorProp(n, 0, 3),
0868                errorProp(n, 0, 4),
0869                errorProp(n, 0, 5));
0870         printf("%5f %5f %5f %5f %5f %5f\n",
0871                errorProp(n, 1, 0),
0872                errorProp(n, 1, 1),
0873                errorProp(n, 1, 2),
0874                errorProp(n, 1, 3),
0875                errorProp(n, 1, 4),
0876                errorProp(n, 1, 5));
0877         printf("%5f %5f %5f %5f %5f %5f\n",
0878                errorProp(n, 2, 0),
0879                errorProp(n, 2, 1),
0880                errorProp(n, 2, 2),
0881                errorProp(n, 2, 3),
0882                errorProp(n, 2, 4),
0883                errorProp(n, 2, 5));
0884         printf("%5f %5f %5f %5f %5f %5f\n",
0885                errorProp(n, 3, 0),
0886                errorProp(n, 3, 1),
0887                errorProp(n, 3, 2),
0888                errorProp(n, 3, 3),
0889                errorProp(n, 3, 4),
0890                errorProp(n, 3, 5));
0891         printf("%5f %5f %5f %5f %5f %5f\n",
0892                errorProp(n, 4, 0),
0893                errorProp(n, 4, 1),
0894                errorProp(n, 4, 2),
0895                errorProp(n, 4, 3),
0896                errorProp(n, 4, 4),
0897                errorProp(n, 4, 5));
0898         printf("%5f %5f %5f %5f %5f %5f\n",
0899                errorProp(n, 5, 0),
0900                errorProp(n, 5, 1),
0901                errorProp(n, 5, 2),
0902                errorProp(n, 5, 3),
0903                errorProp(n, 5, 4),
0904                errorProp(n, 5, 5));
0905         printf("\n");
0906       }
0907     }
0908 #endif
0909   }
0910
0911 }  // namespace
0912
0913 // END STUFF FROM PropagationMPlex.icc
0914 // ============================================================================
0915
0916 namespace mkfit {
0917
0918   void helixAtRFromIterativeCCS(const MPlexLV& inPar,
0919                                 const MPlexQI& inChg,
0920                                 const MPlexQF& msRad,
0921                                 MPlexLV& outPar,
0922                                 MPlexLL& errorProp,
0923                                 MPlexQI& outFailFlag,
0924                                 const int N_proc,
0925                                 const PropagationFlags& pflags) {
0926     errorProp.setVal(0.f);
0927     outFailFlag.setVal(0.f);
0928
0929     helixAtRFromIterativeCCS_impl(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags);
0930   }
0931
0932   void propagateHelixToRMPlex(const MPlexLS& inErr,
0933                               const MPlexLV& inPar,
0934                               const MPlexQI& inChg,
0935                               const MPlexQF& msRad,
0936                               MPlexLS& outErr,
0937                               MPlexLV& outPar,
0938                               MPlexQI& outFailFlag,
0939                               const int N_proc,
0940                               const PropagationFlags& pflags,
0941                               const MPlexQI* noMatEffPtr) {
0942     // bool debug = true;
0943
0944     // This is used further down when calculating similarity with errorProp (and before in DEBUG).
0945     // MT: I don't think this really needed if we use inErr where required.
0946     outErr = inErr;
0947     // This requirement for helixAtRFromIterativeCCS_impl() and for helixAtRFromIterativeCCSFullJac().
0948     // MT: This should be properly handled in both functions (expecting input in output parameters sucks).
0949     outPar = inPar;
0950
0951     MPlexLL errorProp;
0952
0953     helixAtRFromIterativeCCS(inPar, inChg, msRad, outPar, errorProp, outFailFlag, N_proc, pflags);
0954
0955 #ifdef DEBUG
0956     if (debug && g_debug) {
0957       for (int kk = 0; kk < N_proc; ++kk) {
0958         dprintf("outErr before prop %d\n", kk);
0959         for (int i = 0; i < 6; ++i) {
0960           for (int j = 0; j < 6; ++j)
0961             dprintf("%8f ", outErr.At(kk, i, j));
0962           dprintf("\n");
0963         }
0964         dprintf("\n");
0965
0966         dprintf("errorProp %d\n", kk);
0967         for (int i = 0; i < 6; ++i) {
0968           for (int j = 0; j < 6; ++j)
0969             dprintf("%8f ", errorProp.At(kk, i, j));
0970           dprintf("\n");
0971         }
0972         dprintf("\n");
0973       }
0974     }
0975 #endif
0976
0977     // MultHelixProp can be optimized for CCS coordinates, see GenMPlexOps.pl
0978     MPlexLL temp;
0979     MultHelixProp(errorProp, outErr, temp);
0980     MultHelixPropTransp(errorProp, temp, outErr);
0981     // can replace with: MultHelixPropFull(errorProp, outErr, temp); MultHelixPropTranspFull(errorProp, temp, outErr);
0982
0983 #ifdef DEBUG
0984     if (debug && g_debug) {
0985       for (int kk = 0; kk < N_proc; ++kk) {
0986         dprintf("outErr %d\n", kk);
0987         for (int i = 0; i < 6; ++i) {
0988           for (int j = 0; j < 6; ++j)
0989             dprintf("%8f ", outErr.constAt(kk, i, j));
0990           dprintf("\n");
0991         }
0992         dprintf("\n");
0993       }
0994     }
0995 #endif
0996
0997     if (pflags.apply_material) {
0998       MPlexQF hitsRl;
0999       MPlexQF hitsXi;
1000       MPlexQF propSign;
1001
1002       const TrackerInfo& tinfo = *pflags.tracker_info;
1003
1004 #if !defined(__clang__)
1005 #pragma omp simd
1006 #endif
1007       for (int n = 0; n < NN; ++n) {
1008         if (n < N_proc) {
1009           if (outFailFlag(n, 0, 0) || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0))) {
1010             hitsRl(n, 0, 0) = 0.f;
1011             hitsXi(n, 0, 0) = 0.f;
1012           } else {
1013             const auto mat = tinfo.material_checked(std::abs(outPar(n, 2, 0)), msRad(n, 0, 0));
1014             hitsRl(n, 0, 0) = mat.radl;
1015             hitsXi(n, 0, 0) = mat.bbxi;
1016           }
1017           const float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
1018           const float r = msRad(n, 0, 0);
1019           propSign(n, 0, 0) = (r > r0 ? 1.f : -1.f);
1020         }
1021       }
1022       MPlexHV plNrm;
1023 #pragma omp simd
1024       for (int n = 0; n < NN; ++n) {
1025         plNrm(n, 0, 0) = std::cos(outPar.constAt(n, 4, 0));
1026         plNrm(n, 1, 0) = std::sin(outPar.constAt(n, 4, 0));
1027         plNrm(n, 2, 0) = 0.f;
1028       }
1029       applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc);
1030 #ifdef DEBUG
1031       if (debug && g_debug) {
1032         for (int kk = 0; kk < N_proc; ++kk) {
1033           dprintf("propSign %d\n", kk);
1034           for (int i = 0; i < 1; ++i) {
1035             dprintf("%8f ", propSign.constAt(kk, i, 0));
1036           }
1037           dprintf("\n");
1038           dprintf("plNrm %d\n", kk);
1039           for (int i = 0; i < 3; ++i) {
1040             dprintf("%8f ", plNrm.constAt(kk, i, 0));
1041           }
1042           dprintf("\n");
1043           dprintf("outErr(after material) %d\n", kk);
1044           for (int i = 0; i < 6; ++i) {
1045             for (int j = 0; j < 6; ++j)
1046               dprintf("%8f ", outErr.constAt(kk, i, j));
1047             dprintf("\n");
1048           }
1049           dprintf("\n");
1050         }
1051       }
1052 #endif
1053     }
1054
1055     squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
1056
1057     // Matriplex version of:
1058     // result.errors = ROOT::Math::Similarity(errorProp, outErr);
1059
1060     /*
1061      // To be used with: MPT_DIM = 1
1062      if (fabs(sqrt(outPar[0]*outPar[0]+outPar[1]*outPar[1]) - msRad[0]) > 0.0001)
1063      {
1064        std::cout << "DID NOT GET TO R, FailFlag=" << failFlag[0]
1065                  << " dR=" << msRad[0] - hipo(outPar[0],outPar[1])
1066                  << " r="  << msRad[0] << " rin=" << hipo(inPar[0],inPar[1]) << " rout=" << hipo(outPar[0],outPar[1])
1067                  << std::endl;
1068        // std::cout << "    pt=" << pt << " pz=" << inPar.At(n, 2) << std::endl;
1069      }
1070    */
1071
1072     // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the
1073     // state to input when propagation fails -- as was the default before.
1074     // if (pflags.copy_input_state_on_fail) {
1075     for (int i = 0; i < N_proc; ++i) {
1076       if (outFailFlag(i, 0, 0)) {
1077         outPar.copySlot(i, inPar);
1078         outErr.copySlot(i, inErr);
1079       }
1080     }
1081     // }
1082   }
1083
1084 }  // end namespace mkfit