Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:21

0001 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0002 #include "RecoTracker/MkFitCore/interface/PropagationConfig.h"
0003 #include "RecoTracker/MkFitCore/interface/Config.h"
0004 #include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
0005 
0006 #include "PropagationMPlex.h"
0007 
0008 //#define DEBUG
0009 #include "Debug.h"
0010 
0011 //==============================================================================
0012 // propagateLineToRMPlex
0013 //==============================================================================
0014 
0015 //using namespace Matriplex;
0016 
0017 namespace mkfit {
0018 
0019   void propagateLineToRMPlex(const MPlexLS& psErr,
0020                              const MPlexLV& psPar,
0021                              const MPlexHS& msErr,
0022                              const MPlexHV& msPar,
0023                              MPlexLS& outErr,
0024                              MPlexLV& outPar,
0025                              const int N_proc) {
0026     // XXX Regenerate parts below with a script.
0027 
0028     const Matriplex::idx_t N = NN;
0029 
0030 #pragma omp simd
0031     for (int n = 0; n < NN; ++n) {
0032       const float cosA = (psPar[0 * N + n] * psPar[3 * N + n] + psPar[1 * N + n] * psPar[4 * N + n]) /
0033                          (std::sqrt((psPar[0 * N + n] * psPar[0 * N + n] + psPar[1 * N + n] * psPar[1 * N + n]) *
0034                                     (psPar[3 * N + n] * psPar[3 * N + n] + psPar[4 * N + n] * psPar[4 * N + n])));
0035       const float dr = (hipo(msPar[0 * N + n], msPar[1 * N + n]) - hipo(psPar[0 * N + n], psPar[1 * N + n])) / cosA;
0036 
0037       dprint_np(n, "propagateLineToRMPlex dr=" << dr);
0038 
0039       const float pt = hipo(psPar[3 * N + n], psPar[4 * N + n]);
0040       const float p = dr / pt;  // path
0041       const float psq = p * p;
0042 
0043       outPar[0 * N + n] = psPar[0 * N + n] + p * psPar[3 * N + n];
0044       outPar[1 * N + n] = psPar[1 * N + n] + p * psPar[4 * N + n];
0045       outPar[2 * N + n] = psPar[2 * N + n] + p * psPar[5 * N + n];
0046       outPar[3 * N + n] = psPar[3 * N + n];
0047       outPar[4 * N + n] = psPar[4 * N + n];
0048       outPar[5 * N + n] = psPar[5 * N + n];
0049 
0050       {
0051         const MPlexLS& A = psErr;
0052         MPlexLS& B = outErr;
0053 
0054         B.fArray[0 * N + n] = A.fArray[0 * N + n];
0055         B.fArray[1 * N + n] = A.fArray[1 * N + n];
0056         B.fArray[2 * N + n] = A.fArray[2 * N + n];
0057         B.fArray[3 * N + n] = A.fArray[3 * N + n];
0058         B.fArray[4 * N + n] = A.fArray[4 * N + n];
0059         B.fArray[5 * N + n] = A.fArray[5 * N + n];
0060         B.fArray[6 * N + n] = A.fArray[6 * N + n] + p * A.fArray[0 * N + n];
0061         B.fArray[7 * N + n] = A.fArray[7 * N + n] + p * A.fArray[1 * N + n];
0062         B.fArray[8 * N + n] = A.fArray[8 * N + n] + p * A.fArray[3 * N + n];
0063         B.fArray[9 * N + n] =
0064             A.fArray[9 * N + n] + p * (A.fArray[6 * N + n] + A.fArray[6 * N + n]) + psq * A.fArray[0 * N + n];
0065         B.fArray[10 * N + n] = A.fArray[10 * N + n] + p * A.fArray[1 * N + n];
0066         B.fArray[11 * N + n] = A.fArray[11 * N + n] + p * A.fArray[2 * N + n];
0067         B.fArray[12 * N + n] = A.fArray[12 * N + n] + p * A.fArray[4 * N + n];
0068         B.fArray[13 * N + n] =
0069             A.fArray[13 * N + n] + p * (A.fArray[7 * N + n] + A.fArray[10 * N + n]) + psq * A.fArray[1 * N + n];
0070         B.fArray[14 * N + n] =
0071             A.fArray[14 * N + n] + p * (A.fArray[11 * N + n] + A.fArray[11 * N + n]) + psq * A.fArray[2 * N + n];
0072         B.fArray[15 * N + n] = A.fArray[15 * N + n] + p * A.fArray[3 * N + n];
0073         B.fArray[16 * N + n] = A.fArray[16 * N + n] + p * A.fArray[4 * N + n];
0074         B.fArray[17 * N + n] = A.fArray[17 * N + n] + p * A.fArray[5 * N + n];
0075         B.fArray[18 * N + n] =
0076             A.fArray[18 * N + n] + p * (A.fArray[8 * N + n] + A.fArray[15 * N + n]) + psq * A.fArray[3 * N + n];
0077         B.fArray[19 * N + n] =
0078             A.fArray[19 * N + n] + p * (A.fArray[12 * N + n] + A.fArray[16 * N + n]) + psq * A.fArray[4 * N + n];
0079         B.fArray[20 * N + n] =
0080             A.fArray[20 * N + n] + p * (A.fArray[17 * N + n] + A.fArray[17 * N + n]) + psq * A.fArray[5 * N + n];
0081       }
0082 
0083       dprint_np(n, "propagateLineToRMPlex arrive at r=" << hipo(outPar[0 * N + n], outPar[1 * N + n]));
0084     }
0085   }
0086 
0087 }  // end namespace mkfit
0088 
0089 //==============================================================================
0090 // propagateHelixToRMPlex
0091 //==============================================================================
0092 
0093 namespace {
0094   using namespace mkfit;
0095 
0096   void MultHelixProp(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
0097     // C = A * B
0098 
0099     typedef float T;
0100     const Matriplex::idx_t N = NN;
0101 
0102     const T* a = A.fArray;
0103     ASSUME_ALIGNED(a, 64);
0104     const T* b = B.fArray;
0105     ASSUME_ALIGNED(b, 64);
0106     T* c = C.fArray;
0107     ASSUME_ALIGNED(c, 64);
0108 
0109 #include "MultHelixProp.ah"
0110   }
0111 
0112   void MultHelixPropTransp(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
0113     // C = B * AT;
0114 
0115     typedef float T;
0116     const Matriplex::idx_t N = NN;
0117 
0118     const T* a = A.fArray;
0119     ASSUME_ALIGNED(a, 64);
0120     const T* b = B.fArray;
0121     ASSUME_ALIGNED(b, 64);
0122     T* c = C.fArray;
0123     ASSUME_ALIGNED(c, 64);
0124 
0125 #include "MultHelixPropTransp.ah"
0126   }
0127 
0128   void MultHelixPropEndcap(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
0129     // C = A * B
0130 
0131     typedef float T;
0132     const Matriplex::idx_t N = NN;
0133 
0134     const T* a = A.fArray;
0135     ASSUME_ALIGNED(a, 64);
0136     const T* b = B.fArray;
0137     ASSUME_ALIGNED(b, 64);
0138     T* c = C.fArray;
0139     ASSUME_ALIGNED(c, 64);
0140 
0141 #include "MultHelixPropEndcap.ah"
0142   }
0143 
0144   void MultHelixPropTranspEndcap(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
0145     // C = B * AT;
0146 
0147     typedef float T;
0148     const Matriplex::idx_t N = NN;
0149 
0150     const T* a = A.fArray;
0151     ASSUME_ALIGNED(a, 64);
0152     const T* b = B.fArray;
0153     ASSUME_ALIGNED(b, 64);
0154     T* c = C.fArray;
0155     ASSUME_ALIGNED(c, 64);
0156 
0157 #include "MultHelixPropTranspEndcap.ah"
0158   }
0159 
0160   inline void MultHelixPropTemp(const MPlexLL& A, const MPlexLL& B, MPlexLL& C, int n) {
0161     // C = A * B
0162 
0163     typedef float T;
0164     const Matriplex::idx_t N = NN;
0165 
0166     const T* a = A.fArray;
0167     ASSUME_ALIGNED(a, 64);
0168     const T* b = B.fArray;
0169     ASSUME_ALIGNED(b, 64);
0170     T* c = C.fArray;
0171     ASSUME_ALIGNED(c, 64);
0172 
0173     c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[6 * N + n] + a[2 * N + n] * b[12 * N + n] +
0174                    a[4 * N + n] * b[24 * N + n];
0175     c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[13 * N + n] +
0176                    a[4 * N + n] * b[25 * N + n];
0177     c[2 * N + n] = a[2 * N + n];
0178     c[3 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[9 * N + n] + a[2 * N + n] * b[15 * N + n] +
0179                    a[3 * N + n] + a[4 * N + n] * b[27 * N + n];
0180     c[4 * N + n] = a[0 * N + n] * b[4 * N + n] + a[1 * N + n] * b[10 * N + n] + a[4 * N + n];
0181     c[5 * N + n] = a[2 * N + n] * b[17 * N + n] + a[5 * N + n];
0182     c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[6 * N + n] + a[8 * N + n] * b[12 * N + n] +
0183                    a[10 * N + n] * b[24 * N + n];
0184     c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[13 * N + n] +
0185                    a[10 * N + n] * b[25 * N + n];
0186     c[8 * N + n] = a[8 * N + n];
0187     c[9 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[9 * N + n] + a[8 * N + n] * b[15 * N + n] +
0188                    a[9 * N + n] + a[10 * N + n] * b[27 * N + n];
0189     c[10 * N + n] = a[6 * N + n] * b[4 * N + n] + a[7 * N + n] * b[10 * N + n] + a[10 * N + n];
0190     c[11 * N + n] = a[8 * N + n] * b[17 * N + n] + a[11 * N + n];
0191     c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[6 * N + n] + a[14 * N + n] * b[12 * N + n] +
0192                     a[16 * N + n] * b[24 * N + n];
0193     c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[13 * N + n] +
0194                     a[16 * N + n] * b[25 * N + n];
0195     c[14 * N + n] = a[14 * N + n];
0196     c[15 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[9 * N + n] + a[14 * N + n] * b[15 * N + n] +
0197                     a[15 * N + n] + a[16 * N + n] * b[27 * N + n];
0198     c[16 * N + n] = a[12 * N + n] * b[4 * N + n] + a[13 * N + n] * b[10 * N + n] + a[16 * N + n];
0199     c[17 * N + n] = a[14 * N + n] * b[17 * N + n] + a[17 * N + n];
0200     c[18 * N + n] = a[18 * N + n] * b[0 * N + n] + a[19 * N + n] * b[6 * N + n] + a[20 * N + n] * b[12 * N + n] +
0201                     a[22 * N + n] * b[24 * N + n];
0202     c[19 * N + n] = a[18 * N + n] * b[1 * N + n] + a[19 * N + n] * b[7 * N + n] + a[20 * N + n] * b[13 * N + n] +
0203                     a[22 * N + n] * b[25 * N + n];
0204     c[20 * N + n] = a[20 * N + n];
0205     c[21 * N + n] = a[18 * N + n] * b[3 * N + n] + a[19 * N + n] * b[9 * N + n] + a[20 * N + n] * b[15 * N + n] +
0206                     a[21 * N + n] + a[22 * N + n] * b[27 * N + n];
0207     c[22 * N + n] = a[18 * N + n] * b[4 * N + n] + a[19 * N + n] * b[10 * N + n] + a[22 * N + n];
0208     c[23 * N + n] = a[20 * N + n] * b[17 * N + n] + a[23 * N + n];
0209     c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[6 * N + n] + a[26 * N + n] * b[12 * N + n] +
0210                     a[28 * N + n] * b[24 * N + n];
0211     c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[13 * N + n] +
0212                     a[28 * N + n] * b[25 * N + n];
0213     c[26 * N + n] = a[26 * N + n];
0214     c[27 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[9 * N + n] + a[26 * N + n] * b[15 * N + n] +
0215                     a[27 * N + n] + a[28 * N + n] * b[27 * N + n];
0216     c[28 * N + n] = a[24 * N + n] * b[4 * N + n] + a[25 * N + n] * b[10 * N + n] + a[28 * N + n];
0217     c[29 * N + n] = a[26 * N + n] * b[17 * N + n] + a[29 * N + n];
0218     c[30 * N + n] = a[30 * N + n] * b[0 * N + n] + a[31 * N + n] * b[6 * N + n] + a[32 * N + n] * b[12 * N + n] +
0219                     a[34 * N + n] * b[24 * N + n];
0220     c[31 * N + n] = a[30 * N + n] * b[1 * N + n] + a[31 * N + n] * b[7 * N + n] + a[32 * N + n] * b[13 * N + n] +
0221                     a[34 * N + n] * b[25 * N + n];
0222     c[32 * N + n] = a[32 * N + n];
0223     c[33 * N + n] = a[30 * N + n] * b[3 * N + n] + a[31 * N + n] * b[9 * N + n] + a[32 * N + n] * b[15 * N + n] +
0224                     a[33 * N + n] + a[34 * N + n] * b[27 * N + n];
0225     c[34 * N + n] = a[30 * N + n] * b[4 * N + n] + a[31 * N + n] * b[10 * N + n] + a[34 * N + n];
0226     c[35 * N + n] = a[32 * N + n] * b[17 * N + n] + a[35 * N + n];
0227   }
0228 
0229   // this version does not assume to know which elements are 0 or 1, so it does the full multiplication
0230   void MultHelixPropFull(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
0231     for (int n = 0; n < NN; ++n) {
0232       for (int i = 0; i < 6; ++i) {
0233 // optimization reports indicate only the inner two loops are good
0234 // candidates for vectorization
0235 #pragma omp simd
0236         for (int j = 0; j < 6; ++j) {
0237           C(n, i, j) = 0.;
0238           for (int k = 0; k < 6; ++k)
0239             C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
0240         }
0241       }
0242     }
0243   }
0244 
0245   // this version does not assume to know which elements are 0 or 1, so it does the full mupltiplication
0246   void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
0247     for (int n = 0; n < NN; ++n) {
0248       for (int i = 0; i < 6; ++i) {
0249 // optimization reports indicate only the inner two loops are good
0250 // candidates for vectorization
0251 #pragma omp simd
0252         for (int j = 0; j < 6; ++j) {
0253           C(n, i, j) = 0.;
0254           for (int k = 0; k < 6; ++k)
0255             C(n, i, j) += B.constAt(n, i, k) * A.constAt(n, j, k);
0256         }
0257       }
0258     }
0259   }
0260 
0261 #ifdef UNUSED
0262   // this version does not assume to know which elements are 0 or 1, so it does the full multiplication
0263   void MultHelixPropFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) {
0264 #pragma omp simd
0265     for (int n = 0; n < NN; ++n) {
0266       for (int i = 0; i < 6; ++i) {
0267         for (int j = 0; j < 6; ++j) {
0268           C(n, i, j) = 0.;
0269           for (int k = 0; k < 6; ++k)
0270             C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
0271         }
0272       }
0273     }
0274   }
0275 
0276   // this version does not assume to know which elements are 0 or 1, so it does the full mupltiplication
0277   void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) {
0278 #pragma omp simd
0279     for (int n = 0; n < NN; ++n) {
0280       for (int i = 0; i < 6; ++i) {
0281         for (int j = 0; j < 6; ++j) {
0282           C(n, i, j) = 0.;
0283           for (int k = 0; k < 6; ++k)
0284             C(n, i, j) += B.constAt(n, i, k) * A.constAt(n, j, k);
0285         }
0286       }
0287     }
0288   }
0289 #endif
0290 }  // end unnamed namespace
0291 
0292 //==============================================================================
0293 
0294 namespace mkfit {
0295 
0296   void helixAtRFromIterativeCCSFullJac(const MPlexLV& inPar,
0297                                        const MPlexQI& inChg,
0298                                        const MPlexQF& msRad,
0299                                        MPlexLV& outPar,
0300                                        MPlexLL& errorProp,
0301                                        const int N_proc) {
0302     errorProp.setVal(0.f);
0303     MPlexLL errorPropTmp(0.f);   //initialize to zero
0304     MPlexLL errorPropSwap(0.f);  //initialize to zero
0305 
0306     // loop does not vectorize with llvm16, and it issues a warning
0307     // that apparently can't be suppressed with a pragma.  Needs to
0308     // be rechecked if future llvm versions improve vectorization.
0309 #if !defined(__clang__)
0310 #pragma omp simd
0311 #endif
0312     for (int n = 0; n < NN; ++n) {
0313       //initialize erroProp to identity matrix
0314       errorProp(n, 0, 0) = 1.f;
0315       errorProp(n, 1, 1) = 1.f;
0316       errorProp(n, 2, 2) = 1.f;
0317       errorProp(n, 3, 3) = 1.f;
0318       errorProp(n, 4, 4) = 1.f;
0319       errorProp(n, 5, 5) = 1.f;
0320 
0321       const float k = inChg.constAt(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
0322       const float r = msRad.constAt(n, 0, 0);
0323       float r0 = hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0));
0324 
0325       if (std::abs(r - r0) < 0.0001f) {
0326         dprint_np(n, "distance less than 1mum, skip");
0327         continue;
0328       }
0329 
0330       const float ipt = inPar.constAt(n, 3, 0);
0331       const float phiin = inPar.constAt(n, 4, 0);
0332       const float theta = inPar.constAt(n, 5, 0);
0333 
0334       //set those that are 1. before iterations
0335       errorPropTmp(n, 2, 2) = 1.f;
0336       errorPropTmp(n, 3, 3) = 1.f;
0337       errorPropTmp(n, 4, 4) = 1.f;
0338       errorPropTmp(n, 5, 5) = 1.f;
0339 
0340       float cosah = 0., sinah = 0.;
0341       //no trig approx here, phi and theta can be large
0342       float cosP = std::cos(phiin), sinP = std::sin(phiin);
0343       const float cosT = std::cos(theta), sinT = std::sin(theta);
0344       float pxin = cosP / ipt;
0345       float pyin = sinP / ipt;
0346 
0347       CMS_UNROLL_LOOP_COUNT(Config::Niter)
0348       for (int i = 0; i < Config::Niter; ++i) {
0349         dprint_np(n,
0350                   std::endl
0351                       << "attempt propagation from r=" << r0 << " to r=" << r << std::endl
0352                       << "x=" << outPar.At(n, 0, 0) << " y=" << outPar.At(n, 1, 0) << " z=" << outPar.At(n, 2, 0)
0353                       << " px=" << std::cos(phiin) / ipt << " py=" << std::sin(phiin) / ipt
0354                       << " pz=" << 1.f / (ipt * tan(theta)) << " q=" << inChg.constAt(n, 0, 0) << std::endl);
0355 
0356         r0 = hipo(outPar.constAt(n, 0, 0), outPar.constAt(n, 1, 0));
0357         const float ialpha = (r - r0) * ipt / k;
0358         //alpha+=ialpha;
0359 
0360         if constexpr (Config::useTrigApprox) {
0361           sincos4(ialpha * 0.5f, sinah, cosah);
0362         } else {
0363           cosah = std::cos(ialpha * 0.5f);
0364           sinah = std::sin(ialpha * 0.5f);
0365         }
0366         const float cosa = 1.f - 2.f * sinah * sinah;
0367         const float sina = 2.f * sinah * cosah;
0368 
0369         //derivatives of alpha
0370         const float dadx = -outPar.At(n, 0, 0) * ipt / (k * r0);
0371         const float dady = -outPar.At(n, 1, 0) * ipt / (k * r0);
0372         const float dadipt = (r - r0) / k;
0373 
0374         outPar.At(n, 0, 0) = outPar.constAt(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
0375         outPar.At(n, 1, 0) = outPar.constAt(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
0376         const float pxinold = pxin;  //copy before overwriting
0377         pxin = pxin * cosa - pyin * sina;
0378         pyin = pyin * cosa + pxinold * sina;
0379 
0380         //need phi at origin, so this goes before redefining phi
0381         //no trig approx here, phi can be large
0382         cosP = std::cos(outPar.At(n, 4, 0));
0383         sinP = std::sin(outPar.At(n, 4, 0));
0384 
0385         outPar.At(n, 2, 0) = outPar.constAt(n, 2, 0) + k * ialpha * cosT / (ipt * sinT);
0386         outPar.At(n, 3, 0) = ipt;
0387         outPar.At(n, 4, 0) = outPar.constAt(n, 4, 0) + ialpha;
0388         outPar.At(n, 5, 0) = theta;
0389 
0390         errorPropTmp(n, 0, 0) = 1.f + k * (cosP * dadx * cosa - sinP * dadx * sina) / ipt;
0391         errorPropTmp(n, 0, 1) = k * (cosP * dady * cosa - sinP * dady * sina) / ipt;
0392         errorPropTmp(n, 0, 3) =
0393             k * (cosP * (ipt * dadipt * cosa - sina) + sinP * ((1.f - cosa) - ipt * dadipt * sina)) / (ipt * ipt);
0394         errorPropTmp(n, 0, 4) = -k * (sinP * sina + cosP * (1.f - cosa)) / ipt;
0395 
0396         errorPropTmp(n, 1, 0) = k * (sinP * dadx * cosa + cosP * dadx * sina) / ipt;
0397         errorPropTmp(n, 1, 1) = 1.f + k * (sinP * dady * cosa + cosP * dady * sina) / ipt;
0398         errorPropTmp(n, 1, 3) =
0399             k * (sinP * (ipt * dadipt * cosa - sina) + cosP * (ipt * dadipt * sina - (1.f - cosa))) / (ipt * ipt);
0400         errorPropTmp(n, 1, 4) = k * (cosP * sina - sinP * (1.f - cosa)) / ipt;
0401 
0402         errorPropTmp(n, 2, 0) = k * cosT * dadx / (ipt * sinT);
0403         errorPropTmp(n, 2, 1) = k * cosT * dady / (ipt * sinT);
0404         errorPropTmp(n, 2, 3) = k * cosT * (ipt * dadipt - ialpha) / (ipt * ipt * sinT);
0405         errorPropTmp(n, 2, 5) = -k * ialpha / (ipt * sinT * sinT);
0406 
0407         errorPropTmp(n, 4, 0) = dadx;
0408         errorPropTmp(n, 4, 1) = dady;
0409         errorPropTmp(n, 4, 3) = dadipt;
0410 
0411         MultHelixPropTemp(errorProp, errorPropTmp, errorPropSwap, n);
0412         errorProp = errorPropSwap;
0413       }
0414 
0415       dprint_np(
0416           n,
0417           "propagation end, dump parameters"
0418               << std::endl
0419               << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl
0420               << "mom = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0421               << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0422               << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0)))
0423               << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0))
0424               << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl);
0425 
0426 #ifdef DEBUG
0427       if (debug && g_debug && n < N_proc) {
0428         dmutex_guard;
0429         std::cout << n << " jacobian" << std::endl;
0430         printf("%5f %5f %5f %5f %5f %5f\n",
0431                errorProp(n, 0, 0),
0432                errorProp(n, 0, 1),
0433                errorProp(n, 0, 2),
0434                errorProp(n, 0, 3),
0435                errorProp(n, 0, 4),
0436                errorProp(n, 0, 5));
0437         printf("%5f %5f %5f %5f %5f %5f\n",
0438                errorProp(n, 1, 0),
0439                errorProp(n, 1, 1),
0440                errorProp(n, 1, 2),
0441                errorProp(n, 1, 3),
0442                errorProp(n, 1, 4),
0443                errorProp(n, 1, 5));
0444         printf("%5f %5f %5f %5f %5f %5f\n",
0445                errorProp(n, 2, 0),
0446                errorProp(n, 2, 1),
0447                errorProp(n, 2, 2),
0448                errorProp(n, 2, 3),
0449                errorProp(n, 2, 4),
0450                errorProp(n, 2, 5));
0451         printf("%5f %5f %5f %5f %5f %5f\n",
0452                errorProp(n, 3, 0),
0453                errorProp(n, 3, 1),
0454                errorProp(n, 3, 2),
0455                errorProp(n, 3, 3),
0456                errorProp(n, 3, 4),
0457                errorProp(n, 3, 5));
0458         printf("%5f %5f %5f %5f %5f %5f\n",
0459                errorProp(n, 4, 0),
0460                errorProp(n, 4, 1),
0461                errorProp(n, 4, 2),
0462                errorProp(n, 4, 3),
0463                errorProp(n, 4, 4),
0464                errorProp(n, 4, 5));
0465         printf("%5f %5f %5f %5f %5f %5f\n",
0466                errorProp(n, 5, 0),
0467                errorProp(n, 5, 1),
0468                errorProp(n, 5, 2),
0469                errorProp(n, 5, 3),
0470                errorProp(n, 5, 4),
0471                errorProp(n, 5, 5));
0472       }
0473 #endif
0474     }
0475   }
0476 
0477 }  // end namespace mkfit
0478 
0479 //#pragma omp declare simd simdlen(NN) notinbranch linear(n)
0480 #include "PropagationMPlex.icc"
0481 
0482 namespace mkfit {
0483 
0484   void helixAtRFromIterativeCCS(const MPlexLV& inPar,
0485                                 const MPlexQI& inChg,
0486                                 const MPlexQF& msRad,
0487                                 MPlexLV& outPar,
0488                                 MPlexLL& errorProp,
0489                                 MPlexQI& outFailFlag,
0490                                 const int N_proc,
0491                                 const PropagationFlags& pflags) {
0492     errorProp.setVal(0.f);
0493     outFailFlag.setVal(0.f);
0494 
0495     helixAtRFromIterativeCCS_impl(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags);
0496   }
0497 
0498   void propagateHelixToRMPlex(const MPlexLS& inErr,
0499                               const MPlexLV& inPar,
0500                               const MPlexQI& inChg,
0501                               const MPlexQF& msRad,
0502                               MPlexLS& outErr,
0503                               MPlexLV& outPar,
0504                               MPlexQI& outFailFlag,
0505                               const int N_proc,
0506                               const PropagationFlags& pflags,
0507                               const MPlexQI* noMatEffPtr) {
0508     // bool debug = true;
0509 
0510     // This is used further down when calculating similarity with errorProp (and before in DEBUG).
0511     // MT: I don't think this really needed if we use inErr where required.
0512     outErr = inErr;
0513     // This requirement for helixAtRFromIterativeCCS_impl() and for helixAtRFromIterativeCCSFullJac().
0514     // MT: This should be properly handled in both functions (expecting input in output parameters sucks).
0515     outPar = inPar;
0516 
0517     MPlexLL errorProp;
0518 
0519     helixAtRFromIterativeCCS(inPar, inChg, msRad, outPar, errorProp, outFailFlag, N_proc, pflags);
0520 
0521 #ifdef DEBUG
0522     if (debug && g_debug) {
0523       for (int kk = 0; kk < N_proc; ++kk) {
0524         dprintf("outErr before prop %d\n", kk);
0525         for (int i = 0; i < 6; ++i) {
0526           for (int j = 0; j < 6; ++j)
0527             dprintf("%8f ", outErr.At(kk, i, j));
0528           dprintf("\n");
0529         }
0530         dprintf("\n");
0531 
0532         dprintf("errorProp %d\n", kk);
0533         for (int i = 0; i < 6; ++i) {
0534           for (int j = 0; j < 6; ++j)
0535             dprintf("%8f ", errorProp.At(kk, i, j));
0536           dprintf("\n");
0537         }
0538         dprintf("\n");
0539       }
0540     }
0541 #endif
0542 
0543     // MultHelixProp can be optimized for CCS coordinates, see GenMPlexOps.pl
0544     MPlexLL temp;
0545     MultHelixProp(errorProp, outErr, temp);
0546     MultHelixPropTransp(errorProp, temp, outErr);
0547     // can replace with: MultHelixPropFull(errorProp, outErr, temp); MultHelixPropTranspFull(errorProp, temp, outErr);
0548 
0549     if (pflags.apply_material) {
0550       MPlexQF hitsRl;
0551       MPlexQF hitsXi;
0552       MPlexQF propSign;
0553 
0554       const TrackerInfo& tinfo = *pflags.tracker_info;
0555 
0556 #pragma omp simd
0557       for (int n = 0; n < NN; ++n) {
0558         if (n < N_proc) {
0559           if (outFailFlag(n, 0, 0) || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0))) {
0560             hitsRl(n, 0, 0) = 0.f;
0561             hitsXi(n, 0, 0) = 0.f;
0562           } else {
0563             auto mat = tinfo.material_checked(std::abs(outPar(n, 2, 0)), msRad(n, 0, 0));
0564             hitsRl(n, 0, 0) = mat.radl;
0565             hitsXi(n, 0, 0) = mat.bbxi;
0566           }
0567           const float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
0568           const float r = msRad(n, 0, 0);
0569           propSign(n, 0, 0) = (r > r0 ? 1. : -1.);
0570         }
0571       }
0572       MPlexHV plNrm;
0573 #pragma omp simd
0574       for (int n = 0; n < NN; ++n) {
0575         plNrm(n, 0, 0) = std::cos(outPar.constAt(n, 4, 0));
0576         plNrm(n, 1, 0) = std::sin(outPar.constAt(n, 4, 0));
0577         plNrm(n, 2, 0) = 0.f;
0578       }
0579       applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc);
0580     }
0581 
0582     squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
0583 
0584     // Matriplex version of:
0585     // result.errors = ROOT::Math::Similarity(errorProp, outErr);
0586 
0587     /*
0588      // To be used with: MPT_DIM = 1
0589      if (fabs(sqrt(outPar[0]*outPar[0]+outPar[1]*outPar[1]) - msRad[0]) > 0.0001)
0590      {
0591        std::cout << "DID NOT GET TO R, FailFlag=" << failFlag[0]
0592                  << " dR=" << msRad[0] - std::hypot(outPar[0],outPar[1])
0593                  << " r="  << msRad[0] << " rin=" << std::hypot(inPar[0],inPar[1]) << " rout=" << std::hypot(outPar[0],outPar[1])
0594                  << std::endl;
0595        // std::cout << "    pt=" << pt << " pz=" << inPar.At(n, 2) << std::endl;
0596      }
0597    */
0598 
0599     // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the
0600     // state to input when propagation fails -- as was the default before.
0601     // if (pflags.copy_input_state_on_fail) {
0602     for (int i = 0; i < N_proc; ++i) {
0603       if (outFailFlag(i, 0, 0)) {
0604         outPar.copySlot(i, inPar);
0605         outErr.copySlot(i, inErr);
0606       }
0607     }
0608     // }
0609   }
0610 
0611   //==============================================================================
0612 
0613   void propagateHelixToZMPlex(const MPlexLS& inErr,
0614                               const MPlexLV& inPar,
0615                               const MPlexQI& inChg,
0616                               const MPlexQF& msZ,
0617                               MPlexLS& outErr,
0618                               MPlexLV& outPar,
0619                               MPlexQI& outFailFlag,
0620                               const int N_proc,
0621                               const PropagationFlags& pflags,
0622                               const MPlexQI* noMatEffPtr) {
0623     // debug = true;
0624 
0625     outErr = inErr;
0626     outPar = inPar;
0627 
0628     MPlexLL errorProp;
0629 
0630     //helixAtZ_new(inPar, inChg, msZ, outPar, errorProp, outFailFlag, N_proc, pflags);
0631     helixAtZ(inPar, inChg, msZ, outPar, errorProp, outFailFlag, N_proc, pflags);
0632 
0633 #ifdef DEBUG
0634     if (debug && g_debug) {
0635       for (int kk = 0; kk < N_proc; ++kk) {
0636         dprintf("inPar %d\n", kk);
0637         for (int i = 0; i < 6; ++i) {
0638           dprintf("%8f ", inPar.constAt(kk, i, 0));
0639         }
0640         dprintf("\n");
0641 
0642         dprintf("inErr %d\n", kk);
0643         for (int i = 0; i < 6; ++i) {
0644           for (int j = 0; j < 6; ++j)
0645             dprintf("%8f ", inErr.constAt(kk, i, j));
0646           dprintf("\n");
0647         }
0648         dprintf("\n");
0649 
0650         dprintf("errorProp %d\n", kk);
0651         for (int i = 0; i < 6; ++i) {
0652           for (int j = 0; j < 6; ++j)
0653             dprintf("%8f ", errorProp.At(kk, i, j));
0654           dprintf("\n");
0655         }
0656         dprintf("\n");
0657       }
0658     }
0659 #endif
0660 
0661 #ifdef DEBUG
0662     if (debug && g_debug) {
0663       for (int kk = 0; kk < N_proc; ++kk) {
0664         dprintf("outErr %d\n", kk);
0665         for (int i = 0; i < 6; ++i) {
0666           for (int j = 0; j < 6; ++j)
0667             dprintf("%8f ", outErr.constAt(kk, i, j));
0668           dprintf("\n");
0669         }
0670         dprintf("\n");
0671       }
0672     }
0673 #endif
0674 
0675     // Matriplex version of: result.errors = ROOT::Math::Similarity(errorProp, outErr);
0676     MPlexLL temp;
0677     MultHelixPropEndcap(errorProp, outErr, temp);
0678     MultHelixPropTranspEndcap(errorProp, temp, outErr);
0679     // can replace with: MultHelixPropFull(errorProp, outErr, temp); MultHelixPropTranspFull(errorProp, temp, outErr);
0680 
0681     if (pflags.apply_material) {
0682       MPlexQF hitsRl;
0683       MPlexQF hitsXi;
0684       MPlexQF propSign;
0685 
0686       const TrackerInfo& tinfo = *pflags.tracker_info;
0687 
0688 #pragma omp simd
0689       for (int n = 0; n < NN; ++n) {
0690         if (n >= N_proc || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0))) {
0691           hitsRl(n, 0, 0) = 0.f;
0692           hitsXi(n, 0, 0) = 0.f;
0693         } else {
0694           const float hypo = std::hypot(outPar(n, 0, 0), outPar(n, 1, 0));
0695           auto mat = tinfo.material_checked(std::abs(msZ(n, 0, 0)), hypo);
0696           hitsRl(n, 0, 0) = mat.radl;
0697           hitsXi(n, 0, 0) = mat.bbxi;
0698         }
0699         if (n < N_proc) {
0700           const float zout = msZ.constAt(n, 0, 0);
0701           const float zin = inPar.constAt(n, 2, 0);
0702           propSign(n, 0, 0) = (std::abs(zout) > std::abs(zin) ? 1.f : -1.f);
0703         }
0704       }
0705       MPlexHV plNrm;
0706 #pragma omp simd
0707       for (int n = 0; n < NN; ++n) {
0708         plNrm(n, 0, 0) = 0.f;
0709         plNrm(n, 1, 0) = 0.f;
0710         plNrm(n, 2, 0) = 1.f;
0711       }
0712       applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc);
0713 #ifdef DEBUG
0714       if (debug && g_debug) {
0715         for (int kk = 0; kk < N_proc; ++kk) {
0716           dprintf("propSign %d\n", kk);
0717           for (int i = 0; i < 1; ++i) {
0718             dprintf("%8f ", propSign.constAt(kk, i, 0));
0719           }
0720           dprintf("\n");
0721           dprintf("plNrm %d\n", kk);
0722           for (int i = 0; i < 3; ++i) {
0723             dprintf("%8f ", plNrm.constAt(kk, i, 0));
0724           }
0725           dprintf("\n");
0726           dprintf("outErr(after material) %d\n", kk);
0727           for (int i = 0; i < 6; ++i) {
0728             for (int j = 0; j < 6; ++j)
0729               dprintf("%8f ", outErr.constAt(kk, i, j));
0730             dprintf("\n");
0731           }
0732           dprintf("\n");
0733         }
0734       }
0735 #endif
0736     }
0737 
0738     squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
0739 
0740     // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the
0741     // state to input when propagation fails -- as was the default before.
0742     // if (pflags.copy_input_state_on_fail) {
0743     for (int i = 0; i < N_proc; ++i) {
0744       if (outFailFlag(i, 0, 0)) {
0745         outPar.copySlot(i, inPar);
0746         outErr.copySlot(i, inErr);
0747       }
0748     }
0749     // }
0750   }
0751 
0752   void helixAtZ(const MPlexLV& inPar,
0753                 const MPlexQI& inChg,
0754                 const MPlexQF& msZ,
0755                 MPlexLV& outPar,
0756                 MPlexLL& errorProp,
0757                 MPlexQI& outFailFlag,
0758                 const int N_proc,
0759                 const PropagationFlags& pflags) {
0760     errorProp.setVal(0.f);
0761     outFailFlag.setVal(0.f);
0762 
0763     // debug = true;
0764 #pragma omp simd
0765     for (int n = 0; n < NN; ++n) {
0766       //initialize erroProp to identity matrix, except element 2,2 which is zero
0767       errorProp(n, 0, 0) = 1.f;
0768       errorProp(n, 1, 1) = 1.f;
0769       errorProp(n, 3, 3) = 1.f;
0770       errorProp(n, 4, 4) = 1.f;
0771       errorProp(n, 5, 5) = 1.f;
0772     }
0773     float zout[NN];
0774     float zin[NN];
0775     float ipt[NN];
0776     float phiin[NN];
0777     float theta[NN];
0778 #pragma omp simd
0779     for (int n = 0; n < NN; ++n) {
0780       //initialize erroProp to identity matrix, except element 2,2 which is zero
0781       zout[n] = msZ.constAt(n, 0, 0);
0782       zin[n] = inPar.constAt(n, 2, 0);
0783       ipt[n] = inPar.constAt(n, 3, 0);
0784       phiin[n] = inPar.constAt(n, 4, 0);
0785       theta[n] = inPar.constAt(n, 5, 0);
0786     }
0787 
0788     float k[NN];
0789     if (pflags.use_param_b_field) {
0790 #pragma omp simd
0791       for (int n = 0; n < NN; ++n) {
0792         k[n] = inChg.constAt(n, 0, 0) * 100.f /
0793                (-Const::sol * Config::bFieldFromZR(zin[n], hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0))));
0794       }
0795     } else {
0796 #pragma omp simd
0797       for (int n = 0; n < NN; ++n) {
0798         k[n] = inChg.constAt(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
0799       }
0800     }
0801 
0802     float kinv[NN];
0803 #pragma omp simd
0804     for (int n = 0; n < NN; ++n) {
0805       kinv[n] = 1.f / k[n];
0806     }
0807 
0808 #pragma omp simd
0809     for (int n = 0; n < NN; ++n) {
0810       dprint_np(n,
0811                 std::endl
0812                     << "input parameters"
0813                     << " inPar.constAt(n, 0, 0)=" << std::setprecision(9) << inPar.constAt(n, 0, 0)
0814                     << " inPar.constAt(n, 1, 0)=" << std::setprecision(9) << inPar.constAt(n, 1, 0)
0815                     << " inPar.constAt(n, 2, 0)=" << std::setprecision(9) << inPar.constAt(n, 2, 0)
0816                     << " inPar.constAt(n, 3, 0)=" << std::setprecision(9) << inPar.constAt(n, 3, 0)
0817                     << " inPar.constAt(n, 4, 0)=" << std::setprecision(9) << inPar.constAt(n, 4, 0)
0818                     << " inPar.constAt(n, 5, 0)=" << std::setprecision(9) << inPar.constAt(n, 5, 0)
0819                     << " inChg.constAt(n, 0, 0)=" << std::setprecision(9) << inChg.constAt(n, 0, 0));
0820     }
0821 #pragma omp simd
0822     for (int n = 0; n < NN; ++n) {
0823       dprint_np(n,
0824                 "propagation start, dump parameters"
0825                     << std::endl
0826                     << "pos = " << inPar.constAt(n, 0, 0) << " " << inPar.constAt(n, 1, 0) << " "
0827                     << inPar.constAt(n, 2, 0) << std::endl
0828                     << "mom (cart) = " << std::cos(inPar.constAt(n, 4, 0)) / inPar.constAt(n, 3, 0) << " "
0829                     << std::sin(inPar.constAt(n, 4, 0)) / inPar.constAt(n, 3, 0) << " "
0830                     << 1. / (inPar.constAt(n, 3, 0) * tan(inPar.constAt(n, 5, 0))) << " r="
0831                     << std::sqrt(inPar.constAt(n, 0, 0) * inPar.constAt(n, 0, 0) +
0832                                  inPar.constAt(n, 1, 0) * inPar.constAt(n, 1, 0))
0833                     << " pT=" << 1. / std::abs(inPar.constAt(n, 3, 0)) << " q=" << inChg.constAt(n, 0, 0)
0834                     << " targetZ=" << msZ.constAt(n, 0, 0) << std::endl);
0835     }
0836 
0837     float pt[NN];
0838 #pragma omp simd
0839     for (int n = 0; n < NN; ++n) {
0840       pt[n] = 1.f / ipt[n];
0841     }
0842 
0843     //no trig approx here, phi can be large
0844     float cosP[NN];
0845     float sinP[NN];
0846 #pragma omp simd
0847     for (int n = 0; n < NN; ++n) {
0848       cosP[n] = std::cos(phiin[n]);
0849     }
0850 
0851 #pragma omp simd
0852     for (int n = 0; n < NN; ++n) {
0853       sinP[n] = std::sin(phiin[n]);
0854     }
0855 
0856     float cosT[NN];
0857     float sinT[NN];
0858 #pragma omp simd
0859     for (int n = 0; n < NN; ++n) {
0860       cosT[n] = std::cos(theta[n]);
0861     }
0862 
0863 #pragma omp simd
0864     for (int n = 0; n < NN; ++n) {
0865       sinT[n] = std::sin(theta[n]);
0866     }
0867 
0868     float tanT[NN];
0869     float icos2T[NN];
0870     float pxin[NN];
0871     float pyin[NN];
0872 #pragma omp simd
0873     for (int n = 0; n < NN; ++n) {
0874       tanT[n] = sinT[n] / cosT[n];
0875       icos2T[n] = 1.f / (cosT[n] * cosT[n]);
0876       pxin[n] = cosP[n] * pt[n];
0877       pyin[n] = sinP[n] * pt[n];
0878     }
0879 
0880     float deltaZ[NN];
0881     float alpha[NN];
0882 #pragma omp simd
0883     for (int n = 0; n < NN; ++n) {
0884       deltaZ[n] = zout[n] - zin[n];
0885       alpha[n] = deltaZ[n] * tanT[n] * ipt[n] * kinv[n];
0886     }
0887 
0888     float cosahTmp[NN];
0889     float sinahTmp[NN];
0890     if constexpr (Config::useTrigApprox) {
0891 #if !defined(__INTEL_COMPILER)
0892 #pragma omp simd
0893 #endif
0894       for (int n = 0; n < NN; ++n) {
0895         sincos4(alpha[n] * 0.5f, sinahTmp[n], cosahTmp[n]);
0896       }
0897     } else {
0898 #if !defined(__INTEL_COMPILER)
0899 #pragma omp simd
0900 #endif
0901       for (int n = 0; n < NN; ++n) {
0902         cosahTmp[n] = std::cos(alpha[n] * 0.5f);
0903       }
0904 #if !defined(__INTEL_COMPILER)
0905 #pragma omp simd
0906 #endif
0907       for (int n = 0; n < NN; ++n) {
0908         sinahTmp[n] = std::sin(alpha[n] * 0.5f);
0909       }
0910     }
0911 
0912     float cosah[NN];
0913     float sinah[NN];
0914     float cosa[NN];
0915     float sina[NN];
0916 #pragma omp simd
0917     for (int n = 0; n < NN; ++n) {
0918       cosah[n] = cosahTmp[n];
0919       sinah[n] = sinahTmp[n];
0920       cosa[n] = 1.f - 2.f * sinah[n] * sinah[n];
0921       sina[n] = 2.f * sinah[n] * cosah[n];
0922     }
0923 
0924 //update parameters
0925 #pragma omp simd
0926     for (int n = 0; n < NN; ++n) {
0927       outPar.At(n, 0, 0) = outPar.At(n, 0, 0) + 2.f * k[n] * sinah[n] * (pxin[n] * cosah[n] - pyin[n] * sinah[n]);
0928       outPar.At(n, 1, 0) = outPar.At(n, 1, 0) + 2.f * k[n] * sinah[n] * (pyin[n] * cosah[n] + pxin[n] * sinah[n]);
0929       outPar.At(n, 2, 0) = zout[n];
0930       outPar.At(n, 4, 0) = phiin[n] + alpha[n];
0931     }
0932 
0933 #pragma omp simd
0934     for (int n = 0; n < NN; ++n) {
0935       dprint_np(n,
0936                 "propagation to Z end (OLD), dump parameters\n"
0937                     << "   pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r="
0938                     << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl
0939                     << "   mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl
0940                     << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
0941                     << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
0942                     << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0))) << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0))
0943                     << std::endl);
0944     }
0945 
0946     float pxcaMpysa[NN];
0947 #pragma omp simd
0948     for (int n = 0; n < NN; ++n) {
0949       pxcaMpysa[n] = pxin[n] * cosa[n] - pyin[n] * sina[n];
0950     }
0951 
0952 #pragma omp simd
0953     for (int n = 0; n < NN; ++n) {
0954       errorProp(n, 0, 2) = -tanT[n] * ipt[n] * pxcaMpysa[n];
0955       errorProp(n, 0, 3) =
0956           k[n] * pt[n] * pt[n] *
0957           (cosP[n] * (alpha[n] * cosa[n] - sina[n]) + sinP[n] * 2.f * sinah[n] * (sinah[n] - alpha[n] * cosah[n]));
0958       errorProp(n, 0, 4) = -2.f * k[n] * pt[n] * sinah[n] * (sinP[n] * cosah[n] + cosP[n] * sinah[n]);
0959       errorProp(n, 0, 5) = deltaZ[n] * ipt[n] * pxcaMpysa[n] * icos2T[n];
0960     }
0961 
0962     float pycaPpxsa[NN];
0963 #pragma omp simd
0964     for (int n = 0; n < NN; ++n) {
0965       pycaPpxsa[n] = pyin[n] * cosa[n] + pxin[n] * sina[n];
0966     }
0967 
0968 #pragma omp simd
0969     for (int n = 0; n < NN; ++n) {
0970       errorProp(n, 1, 2) = -tanT[n] * ipt[n] * pycaPpxsa[n];
0971       errorProp(n, 1, 3) =
0972           k[n] * pt[n] * pt[n] *
0973           (sinP[n] * (alpha[n] * cosa[n] - sina[n]) - cosP[n] * 2.f * sinah[n] * (sinah[n] - alpha[n] * cosah[n]));
0974       errorProp(n, 1, 4) = 2.f * k[n] * pt[n] * sinah[n] * (cosP[n] * cosah[n] - sinP[n] * sinah[n]);
0975       errorProp(n, 1, 5) = deltaZ[n] * ipt[n] * pycaPpxsa[n] * icos2T[n];
0976     }
0977 
0978 #pragma omp simd
0979     for (int n = 0; n < NN; ++n) {
0980       errorProp(n, 4, 2) = -ipt[n] * tanT[n] * kinv[n];
0981       errorProp(n, 4, 3) = tanT[n] * deltaZ[n] * kinv[n];
0982       errorProp(n, 4, 5) = ipt[n] * deltaZ[n] * kinv[n] * icos2T[n];
0983     }
0984 
0985 #pragma omp simd
0986     for (int n = 0; n < NN; ++n) {
0987       dprint_np(
0988           n,
0989           "propagation end, dump parameters"
0990               << std::endl
0991               << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl
0992               << "mom (cart) = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0993               << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
0994               << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0)))
0995               << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0))
0996               << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl);
0997     }
0998 
0999     // PROP-FAIL-ENABLE Disabled to keep physics changes minimal.
1000     // To be reviewed, enabled and processed accordingly elsewhere.
1001     /*
1002     // Check for errors, set fail-flag.
1003     for (int n = 0; n < NN; ++n) {
1004       // We propagate for alpha: mark fail when prop angle more than pi/2
1005       if (std::abs(alpha[n]) > 1.57) {
1006         dprintf("helixAtZ: more than quarter turn, alpha = %f\n", alpha[n]);
1007         outFailFlag[n] = 1;
1008       } else {
1009         // Have we reached desired z? We can't know, we copy desired z to actual z.
1010         // Are we close to apex? Same condition as in propToR, 12.5 deg, cos(78.5deg) = 0.2
1011         float dotp = (outPar.At(n, 0, 0) * std::cos(outPar.At(n, 4, 0)) +
1012                       outPar.At(n, 1, 0) * std::sin(outPar.At(n, 4, 0))) /
1013                      std::hypot(outPar.At(n, 0, 0), outPar.At(n, 1, 0));
1014         if (dotp < 0.2 || dotp < 0) {
1015           dprintf("helixAtZ: dot product bad, dotp = %f\n", dotp);
1016           outFailFlag[n] = 1;
1017         }
1018       }
1019     }
1020     */
1021 
1022 #ifdef DEBUG
1023     if (debug && g_debug) {
1024       for (int n = 0; n < N_proc; ++n) {
1025         dmutex_guard;
1026         std::cout << n << ": jacobian" << std::endl;
1027         printf("%5f %5f %5f %5f %5f %5f\n",
1028                errorProp(n, 0, 0),
1029                errorProp(n, 0, 1),
1030                errorProp(n, 0, 2),
1031                errorProp(n, 0, 3),
1032                errorProp(n, 0, 4),
1033                errorProp(n, 0, 5));
1034         printf("%5f %5f %5f %5f %5f %5f\n",
1035                errorProp(n, 1, 0),
1036                errorProp(n, 1, 1),
1037                errorProp(n, 1, 2),
1038                errorProp(n, 1, 3),
1039                errorProp(n, 1, 4),
1040                errorProp(n, 1, 5));
1041         printf("%5f %5f %5f %5f %5f %5f\n",
1042                errorProp(n, 2, 0),
1043                errorProp(n, 2, 1),
1044                errorProp(n, 2, 2),
1045                errorProp(n, 2, 3),
1046                errorProp(n, 2, 4),
1047                errorProp(n, 2, 5));
1048         printf("%5f %5f %5f %5f %5f %5f\n",
1049                errorProp(n, 3, 0),
1050                errorProp(n, 3, 1),
1051                errorProp(n, 3, 2),
1052                errorProp(n, 3, 3),
1053                errorProp(n, 3, 4),
1054                errorProp(n, 3, 5));
1055         printf("%5f %5f %5f %5f %5f %5f\n",
1056                errorProp(n, 4, 0),
1057                errorProp(n, 4, 1),
1058                errorProp(n, 4, 2),
1059                errorProp(n, 4, 3),
1060                errorProp(n, 4, 4),
1061                errorProp(n, 4, 5));
1062         printf("%5f %5f %5f %5f %5f %5f\n",
1063                errorProp(n, 5, 0),
1064                errorProp(n, 5, 1),
1065                errorProp(n, 5, 2),
1066                errorProp(n, 5, 3),
1067                errorProp(n, 5, 4),
1068                errorProp(n, 5, 5));
1069       }
1070     }
1071 #endif
1072   }
1073 
1074   void helixAtPlane(const MPlexLV& inPar,
1075                     const MPlexQI& inChg,
1076                     const MPlexHV& plPnt,
1077                     const MPlexHV& plNrm,
1078                     MPlexQF& pathL,
1079                     MPlexLV& outPar,
1080                     MPlexLL& errorProp,
1081                     MPlexQI& outFailFlag,
1082                     const int N_proc,
1083                     const PropagationFlags& pflags) {
1084     errorProp.setVal(0.f);
1085     outFailFlag.setVal(0.f);
1086 
1087     helixAtPlane_impl(inPar, inChg, plPnt, plNrm, pathL, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags);
1088   }
1089 
1090   void propagateHelixToPlaneMPlex(const MPlexLS& inErr,
1091                                   const MPlexLV& inPar,
1092                                   const MPlexQI& inChg,
1093                                   const MPlexHV& plPnt,
1094                                   const MPlexHV& plNrm,
1095                                   MPlexLS& outErr,
1096                                   MPlexLV& outPar,
1097                                   MPlexQI& outFailFlag,
1098                                   const int N_proc,
1099                                   const PropagationFlags& pflags,
1100                                   const MPlexQI* noMatEffPtr) {
1101     // debug = true;
1102 
1103     outErr = inErr;
1104     outPar = inPar;
1105 
1106     MPlexQF pathL;
1107     MPlexLL errorProp;
1108 
1109     helixAtPlane(inPar, inChg, plPnt, plNrm, pathL, outPar, errorProp, outFailFlag, N_proc, pflags);
1110 
1111     for (int n = 0; n < NN; ++n) {
1112       dprint_np(
1113           n,
1114           "propagation to plane end, dump parameters\n"
1115               //<< "   D = " << s[n] << " alpha = " << s[n] * std::sin(inPar(n, 5, 0)) * inPar(n, 3, 0) * kinv[n] << " kinv = " << kinv[n] << std::endl
1116               << "   pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r="
1117               << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl
1118               << "   mom = " << outPar(n, 3, 0) << " " << outPar(n, 4, 0) << " " << outPar(n, 5, 0) << std::endl
1119               << " cart= " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
1120               << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
1121               << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
1122     }
1123 
1124 #ifdef DEBUG
1125     if (debug && g_debug) {
1126       for (int kk = 0; kk < N_proc; ++kk) {
1127         dprintf("inPar %d\n", kk);
1128         for (int i = 0; i < 6; ++i) {
1129           dprintf("%8f ", inPar.constAt(kk, i, 0));
1130         }
1131         dprintf("\n");
1132         dprintf("inErr %d\n", kk);
1133         for (int i = 0; i < 6; ++i) {
1134           for (int j = 0; j < 6; ++j)
1135             dprintf("%8f ", inErr.constAt(kk, i, j));
1136           dprintf("\n");
1137         }
1138         dprintf("\n");
1139 
1140         for (int kk = 0; kk < N_proc; ++kk) {
1141           dprintf("plNrm %d\n", kk);
1142           for (int j = 0; j < 3; ++j)
1143             dprintf("%8f ", plNrm.constAt(kk, 0, j));
1144         }
1145         dprintf("\n");
1146 
1147         for (int kk = 0; kk < N_proc; ++kk) {
1148           dprintf("pathL %d\n", kk);
1149           for (int j = 0; j < 1; ++j)
1150             dprintf("%8f ", pathL.constAt(kk, 0, j));
1151         }
1152         dprintf("\n");
1153 
1154         dprintf("errorProp %d\n", kk);
1155         for (int i = 0; i < 6; ++i) {
1156           for (int j = 0; j < 6; ++j)
1157             dprintf("%8f ", errorProp.At(kk, i, j));
1158           dprintf("\n");
1159         }
1160         dprintf("\n");
1161       }
1162     }
1163 #endif
1164 
1165     // Matriplex version of:
1166     // result.errors = ROOT::Math::Similarity(errorProp, outErr);
1167     MPlexLL temp;
1168     MultHelixPropFull(errorProp, outErr, temp);
1169     MultHelixPropTranspFull(errorProp, temp, outErr);
1170 
1171 #ifdef DEBUG
1172     if (debug && g_debug) {
1173       for (int kk = 0; kk < N_proc; ++kk) {
1174         dprintf("outErr %d\n", kk);
1175         for (int i = 0; i < 6; ++i) {
1176           for (int j = 0; j < 6; ++j)
1177             dprintf("%8f ", outErr.constAt(kk, i, j));
1178           dprintf("\n");
1179         }
1180         dprintf("\n");
1181       }
1182     }
1183 #endif
1184 
1185     if (pflags.apply_material) {
1186       MPlexQF hitsRl;
1187       MPlexQF hitsXi;
1188       MPlexQF propSign;
1189 
1190       const TrackerInfo& tinfo = *pflags.tracker_info;
1191 
1192 #pragma omp simd
1193       for (int n = 0; n < NN; ++n) {
1194         if (n >= N_proc || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0))) {
1195           hitsRl(n, 0, 0) = 0.f;
1196           hitsXi(n, 0, 0) = 0.f;
1197         } else {
1198           const float hypo = std::hypot(outPar(n, 0, 0), outPar(n, 1, 0));
1199           auto mat = tinfo.material_checked(std::abs(outPar(n, 2, 0)), hypo);
1200           hitsRl(n, 0, 0) = mat.radl;
1201           hitsXi(n, 0, 0) = mat.bbxi;
1202         }
1203         propSign(n, 0, 0) = (pathL(n, 0, 0) > 0.f ? 1.f : -1.f);
1204       }
1205       applyMaterialEffects(hitsRl, hitsXi, propSign, plNrm, outErr, outPar, N_proc);
1206 #ifdef DEBUG
1207       if (debug && g_debug) {
1208         for (int kk = 0; kk < N_proc; ++kk) {
1209           dprintf("propSign %d\n", kk);
1210           for (int i = 0; i < 1; ++i) {
1211             dprintf("%8f ", propSign.constAt(kk, i, 0));
1212           }
1213           dprintf("\n");
1214           dprintf("plNrm %d\n", kk);
1215           for (int i = 0; i < 3; ++i) {
1216             dprintf("%8f ", plNrm.constAt(kk, i, 0));
1217           }
1218           dprintf("\n");
1219           dprintf("outErr(after material) %d\n", kk);
1220           for (int i = 0; i < 6; ++i) {
1221             for (int j = 0; j < 6; ++j)
1222               dprintf("%8f ", outErr.constAt(kk, i, j));
1223             dprintf("\n");
1224           }
1225           dprintf("\n");
1226         }
1227       }
1228 #endif
1229     }
1230 
1231     squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
1232 
1233     // PROP-FAIL-ENABLE To keep physics changes minimal, we always restore the
1234     // state to input when propagation fails -- as was the default before.
1235     // if (pflags.copy_input_state_on_fail) {
1236     for (int i = 0; i < N_proc; ++i) {
1237       if (outFailFlag(i, 0, 0)) {
1238         outPar.copySlot(i, inPar);
1239         outErr.copySlot(i, inErr);
1240       }
1241     }
1242     // }
1243   }
1244 
1245   //==============================================================================
1246 
1247   void applyMaterialEffects(const MPlexQF& hitsRl,
1248                             const MPlexQF& hitsXi,
1249                             const MPlexQF& propSign,
1250                             const MPlexHV& plNrm,
1251                             MPlexLS& outErr,
1252                             MPlexLV& outPar,
1253                             const int N_proc) {
1254 #pragma omp simd
1255     for (int n = 0; n < NN; ++n) {
1256       if (n >= N_proc)
1257         continue;
1258       float radL = hitsRl.constAt(n, 0, 0);
1259       if (radL < 1e-13f)
1260         continue;  //ugly, please fixme
1261       const float theta = outPar.constAt(n, 5, 0);
1262       // const float pt = 1.f / outPar.constAt(n, 3, 0);  //fixme, make sure it is positive?
1263       const float ipt = outPar.constAt(n, 3, 0);
1264       const float pt = 1.f / ipt;  //fixme, make sure it is positive?
1265       const float ipt2 = ipt * ipt;
1266       const float p = pt / std::sin(theta);
1267       const float pz = p * std::cos(theta);
1268       const float p2 = p * p;
1269       constexpr float mpi = 0.140;       // m=140 MeV, pion
1270       constexpr float mpi2 = mpi * mpi;  // m=140 MeV, pion
1271       const float beta2 = p2 / (p2 + mpi2);
1272       const float beta = std::sqrt(beta2);
1273       //radiation lenght, corrected for the crossing angle (cos alpha from dot product of radius vector and momentum)
1274       const float invCos =
1275           p / std::abs(pt * std::cos(outPar.constAt(n, 4, 0)) * plNrm.constAt(n, 0, 0) +
1276                        pt * std::sin(outPar.constAt(n, 4, 0)) * plNrm.constAt(n, 1, 0) + pz * plNrm.constAt(n, 2, 0));
1277       radL = radL * invCos;  //fixme works only for barrel geom
1278       // multiple scattering
1279       //vary independently phi and theta by the rms of the planar multiple scattering angle
1280       // XXX-KMD radL < 0, see your fixme above! Repeating bailout
1281       if (radL < 1e-13f)
1282         continue;
1283       // const float thetaMSC = 0.0136f*std::sqrt(radL)*(1.f+0.038f*std::log(radL))/(beta*p);// eq 32.15
1284       // const float thetaMSC2 = thetaMSC*thetaMSC;
1285       const float thetaMSC = 0.0136f * (1.f + 0.038f * std::log(radL)) / (beta * p);  // eq 32.15
1286       const float thetaMSC2 = thetaMSC * thetaMSC * radL;
1287       if constexpr (Config::usePtMultScat) {
1288         outErr.At(n, 3, 3) += thetaMSC2 * pz * pz * ipt2 * ipt2;
1289         outErr.At(n, 3, 5) -= thetaMSC2 * pz * ipt2;
1290         outErr.At(n, 4, 4) += thetaMSC2 * p2 * ipt2;
1291         outErr.At(n, 5, 5) += thetaMSC2;
1292       } else {
1293         outErr.At(n, 4, 4) += thetaMSC2;
1294         outErr.At(n, 5, 5) += thetaMSC2;
1295       }
1296       //std::cout << "beta=" << beta << " p=" << p << std::endl;
1297       //std::cout << "multiple scattering thetaMSC=" << thetaMSC << " thetaMSC2=" << thetaMSC2 << " radL=" << radL << std::endl;
1298       // energy loss
1299       // XXX-KMD beta2 = 1 => 1 / sqrt(0)
1300       // const float gamma = 1.f/std::sqrt(1.f - std::min(beta2, 0.999999f));
1301       // const float gamma2 = gamma*gamma;
1302       const float gamma2 = (p2 + mpi2) / mpi2;
1303       const float gamma = std::sqrt(gamma2);  //1.f/std::sqrt(1.f - std::min(beta2, 0.999999f));
1304       constexpr float me = 0.0005;            // m=0.5 MeV, electron
1305       const float wmax = 2.f * me * beta2 * gamma2 / (1.f + 2.f * gamma * me / mpi + me * me / (mpi * mpi));
1306       constexpr float I = 16.0e-9 * 10.75;
1307       const float deltahalf = std::log(28.816e-9f * std::sqrt(2.33f * 0.498f) / I) + std::log(beta * gamma) - 0.5f;
1308       const float dEdx =
1309           beta < 1.f
1310               ? (2.f * (hitsXi.constAt(n, 0, 0) * invCos *
1311                         (0.5f * std::log(2.f * me * beta2 * gamma2 * wmax / (I * I)) - beta2 - deltahalf) / beta2))
1312               : 0.f;  //protect against infs and nans
1313       // dEdx = dEdx*2.;//xi in cmssw is defined with an extra factor 0.5 with respect to formula 27.1 in pdg
1314       //std::cout << "dEdx=" << dEdx << " delta=" << deltahalf << " wmax=" << wmax << " Xi=" << hitsXi.constAt(n,0,0) << std::endl;
1315       const float dP = propSign.constAt(n, 0, 0) * dEdx / beta;
1316       outPar.At(n, 3, 0) = p / (std::max(p + dP, 0.001f) * pt);  //stay above 1MeV
1317       //assume 100% uncertainty
1318       outErr.At(n, 3, 3) += dP * dP / (p2 * pt * pt);
1319     }
1320   }
1321 
1322 }  // end namespace mkfit