Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-25 02:14:14

0001 #include "MkFinder.h"
0002 
0003 #include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
0004 #include "RecoTracker/MkFitCore/interface/IterationConfig.h"
0005 #include "CandCloner.h"
0006 #include "FindingFoos.h"
0007 #include "KalmanUtilsMPlex.h"
0008 #include "MatriplexPackers.h"
0009 #include "MiniPropagators.h"
0010 
0011 //#define DEBUG
0012 #include "Debug.h"
0013 
0014 #if defined(MKFIT_STANDALONE)
0015 #include "RecoTracker/MkFitCore/standalone/Event.h"
0016 #endif
0017 
0018 #ifdef RNT_DUMP_MkF_SelHitIdcs
0019 // declares struct RntIfc_selectHitIndices rnt_shi in unnamed namespace;
0020 #include "RecoTracker/MkFitCore/standalone/RntDumper/MkFinder_selectHitIndices.icc"
0021 #endif
0022 
0023 #include "vdt/atan2.h"
0024 
0025 #include <algorithm>
0026 #include <queue>
0027 
0028 namespace mkfit {
0029 
0030   void MkFinder::setup(const PropagationConfig &pc,
0031                        const IterationConfig &ic,
0032                        const IterationParams &ip,
0033                        const IterationLayerConfig &ilc,
0034                        const SteeringParams &sp,
0035                        const std::vector<bool> *ihm,
0036                        const Event *ev,
0037                        int region,
0038                        bool infwd) {
0039     m_prop_config = &pc;
0040     m_iteration_config = &ic;
0041     m_iteration_params = &ip;
0042     m_iteration_layer_config = &ilc;
0043     m_steering_params = &sp;
0044     m_iteration_hit_mask = ihm;
0045     m_event = ev;
0046     m_current_region = region;
0047     m_in_fwd = infwd;
0048   }
0049 
0050   void MkFinder::setup_bkfit(const PropagationConfig &pc, const SteeringParams &sp, const Event *ev) {
0051     m_prop_config = &pc;
0052     m_steering_params = &sp;
0053     m_event = ev;
0054   }
0055 
0056   void MkFinder::release() {
0057     m_prop_config = nullptr;
0058     m_iteration_config = nullptr;
0059     m_iteration_params = nullptr;
0060     m_iteration_layer_config = nullptr;
0061     m_steering_params = nullptr;
0062     m_iteration_hit_mask = nullptr;
0063     m_event = nullptr;
0064     m_current_region = -1;
0065     m_in_fwd = true;
0066   }
0067 
0068   void MkFinder::begin_layer(const LayerOfHits &layer_of_hits) {
0069 #ifdef RNT_DUMP_MkF_SelHitIdcs
0070     const LayerOfHits &L = layer_of_hits;
0071     const LayerInfo &LI = L.layer_info();
0072     rnt_shi.ResetH();
0073     rnt_shi.ResetF();
0074     *rnt_shi.h = {m_event->evtID(),
0075                   m_iteration_config->m_iteration_index,
0076                   m_iteration_config->m_track_algorithm,
0077                   m_current_region,
0078                   L.layer_id(),
0079                   L.is_barrel() ? LI.rin() : LI.zmin(),
0080                   LI.is_barrel() ? LI.rout() : LI.zmax(),
0081                   L.is_barrel(),
0082                   L.is_pixel(),
0083                   L.is_stereo()};
0084     *rnt_shi.f = *rnt_shi.h;
0085 #endif
0086   }
0087 
0088   void MkFinder::end_layer() {
0089 #ifdef RNT_DUMP_MkF_SelHitIdcs
0090     rnt_shi.FillH();
0091     rnt_shi.FillF();
0092 #endif
0093   }
0094 
0095   //==============================================================================
0096   // Input / Output TracksAndHitIdx
0097   //==============================================================================
0098 
0099   void MkFinder::inputTracksAndHitIdx(const std::vector<Track> &tracks, int beg, int end, bool inputProp) {
0100     // Assign track parameters to initial state and copy hit values in.
0101 
0102     // This might not be true for the last chunk!
0103     // assert(end - beg == NN);
0104 
0105     const int iI = inputProp ? iP : iC;
0106 
0107     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0108       copy_in(tracks[i], imp, iI);
0109     }
0110   }
0111 
0112   void MkFinder::inputTracksAndHitIdx(
0113       const std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool inputProp, int mp_offset) {
0114     // Assign track parameters to initial state and copy hit values in.
0115 
0116     // This might not be true for the last chunk!
0117     // assert(end - beg == NN);
0118 
0119     const int iI = inputProp ? iP : iC;
0120 
0121     for (int i = beg, imp = mp_offset; i < end; ++i, ++imp) {
0122       copy_in(tracks[idxs[i]], imp, iI);
0123     }
0124   }
0125 
0126   void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
0127                                       const std::vector<std::pair<int, int>> &idxs,
0128                                       int beg,
0129                                       int end,
0130                                       bool inputProp) {
0131     // Assign track parameters to initial state and copy hit values in.
0132 
0133     // This might not be true for the last chunk!
0134     // assert(end - beg == NN);
0135 
0136     const int iI = inputProp ? iP : iC;
0137 
0138     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0139       const TrackCand &trk = tracks[idxs[i].first][idxs[i].second];
0140 
0141       copy_in(trk, imp, iI);
0142 
0143       m_SeedIdx(imp, 0, 0) = idxs[i].first;
0144       m_CandIdx(imp, 0, 0) = idxs[i].second;
0145       m_SeedOriginIdx[imp] = tracks[idxs[i].first].seed_origin_index();
0146     }
0147   }
0148 
0149   void MkFinder::inputTracksAndHits(const std::vector<CombCandidate> &tracks,
0150                                     const LayerOfHits &layer_of_hits,
0151                                     const std::vector<UpdateIndices> &idxs,
0152                                     int beg,
0153                                     int end,
0154                                     bool inputProp) {
0155     // Assign track parameters to initial state and copy hit values in.
0156 
0157     // This might not be true for the last chunk!
0158     // assert(end - beg == NN);
0159 
0160     const int iI = inputProp ? iP : iC;
0161 
0162     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0163       const TrackCand &trk = tracks[idxs[i].seed_idx][idxs[i].cand_idx];
0164 
0165       copy_in(trk, imp, iI);
0166 
0167       m_SeedIdx(imp, 0, 0) = idxs[i].seed_idx;
0168       m_CandIdx(imp, 0, 0) = idxs[i].cand_idx;
0169       m_SeedOriginIdx[imp] = tracks[idxs[i].seed_idx].seed_origin_index();
0170 
0171       // Reuse selectHitIndices() arrays -- used also in packModuleNormDir()
0172       m_XHitArr(imp, 0, 0) = idxs[i].hit_idx;
0173       m_XHitSize(imp, 0, 0) = 1;
0174 
0175       const Hit &hit = layer_of_hits.refHit(idxs[i].hit_idx);
0176       m_msErr.copyIn(imp, hit.errArray());
0177       m_msPar.copyIn(imp, hit.posArray());
0178     }
0179   }
0180 
0181   void MkFinder::inputOverlapHits(const LayerOfHits &layer_of_hits,
0182                                   const std::vector<UpdateIndices> &idxs,
0183                                   int beg,
0184                                   int end) {
0185     // Copy overlap hit values in.
0186 
0187     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0188       const Hit &hit = layer_of_hits.refHit(idxs[i].ovlp_idx);
0189       m_msErr.copyIn(imp, hit.errArray());
0190       m_msPar.copyIn(imp, hit.posArray());
0191     }
0192   }
0193 
0194   void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
0195                                       const std::vector<std::pair<int, IdxChi2List>> &idxs,
0196                                       int beg,
0197                                       int end,
0198                                       bool inputProp) {
0199     // Assign track parameters to initial state and copy hit values in.
0200 
0201     // This might not be true for the last chunk!
0202     // assert(end - beg == NN);
0203 
0204     const int iI = inputProp ? iP : iC;
0205 
0206     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0207       const TrackCand &trk = tracks[idxs[i].first][idxs[i].second.trkIdx];
0208 
0209       copy_in(trk, imp, iI);
0210 
0211       m_SeedIdx(imp, 0, 0) = idxs[i].first;
0212       m_CandIdx(imp, 0, 0) = idxs[i].second.trkIdx;
0213       m_SeedOriginIdx[imp] = tracks[idxs[i].first].seed_origin_index();
0214     }
0215   }
0216 
0217   void MkFinder::outputTracksAndHitIdx(std::vector<Track> &tracks, int beg, int end, bool outputProp) const {
0218     // Copies requested track parameters into Track objects.
0219     // The tracks vector should be resized to allow direct copying.
0220 
0221     const int iO = outputProp ? iP : iC;
0222 
0223     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0224       copy_out(tracks[i], imp, iO);
0225     }
0226   }
0227 
0228   void MkFinder::outputTracksAndHitIdx(
0229       std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool outputProp) const {
0230     // Copies requested track parameters into Track objects.
0231     // The tracks vector should be resized to allow direct copying.
0232 
0233     const int iO = outputProp ? iP : iC;
0234 
0235     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0236       copy_out(tracks[idxs[i]], imp, iO);
0237     }
0238   }
0239 
0240   void MkFinder::packModuleNormDir(
0241       const LayerOfHits &layer_of_hits, int hit_cnt, MPlexHV &norm, MPlexHV &dir, int N_proc) const {
0242     for (int itrack = 0; itrack < NN; ++itrack) {
0243       if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
0244         const auto &hit = layer_of_hits.refHit(m_XHitArr.constAt(itrack, hit_cnt, 0));
0245         unsigned int mid = hit.detIDinLayer();
0246         const ModuleInfo &mi = layer_of_hits.layer_info().module_info(mid);
0247         norm.At(itrack, 0, 0) = mi.zdir[0];
0248         norm.At(itrack, 1, 0) = mi.zdir[1];
0249         norm.At(itrack, 2, 0) = mi.zdir[2];
0250         dir.At(itrack, 0, 0) = mi.xdir[0];
0251         dir.At(itrack, 1, 0) = mi.xdir[1];
0252         dir.At(itrack, 2, 0) = mi.xdir[2];
0253       }
0254     }
0255   }
0256 
0257   //==============================================================================
0258   // getHitSelDynamicWindows
0259   //==============================================================================
0260   // From HitSelectionWindows.h: track-related config on hit selection windows
0261 
0262   void MkFinder::getHitSelDynamicWindows(
0263       const float invpt, const float theta, float &min_dq, float &max_dq, float &min_dphi, float &max_dphi) {
0264     float max_invpt = std::min(invpt, 10.0f);  // => pT>0.1 GeV
0265 
0266     enum SelWinParameters_e { dp_sf = 0, dp_0, dp_1, dp_2, dq_sf, dq_0, dq_1, dq_2 };
0267     auto &v = m_iteration_layer_config->get_window_params(m_in_fwd, true);
0268 
0269     if (!v.empty()) {
0270       // dq hit selection window
0271       const float this_dq = v[dq_sf] * (v[dq_0] * max_invpt + v[dq_1] * theta + v[dq_2]);
0272       // In case value is below 0 (bad window derivation or other reasons), leave original limits
0273       if (this_dq > 0.f) {
0274         min_dq = this_dq;
0275         max_dq = 2.0f * min_dq;
0276       }
0277 
0278       // dphi hit selection window
0279       const float this_dphi = v[dp_sf] * (v[dp_0] * max_invpt + v[dp_1] * theta + v[dp_2]);
0280       // In case value is too low (bad window derivation or other reasons), leave original limits
0281       if (this_dphi > min_dphi) {
0282         min_dphi = this_dphi;
0283         max_dphi = 2.0f * min_dphi;
0284       }
0285     }
0286   }
0287 
0288   //==============================================================================
0289   // getHitSelDynamicChi2Cut
0290   //==============================================================================
0291   // From HitSelectionWindows.h: track-related config on hit selection windows
0292 
0293   inline float MkFinder::getHitSelDynamicChi2Cut(const int itrk, const int ipar) {
0294     const float minChi2Cut = m_iteration_params->chi2Cut_min;
0295     const float invpt = m_Par[ipar].At(itrk, 3, 0);
0296     const float theta = std::abs(m_Par[ipar].At(itrk, 5, 0) - Const::PIOver2);
0297 
0298     float max_invpt = std::min(invpt, 10.0f);  // => pT>0.1 GeV
0299 
0300     enum SelWinParameters_e { c2_sf = 8, c2_0, c2_1, c2_2 };
0301     auto &v = m_iteration_layer_config->get_window_params(m_in_fwd, true);
0302 
0303     if (!v.empty()) {
0304       float this_c2 = v[c2_sf] * (v[c2_0] * max_invpt + v[c2_1] * theta + v[c2_2]);
0305       // In case value is too low (bad window derivation or other reasons), leave original limits
0306       if (this_c2 > minChi2Cut)
0307         return this_c2;
0308     }
0309     return minChi2Cut;
0310   }
0311 
0312   //==============================================================================
0313   // SelectHitIndices
0314   //==============================================================================
0315 
0316   void MkFinder::selectHitIndices(const LayerOfHits &layer_of_hits, const int N_proc, bool fill_binsearch_only) {
0317     // bool debug = true;
0318     using bidx_t = LayerOfHits::bin_index_t;
0319     using bcnt_t = LayerOfHits::bin_content_t;
0320     const LayerOfHits &L = layer_of_hits;
0321     const IterationLayerConfig &ILC = *m_iteration_layer_config;
0322 
0323     const int iI = iP;
0324     const float nSigmaPhi = 3;
0325     const float nSigmaZ = 3;
0326     const float nSigmaR = 3;
0327 
0328     dprintf("LayerOfHits::SelectHitIndices %s layer=%d N_proc=%d\n",
0329             L.is_barrel() ? "barrel" : "endcap",
0330             L.layer_id(),
0331             N_proc);
0332 
0333     float dqv[NN], dphiv[NN], qv[NN], phiv[NN];
0334     bidx_t qb1v[NN], qb2v[NN], qbv[NN], pb1v[NN], pb2v[NN];
0335 
0336     const auto assignbins = [&](int itrack,
0337                                 float q,
0338                                 float dq,
0339                                 float phi,
0340                                 float dphi,
0341                                 float min_dq,
0342                                 float max_dq,
0343                                 float min_dphi,
0344                                 float max_dphi) {
0345       dphi = std::clamp(std::abs(dphi), min_dphi, max_dphi);
0346       dq = std::clamp(dq, min_dq, max_dq);
0347       //
0348       qv[itrack] = q;
0349       phiv[itrack] = phi;
0350       dphiv[itrack] = dphi;
0351       dqv[itrack] = dq;
0352       //
0353       qbv[itrack] = L.qBinChecked(q);
0354       qb1v[itrack] = L.qBinChecked(q - dq);
0355       qb2v[itrack] = L.qBinChecked(q + dq) + 1;
0356       pb1v[itrack] = L.phiBinChecked(phi - dphi);
0357       pb2v[itrack] = L.phiMaskApply(L.phiBin(phi + dphi) + 1);
0358     };
0359 
0360     const auto calcdphi2 = [&](int itrack, float dphidx, float dphidy) {
0361       return dphidx * dphidx * m_Err[iI].constAt(itrack, 0, 0) + dphidy * dphidy * m_Err[iI].constAt(itrack, 1, 1) +
0362              2 * dphidx * dphidy * m_Err[iI].constAt(itrack, 0, 1);
0363     };
0364 
0365     const auto calcdphi = [&](float dphi2, float min_dphi) {
0366       return std::max(nSigmaPhi * std::sqrt(std::abs(dphi2)), min_dphi);
0367     };
0368 
0369     if (L.is_barrel()) {
0370       // Pull out the part of the loop that vectorizes with icc and gcc
0371       // In llvm16 clang issues a warning that it can't vectorize
0372       // the loop.  Unfortunately, there doesn't seem to be a
0373       // pragma to suppress the warning, so we ifdef it out.  This
0374       // should be rechecked if llvm vectorization improves.
0375 #if !defined(__clang__)
0376 #pragma omp simd
0377 #endif
0378       for (int itrack = 0; itrack < NN; ++itrack) {
0379         if (itrack >= N_proc)
0380           continue;
0381         m_XHitSize[itrack] = 0;
0382 
0383         float min_dq = ILC.min_dq();
0384         float max_dq = ILC.max_dq();
0385         float min_dphi = ILC.min_dphi();
0386         float max_dphi = ILC.max_dphi();
0387 
0388         const float invpt = m_Par[iI].At(itrack, 3, 0);
0389         const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
0390         getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
0391 
0392         const float x = m_Par[iI].constAt(itrack, 0, 0);
0393         const float y = m_Par[iI].constAt(itrack, 1, 0);
0394         const float r2 = x * x + y * y;
0395         const float dphidx = -y / r2, dphidy = x / r2;
0396         const float dphi2 = calcdphi2(itrack, dphidx, dphidy);
0397 #ifdef HARD_CHECK
0398         assert(dphi2 >= 0);
0399 #endif
0400 
0401         const float phi = getPhi(x, y);
0402         float dphi = calcdphi(dphi2, min_dphi);
0403 
0404         const float z = m_Par[iI].constAt(itrack, 2, 0);
0405         const float dz = std::abs(nSigmaZ * std::sqrt(m_Err[iI].constAt(itrack, 2, 2)));
0406         const float edgeCorr =
0407             std::abs(0.5f * (L.layer_info().rout() - L.layer_info().rin()) / std::tan(m_Par[iI].constAt(itrack, 5, 0)));
0408         // XXX-NUM-ERR above, m_Err(2,2) gets negative!
0409 
0410         m_XWsrResult[itrack] = L.is_within_z_sensitive_region(z, std::sqrt(dz * dz + edgeCorr * edgeCorr));
0411         assignbins(itrack, z, dz, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
0412 
0413         // Relax propagation-fail detection to be in line with pre-43145.
0414         if (m_FailFlag[itrack] && std::sqrt(r2) >= L.layer_info().rin()) {
0415           m_FailFlag[itrack] = 0;
0416         }
0417       }
0418     } else  // endcap
0419     {
0420       //layer half-thikness for dphi spread calculation; only for very restrictive iters
0421       const float layerD = std::abs(L.layer_info().zmax() - L.layer_info().zmin()) * 0.5f *
0422                            (m_iteration_params->maxConsecHoles == 0 || m_iteration_params->maxHolesPerCand == 0);
0423       // Pull out the part of the loop that vectorizes with icc and gcc
0424 #if !defined(__clang__)
0425 #pragma omp simd
0426 #endif
0427       for (int itrack = 0; itrack < NN; ++itrack) {
0428         m_XHitSize[itrack] = 0;
0429 
0430         float min_dq = ILC.min_dq();
0431         float max_dq = ILC.max_dq();
0432         float min_dphi = ILC.min_dphi();
0433         float max_dphi = ILC.max_dphi();
0434 
0435         const float invpt = m_Par[iI].At(itrack, 3, 0);
0436         const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
0437         getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
0438 
0439         const float x = m_Par[iI].constAt(itrack, 0, 0);
0440         const float y = m_Par[iI].constAt(itrack, 1, 0);
0441         const float r2 = x * x + y * y;
0442         const float r2Inv = 1.f / r2;
0443         const float dphidx = -y * r2Inv, dphidy = x * r2Inv;
0444         const float phi = getPhi(x, y);
0445         const float dphi2 =
0446             calcdphi2(itrack, dphidx, dphidy)
0447             //range from finite layer thickness
0448             + std::pow(layerD * std::tan(m_Par[iI].At(itrack, 5, 0)) * std::sin(m_Par[iI].At(itrack, 4, 0) - phi), 2) *
0449                   r2Inv;
0450 #ifdef HARD_CHECK
0451         assert(dphi2 >= 0);
0452 #endif
0453 
0454         float dphi = calcdphi(dphi2, min_dphi);
0455 
0456         const float r = std::sqrt(r2);
0457         const float dr = nSigmaR * std::sqrt(std::abs(x * x * m_Err[iI].constAt(itrack, 0, 0) +
0458                                                       y * y * m_Err[iI].constAt(itrack, 1, 1) +
0459                                                       2 * x * y * m_Err[iI].constAt(itrack, 0, 1)) /
0460                                              r2);
0461         const float edgeCorr = std::abs(0.5f * (L.layer_info().zmax() - L.layer_info().zmin()) *
0462                                         std::tan(m_Par[iI].constAt(itrack, 5, 0)));
0463 
0464         m_XWsrResult[itrack] = L.is_within_r_sensitive_region(r, std::sqrt(dr * dr + edgeCorr * edgeCorr));
0465         assignbins(itrack, r, dr, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
0466       }
0467     }
0468 
0469 #ifdef RNT_DUMP_MkF_SelHitIdcs
0470     if (fill_binsearch_only) {
0471       // XXX loop over good indices (prepared in V2) and put in V1 BinSearch results
0472       for (auto i : rnt_shi.f_h_idcs) {
0473         CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[i]];
0474         ci.bso = BinSearch({phiv[i],
0475                             dphiv[i],
0476                             qv[i],
0477                             dqv[i],
0478                             pb1v[i],
0479                             pb2v[i],
0480                             qb1v[i],
0481                             qb2v[i],
0482                             m_XWsrResult[i].m_wsr,
0483                             m_XWsrResult[i].m_in_gap,
0484                             false});
0485       }
0486       return;
0487     }
0488 #endif
0489 
0490     // Vectorizing this makes it run slower!
0491     //#pragma omp simd
0492     for (int itrack = 0; itrack < NN; ++itrack) {
0493       if (itrack >= N_proc) {
0494         continue;
0495       }
0496       // PROP-FAIL-ENABLE The following to be enabled when propagation failure
0497       // detection is properly implemented in propagate-to-R/Z.
0498       if (m_FailFlag[itrack]) {
0499         m_XWsrResult[itrack].m_wsr = WSR_Failed;
0500         continue;
0501       }
0502 
0503       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
0504         continue;
0505       }
0506 
0507       const bidx_t qb = qbv[itrack];
0508       const bidx_t qb1 = qb1v[itrack];
0509       const bidx_t qb2 = qb2v[itrack];
0510       const bidx_t pb1 = pb1v[itrack];
0511       const bidx_t pb2 = pb2v[itrack];
0512 
0513       const float q = qv[itrack];
0514       const float phi = phiv[itrack];
0515       const float dphi = dphiv[itrack];
0516       const float dq = dqv[itrack];
0517 
0518       // clang-format off
0519       dprintf("  %2d/%2d: %6.3f %6.3f %6.6f %7.5f %3u %3u %4u %4u\n",
0520               L.layer_id(), itrack, q, phi, dq, dphi,
0521               qb1, qb2, pb1, pb2);
0522       // clang-format on
0523 
0524 #if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
0525       const auto ngr = [](float f) { return isFinite(f) ? f : -999.0f; };
0526 
0527       const int seed_lbl = m_event->currentSeed(m_SeedOriginIdx[itrack]).label();
0528       Event::SimLabelFromHits slfh = m_event->simLabelForCurrentSeed(m_SeedOriginIdx[itrack]);
0529       const int seed_mcid = (slfh.is_set() && slfh.good_frac() > 0.7f) ? slfh.label : -999999;
0530 #endif
0531 
0532       for (bidx_t qi = qb1; qi != qb2; ++qi) {
0533         for (bidx_t pi = pb1; pi != pb2; pi = L.phiMaskApply(pi + 1)) {
0534           // Limit to central Q-bin
0535           if (qi == qb && L.isBinDead(pi, qi) == true) {
0536             dprint("dead module for track in layer=" << L.layer_id() << " qb=" << qi << " pi=" << pi << " q=" << q
0537                                                      << " phi=" << phi);
0538             m_XWsrResult[itrack].m_in_gap = true;
0539           }
0540 
0541           // MT: The following line is the biggest hog (4% total run time).
0542           // This comes from cache misses, I presume.
0543           // It might make sense to make first loop to extract bin indices
0544           // and issue prefetches at the same time.
0545           // Then enter vectorized loop to actually collect the hits in proper order.
0546 
0547           //SK: ~20x1024 bin sizes give mostly 1 hit per bin. Commented out for 128 bins or less
0548           // #pragma nounroll
0549           auto pbi = L.phiQBinContent(pi, qi);
0550           for (bcnt_t hi = pbi.begin(); hi < pbi.end(); ++hi) {
0551             // MT: Access into m_hit_zs and m_hit_phis is 1% run-time each.
0552 
0553             const unsigned int hi_orig = L.getOriginalHitIndex(hi);
0554 
0555             if (m_iteration_hit_mask && (*m_iteration_hit_mask)[hi_orig]) {
0556               dprintf(
0557                   "Yay, denying masked hit on layer %u, hi %u, orig idx %u\n", L.layer_info().layer_id(), hi, hi_orig);
0558               continue;
0559             }
0560 
0561             if (Config::usePhiQArrays) {
0562               if (m_XHitSize[itrack] >= MPlexHitIdxMax)
0563                 break;
0564 
0565               const float ddq = std::abs(q - L.hit_q(hi));
0566               const float ddphi = cdist(std::abs(phi - L.hit_phi(hi)));
0567 
0568               // clang-format off
0569               dprintf("     SHI %3u %4u %5u  %6.3f %6.3f %6.4f %7.5f   %s\n",
0570                       qi, pi, hi, L.hit_q(hi), L.hit_phi(hi),
0571                       ddq, ddphi, (ddq < dq && ddphi < dphi) ? "PASS" : "FAIL");
0572               // clang-format on
0573 
0574 #if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
0575               // clang-format off
0576               MPlexQF thisOutChi2;
0577               {
0578                 const MCHitInfo &mchinfo = m_event->simHitsInfo_[L.refHit(hi).mcHitID()];
0579                 int mchid = mchinfo.mcTrackID();
0580                 int st_isfindable = 0;
0581                 int st_label = -999999;
0582                 int st_prodtype = 0;
0583                 int st_nhits = -1;
0584                 int st_charge = 0;
0585                 float st_r = -999.;
0586                 float st_z = -999.;
0587                 float st_pt = -999.;
0588                 float st_eta = -999.;
0589                 float st_phi = -999.;
0590                 if (mchid >= 0) {
0591                   Track simtrack = m_event->simTracks_[mchid];
0592                   st_isfindable = (int)simtrack.isFindable();
0593                   st_label = simtrack.label();
0594                   st_prodtype = (int)simtrack.prodType();
0595                   st_pt = simtrack.pT();
0596                   st_eta = simtrack.momEta();
0597                   st_phi = simtrack.momPhi();
0598                   st_nhits = simtrack.nTotalHits();
0599                   st_charge = simtrack.charge();
0600                   st_r = simtrack.posR();
0601                   st_z = simtrack.z();
0602                 }
0603 
0604                 const Hit &thishit = L.refHit(hi_orig);
0605                 m_msErr.copyIn(itrack, thishit.errArray());
0606                 m_msPar.copyIn(itrack, thishit.posArray());
0607 
0608                 MPlexQI propFail;
0609                 MPlexLV tmpPropPar;
0610                 const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(L.is_barrel());
0611                 (*fnd_foos.m_compute_chi2_foo)(m_Err[iI],
0612                                                m_Par[iI],
0613                                                m_Chg,
0614                                                m_msErr,
0615                                                m_msPar,
0616                                                thisOutChi2,
0617                                                tmpPropPar,
0618                                                propFail,
0619                                                N_proc,
0620                                                m_prop_config->finding_intra_layer_pflags,
0621                                                m_prop_config->finding_requires_propagation_to_hit_pos);
0622 
0623                 float hx = thishit.x();
0624                 float hy = thishit.y();
0625                 float hz = thishit.z();
0626                 float hr = std::hypot(hx, hy);
0627                 float hphi = std::atan2(hy, hx);
0628                 float hex = ngr( std::sqrt(thishit.exx()) );
0629                 float hey = ngr( std::sqrt(thishit.eyy()) );
0630                 float hez = ngr( std::sqrt(thishit.ezz()) );
0631                 float her = ngr( std::sqrt(
0632                     (hx * hx * thishit.exx() + hy * hy * thishit.eyy() + 2.0f * hx * hy * m_msErr.At(itrack, 0, 1)) /
0633                     (hr * hr)) );
0634                 float hephi = ngr( std::sqrt(thishit.ephi()) );
0635                 float hchi2 = ngr( thisOutChi2[itrack] );
0636                 float tx = m_Par[iI].At(itrack, 0, 0);
0637                 float ty = m_Par[iI].At(itrack, 1, 0);
0638                 float tz = m_Par[iI].At(itrack, 2, 0);
0639                 float tr = std::hypot(tx, ty);
0640                 float tphi = std::atan2(ty, tx);
0641                 // float tchi2 = ngr( m_Chi2(itrack, 0, 0) ); // unused
0642                 float tex = ngr( std::sqrt(m_Err[iI].At(itrack, 0, 0)) );
0643                 float tey = ngr( std::sqrt(m_Err[iI].At(itrack, 1, 1)) );
0644                 float tez = ngr( std::sqrt(m_Err[iI].At(itrack, 2, 2)) );
0645                 float ter = ngr( std::sqrt(
0646                     (tx * tx * tex * tex + ty * ty * tey * tey + 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
0647                     (tr * tr)) );
0648                 float tephi = ngr( std::sqrt(
0649                     (ty * ty * tex * tex + tx * tx * tey * tey - 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
0650                     (tr * tr * tr * tr)) );
0651                 float ht_dxy = std::hypot(hx - tx, hy - ty);
0652                 float ht_dz = hz - tz;
0653                 float ht_dphi = cdist(std::abs(hphi - tphi));
0654 
0655                 static bool first = true;
0656                 if (first) {
0657                   printf(
0658                       "HITWINDOWSEL "
0659                       "evt_id/I:track_algo/I:"
0660                       "lyr_id/I:lyr_isbrl/I:hit_idx/I:"
0661                       "trk_cnt/I:trk_idx/I:trk_label/I:"
0662                       "trk_pt/F:trk_eta/F:trk_mphi/F:trk_chi2/F:"
0663                       "nhits/I:"
0664                       "seed_idx/I:seed_label/I:seed_mcid/I:"
0665                       "hit_mcid/I:"
0666                       "st_isfindable/I:st_prodtype/I:st_label/I:"
0667                       "st_pt/F:st_eta/F:st_phi/F:"
0668                       "st_nhits/I:st_charge/I:st_r/F:st_z/F:"
0669                       "trk_q/F:hit_q/F:dq_trkhit/F:dq_cut/F:trk_phi/F:hit_phi/F:dphi_trkhit/F:dphi_cut/F:"
0670                       "t_x/F:t_y/F:t_r/F:t_phi/F:t_z/F:"
0671                       "t_ex/F:t_ey/F:t_er/F:t_ephi/F:t_ez/F:"
0672                       "h_x/F:h_y/F:h_r/F:h_phi/F:h_z/F:"
0673                       "h_ex/F:h_ey/F:h_er/F:h_ephi/F:h_ez/F:"
0674                       "ht_dxy/F:ht_dz/F:ht_dphi/F:"
0675                       "h_chi2/F"
0676                       "\n");
0677                   first = false;
0678                 }
0679 
0680                 if (!(std::isnan(phi)) && !(std::isnan(getEta(m_Par[iI].At(itrack, 5, 0))))) {
0681                   //|| std::isnan(ter) || std::isnan(her) || std::isnan(m_Chi2(itrack, 0, 0)) || std::isnan(hchi2)))
0682                   printf("HITWINDOWSEL "
0683                          "%d %d"
0684                          "%d %d %d "
0685                          "%d %d %d "
0686                          "%6.3f %6.3f %6.3f %6.3f "
0687                          "%d "
0688                          "%d %d %d "
0689                          "%d "
0690                          "%d %d %d "
0691                          "%6.3f %6.3f %6.3f "
0692                          "%d %d %6.3f %6.3f "
0693                          "%6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f "
0694                          "%6.3f %6.3f %6.3f %6.3f %6.3f "
0695                          "%6.6f %6.6f %6.6f %6.6f %6.6f "
0696                          "%6.3f %6.3f %6.3f %6.3f %6.3f "
0697                          "%6.6f %6.6f %6.6f %6.6f %6.6f "
0698                          "%6.3f %6.3f %6.3f "
0699                          "%6.3f"
0700                          "\n",
0701                          m_event->evtID(), m_iteration_config->m_track_algorithm,
0702                          L.layer_id(), L.is_barrel(), hi_orig,
0703                          itrack, m_CandIdx(itrack, 0, 0), m_Label(itrack, 0, 0),
0704                          1.0f / m_Par[iI].At(itrack, 3, 0), getEta(m_Par[iI].At(itrack, 5, 0)), m_Par[iI].At(itrack, 4, 0), m_Chi2(itrack, 0, 0),
0705                          m_NFoundHits(itrack, 0, 0),
0706                          m_SeedOriginIdx(itrack, 0, 0), seed_lbl, seed_mcid,
0707                          mchid,
0708                          st_isfindable, st_prodtype, st_label,
0709                          st_pt, st_eta, st_phi,
0710                          st_nhits, st_charge, st_r, st_z,
0711                          q, L.hit_q(hi), ddq, dq, phi, L.hit_phi(hi), ddphi, dphi,
0712                          tx, ty, tr, tphi, tz,
0713                          tex, tey, ter, tephi, tez,
0714                          hx, hy, hr, hphi, hz,
0715                          hex, hey, her, hephi, hez,
0716                          ht_dxy, ht_dz, ht_dphi,
0717                          hchi2);
0718                 }
0719               }
0720               // clang-format on
0721 #endif
0722 
0723               if (ddq >= dq)
0724                 continue;
0725               if (ddphi >= dphi)
0726                 continue;
0727 
0728               // MT: Removing extra check gives full efficiency ...
0729               //     and means our error estimations are wrong!
0730               // Avi says we should have *minimal* search windows per layer.
0731               // Also ... if bins are sufficiently small, we do not need the extra
0732               // checks, see above.
0733               m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
0734             } else {
0735               // MT: The following check alone makes more sense with spiral traversal,
0736               // we'd be taking in closest hits first.
0737 
0738               // Hmmh -- there used to be some more checks here.
0739               // Or, at least, the phi binning was much smaller and no further checks were done.
0740               assert(false && "this code has not been used in a while -- see comments in code");
0741 
0742               if (m_XHitSize[itrack] < MPlexHitIdxMax) {
0743                 m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
0744               }
0745             }
0746           }  //hi
0747         }    //pi
0748       }      //qi
0749     }        //itrack
0750   }
0751 
0752   //==============================================================================
0753   // SelectHitIndicesV2
0754   //==============================================================================
0755 
0756   void MkFinder::selectHitIndicesV2(const LayerOfHits &layer_of_hits, const int N_proc) {
0757     // bool debug = true;
0758     using bidx_t = LayerOfHits::bin_index_t;
0759     using bcnt_t = LayerOfHits::bin_content_t;
0760     const LayerOfHits &L = layer_of_hits;
0761     const LayerInfo &LI = L.layer_info();
0762 
0763     const int iI = iP;
0764 
0765     dprintf("LayerOfHits::SelectHitIndicesV2 %s layer=%d N_proc=%d\n",
0766             L.is_barrel() ? "barrel" : "endcap",
0767             L.layer_id(),
0768             N_proc);
0769 
0770 #ifdef RNT_DUMP_MkF_SelHitIdcs
0771     rnt_shi.InnerIdcsReset(N_proc);
0772     Event::SimLabelFromHits sim_lbls[NN];
0773     for (int i = 0; i < N_proc; ++i) {
0774       sim_lbls[i] = m_event->simLabelForCurrentSeed(m_SeedOriginIdx[i]);
0775       if (m_FailFlag[i]) {
0776         rnt_shi.RegisterFailedProp(i, m_Par[1 - iI], m_Par[iI], m_event, m_SeedOriginIdx[i]);
0777       } else if (sim_lbls[i].is_set()) {
0778         CandInfo &ci = rnt_shi.RegisterGoodProp(i, m_Par[iI], m_event, m_SeedOriginIdx[i]);
0779         ci.ic2list.reset();  // zero initialize
0780       }  // else ... could do something about the bad seeds ... probably better to collect elsewhere.
0781     }
0782     // Get BinSearch result from V1. Note -- it can clear m_FailFlag for some cands!
0783     auto ff_stash = m_FailFlag;
0784     selectHitIndices(layer_of_hits, N_proc, true);
0785     m_FailFlag = ff_stash;
0786 #endif
0787 
0788     constexpr int NEW_MAX_HIT = 6;  // 4 - 6 give about the same # of tracks in quality-val
0789     constexpr float DDPHI_PRESEL_FAC = 2.0f;
0790     constexpr float DDQ_PRESEL_FAC = 1.2f;
0791     constexpr float PHI_BIN_EXTRA_FAC = 2.75f;
0792     constexpr float Q_BIN_EXTRA_FAC = 1.6f;
0793 
0794     namespace mp = mini_propagators;
0795     struct Bins {
0796       MPlexQUH q0, q1, q2, p1, p2;
0797       mp::InitialStatePlex isp;
0798       mp::StatePlex sp1, sp2;
0799       int n_proc;
0800 
0801       MPlexQF dphi_track, dq_track;  // 3 sigma track errors at initial state
0802 
0803       // debug & ntuple dump -- to be local in functions
0804       MPlexQF phi_c, dphi;
0805       MPlexQF q_c, qmin, qmax;
0806 
0807       Bins(const MPlexLV &par, const MPlexQI &chg, int np = NN) : isp(par, chg), n_proc(np) {}
0808 
0809       void prop_to_limits(const LayerInfo &li) {
0810         // Positions 1 and 2 should really be by "propagation order", 1 is the closest/
0811         // This should also work for backward propagation so not exactly trivial.
0812         // Also, do not really need propagation to center.
0813         if (li.is_barrel()) {
0814           isp.propagate_to_r(mp::PA_Exact, li.rin(), sp1, true, n_proc);
0815           isp.propagate_to_r(mp::PA_Exact, li.rout(), sp2, true, n_proc);
0816         } else {
0817           isp.propagate_to_z(mp::PA_Exact, li.zmin(), sp1, true, n_proc);
0818           isp.propagate_to_z(mp::PA_Exact, li.zmax(), sp2, true, n_proc);
0819         }
0820       }
0821 
0822       void find_bin_ranges(const LayerInfo &li, const LayerOfHits &loh, const MPlexLS &err) {
0823         // Below made members for debugging
0824         // MPlexQF phi_c, dphi_min, dphi_max;
0825         // phi_c = mp::fast_atan2(isp.y, isp.x);  // calculated below as difference
0826 
0827         // Matriplex::min_max(sp1.dphi, sp2.dphi, dphi_min, dphi_max);
0828         // the above is wrong: dalpha is not dphi --> renamed variable in State
0829         MPlexQF xp1, xp2, pmin, pmax;
0830         xp1 = mp::fast_atan2(sp1.y, sp1.x);
0831         xp2 = mp::fast_atan2(sp2.y, sp2.x);
0832         Matriplex::min_max(xp1, xp2, pmin, pmax);
0833         // Matriplex::min_max(mp::fast_atan2(sp1.y, sp1.x), smp::fast_atan2(sp2.y, sp2.x), pmin, pmax);
0834         MPlexQF dp = pmax - pmin;
0835         phi_c = 0.5f * (pmax + pmin);
0836         for (int ii = 0; ii < NN; ++ii) {
0837           if (ii < n_proc) {
0838             if (dp[ii] > Const::PI) {
0839               std::swap(pmax[ii], pmin[ii]);
0840               dp[ii] = Const::TwoPI - dp[ii];
0841               phi_c[ii] = Const::PI - phi_c[ii];
0842             }
0843             dphi[ii] = 0.5f * dp[ii];
0844             // printf("phic: %f  p1: %f  p2: %f   pmin: %f  pmax: %f   dphi: %f\n",
0845             //       phi_c[ii], xp1[ii], xp2[ii], pmin[ii], pmax[ii], dphi[ii]);
0846           }
0847         }
0848 
0849         const auto calc_err_xy = [&](const MPlexQF &x, const MPlexQF &y) {
0850           return x * x * err.ReduceFixedIJ(0, 0) + y * y * err.ReduceFixedIJ(1, 1) +
0851                  2.0f * x * y * err.ReduceFixedIJ(0, 1);
0852         };
0853 
0854         // Calculate dphi_track, dq_track differs for barrel/endcap
0855         MPlexQF r2_c = isp.x * isp.x + isp.y * isp.y;
0856         MPlexQF r2inv_c = 1.0f / r2_c;
0857         MPlexQF dphidx_c = -isp.y * r2inv_c;
0858         MPlexQF dphidy_c = isp.x * r2inv_c;
0859         dphi_track = 3.0f * calc_err_xy(dphidx_c, dphidy_c).abs().sqrt();
0860 
0861         // MPlexQF qmin, qmax;
0862         if (li.is_barrel()) {
0863           Matriplex::min_max(sp1.z, sp2.z, qmin, qmax);
0864           q_c = isp.z;
0865           dq_track = 3.0f * err.ReduceFixedIJ(2, 2).abs().sqrt();
0866         } else {
0867           Matriplex::min_max(Matriplex::hypot(sp1.x, sp1.y), Matriplex::hypot(sp2.x, sp2.y), qmin, qmax);
0868           q_c = Matriplex::sqrt(r2_c);
0869           dq_track = 3.0f * (r2inv_c * calc_err_xy(isp.x, isp.y).abs()).sqrt();
0870         }
0871 
0872         for (int i = 0; i < p1.kTotSize; ++i) {
0873           // Clamp crazy sizes. This actually only happens when prop-fail flag is set.
0874           // const float dphi_clamp = 0.1;
0875           // if (dphi_min[i] > 0.0f || dphi_min[i] < -dphi_clamp) dphi_min[i] = -dphi_clamp;
0876           // if (dphi_max[i] < 0.0f || dphi_max[i] > dphi_clampf) dphi_max[i] = dphi_clamp;
0877           p1[i] = loh.phiBinChecked(pmin[i] - dphi_track[i] - PHI_BIN_EXTRA_FAC * 0.0123f);
0878           p2[i] = loh.phiBinChecked(pmax[i] + dphi_track[i] + PHI_BIN_EXTRA_FAC * 0.0123f);
0879 
0880           q0[i] = loh.qBinChecked(q_c[i]);
0881           q1[i] = loh.qBinChecked(qmin[i] - dq_track[i] - Q_BIN_EXTRA_FAC * 0.5f * li.q_bin());
0882           q2[i] = loh.qBinChecked(qmax[i] + dq_track[i] + Q_BIN_EXTRA_FAC * 0.5f * li.q_bin()) + 1;
0883         }
0884       }
0885     };
0886 
0887     Bins B(m_Par[iI], m_Chg, N_proc);
0888     B.prop_to_limits(LI);
0889     B.find_bin_ranges(LI, L, m_Err[iI]);
0890 
0891     for (int i = 0; i < NN; ++i) {
0892       if (i < N_proc) {
0893         m_XHitSize[i] = 0;
0894         // Notify failure. Ideally should be detected before selectHitIndices().
0895         if (m_FailFlag[i]) {
0896           m_XWsrResult[i].m_wsr = WSR_Failed;
0897         } else {
0898           if (LI.is_barrel()) {
0899             m_XWsrResult[i] = L.is_within_z_sensitive_region(B.q_c[i], 0.5f * (B.q2[i] - B.q1[i]));
0900           } else {
0901             m_XWsrResult[i] = L.is_within_r_sensitive_region(B.q_c[i], 0.5f * (B.q2[i] - B.q1[i]));
0902           }
0903         }
0904       }
0905     }
0906 
0907     // for (int i = 0; i < N_proc; ++i) {
0908     //   printf("BinCheck %c %+8.6f %+8.6f | %3d %3d - %3d %3d ||  | %2d %2d - %2d %2d\n", LI.is_barrel() ? 'B' : 'E',
0909     //          B.phi[i], B.dphi[i], B.p1[i], B.p2[i], pb1v[i], pb2v[i],
0910     //          B.q[i], B.dq[i], B.q1[i], B.q2[i], qb1v[i], qb2v[i]);
0911     // }
0912 
0913 #ifdef RNT_DUMP_MkF_SelHitIdcs
0914     for (auto i : rnt_shi.f_h_idcs) {
0915       CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[i]];
0916       ci.bsn = BinSearch({B.phi_c[i],
0917                           B.dphi[i],
0918                           B.q_c[i],
0919                           0.5f * (B.q2[i] - B.q1[i]),
0920                           B.p1[i],
0921                           B.p2[i],
0922                           B.q1[i],
0923                           B.q2[i],
0924                           m_XWsrResult[i].m_wsr,
0925                           m_XWsrResult[i].m_in_gap,
0926                           false});
0927       ci.ps_min = statep2propstate(B.sp1, i);
0928       ci.ps_max = statep2propstate(B.sp2, i);
0929     }
0930 #endif
0931 
0932     struct PQE {
0933       float score;
0934       unsigned int hit_index;
0935     };
0936     auto pqe_cmp = [](const PQE &a, const PQE &b) { return a.score < b.score; };
0937     std::priority_queue<PQE, std::vector<PQE>, decltype(pqe_cmp)> pqueue(pqe_cmp);
0938     int pqueue_size = 0;
0939 
0940     // Vectorizing this makes it run slower!
0941     //#pragma omp simd
0942     for (int itrack = 0; itrack < NN; ++itrack) {
0943       if (itrack >= N_proc) {
0944         continue;
0945       }
0946 
0947       if (m_FailFlag[itrack]) {
0948         m_XWsrResult[itrack].m_wsr = WSR_Failed;
0949         continue;
0950       }
0951 
0952       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
0953         continue;
0954       }
0955 
0956       // New binning -- known to be too restrictive, so scaled up. Probably esp. in stereo layers.
0957       // Also, could take track covariance dphi / dq extras + known tilt stuff.
0958       const bidx_t qb = B.q0[itrack];
0959       const bidx_t qb1 = B.q1[itrack];
0960       const bidx_t qb2 = B.q2[itrack];
0961       const bidx_t pb1 = B.p1[itrack];
0962       const bidx_t pb2 = B.p2[itrack];
0963 
0964       // clang-format off
0965       dprintf("  %2d/%2d: %6.3f %6.3f %6.6f %7.5f %3u %3u %4u %4u\n",
0966               L.layer_id(), itrack, B.q_c[itrack], B.phi_c[itrack],
0967               B.qmax[itrack] - B.qmin[itrack], B.dphi[itrack],
0968               qb1, qb2, pb1, pb2);
0969 #ifdef RNT_DUMP_MkF_SelHitIdcs
0970       int hit_out_idx = 0;
0971       // phi-sorted position of matched hits
0972       struct pos_match { float dphi, dq; int idx; bool matched; };
0973       std::vector<pos_match> pos_match_vec;
0974 #endif
0975       // clang-format on
0976 
0977       mp::InitialState mp_is(m_Par[iI], m_Chg, itrack);
0978       mp::State mp_s;
0979 
0980       for (bidx_t qi = qb1; qi != qb2; ++qi) {
0981         for (bidx_t pi = pb1; pi != pb2; pi = L.phiMaskApply(pi + 1)) {
0982           // Limit to central Q-bin
0983           if (qi == qb && L.isBinDead(pi, qi) == true) {
0984             dprint("dead module for track in layer=" << L.layer_id() << " qb=" << qi << " pi=" << pi
0985                                                      << " q=" << B.q_c[itrack] << " phi=" << B.phi_c[itrack]);
0986             m_XWsrResult[itrack].m_in_gap = true;
0987           }
0988 
0989           // It might make sense to make first loop to extract bin indices
0990           // and issue prefetches at the same time.
0991           // Then enter vectorized loop to actually collect the hits in proper order.
0992 
0993           //SK: ~20x1024 bin sizes give mostly 1 hit per bin. Commented out for 128 bins or less
0994           // #pragma nounroll
0995           auto pbi = L.phiQBinContent(pi, qi);
0996           for (bcnt_t hi = pbi.begin(); hi < pbi.end(); ++hi) {
0997             // MT: Access into m_hit_zs and m_hit_phis is 1% run-time each.
0998 
0999             const unsigned int hi_orig = L.getOriginalHitIndex(hi);
1000 
1001             if (m_iteration_hit_mask && (*m_iteration_hit_mask)[hi_orig]) {
1002               dprintf(
1003                   "Yay, denying masked hit on layer %u, hi %u, orig idx %u\n", L.layer_info().layer_id(), hi, hi_orig);
1004               continue;
1005             }
1006 
1007             float new_q, new_phi, new_ddphi, new_ddq;
1008             bool prop_fail;
1009 
1010             if (L.is_barrel()) {
1011               prop_fail = mp_is.propagate_to_r(mp::PA_Exact, L.hit_qbar(hi), mp_s, true);
1012               new_q = mp_s.z;
1013             } else {
1014               prop_fail = mp_is.propagate_to_z(mp::PA_Exact, L.hit_qbar(hi), mp_s, true);
1015               new_q = std::hypot(mp_s.x, mp_s.y);
1016             }
1017 
1018             new_phi = vdt::fast_atan2f(mp_s.y, mp_s.x);
1019             new_ddphi = cdist(std::abs(new_phi - L.hit_phi(hi)));
1020             new_ddq = std::abs(new_q - L.hit_q(hi));
1021 
1022             bool dqdphi_presel = new_ddq < B.dq_track[itrack] + DDQ_PRESEL_FAC * L.hit_q_half_length(hi) &&
1023                                  new_ddphi < B.dphi_track[itrack] + DDPHI_PRESEL_FAC * 0.0123f;
1024 
1025             // clang-format off
1026             dprintf("     SHI %3u %4u %5u  %6.3f %6.3f %6.4f %7.5f  PROP-%s  %s\n",
1027                     qi, pi, hi, L.hit_q(hi), L.hit_phi(hi),
1028                     new_ddq, new_ddphi, prop_fail ? "FAIL" : "OK", dqdphi_presel ? "PASS" : "REJECT");
1029 #ifdef RNT_DUMP_MkF_SelHitIdcs
1030             if (rnt_shi.f_h_remap[itrack] >= 0) {
1031               int sim_lbl = sim_lbls[itrack].label;
1032               const Hit &thishit = L.refHit(hi_orig);
1033               m_msErr.copyIn(itrack, thishit.errArray());
1034               m_msPar.copyIn(itrack, thishit.posArray());
1035 
1036               MPlexQF thisOutChi2;
1037               MPlexQI propFail(0);
1038               MPlexLV tmpPropPar;
1039               const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(L.is_barrel());
1040               (*fnd_foos.m_compute_chi2_foo)(
1041                 m_Err[iI], m_Par[iI], m_Chg, m_msErr, m_msPar,
1042                 thisOutChi2, tmpPropPar, propFail, N_proc,
1043                 m_prop_config->finding_intra_layer_pflags,
1044                 m_prop_config->finding_requires_propagation_to_hit_pos);
1045               float hchi2 = thisOutChi2[itrack];
1046 
1047               CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1048 
1049               const MCHitInfo &mchinfo = m_event->simHitsInfo_[L.refHit(hi_orig).mcHitID()];
1050               int hit_lbl = mchinfo.mcTrackID();
1051               ci.hmi.emplace_back(HitMatchInfo{ HitInfo
1052                 { hit2pos(thishit),
1053                   new_q, L.hit_q_half_length(hi), L.hit_qbar(hi), new_phi,
1054                   hit_lbl },
1055                 state2pos(mp_s), state2mom(mp_s),
1056                 new_ddq, new_ddphi, hchi2, (int) hi_orig,
1057                 (sim_lbl == hit_lbl), dqdphi_presel, !prop_fail
1058               });
1059 
1060               bool new_dec = dqdphi_presel && !prop_fail;
1061               ++ci.n_all_hits;
1062               if (sim_lbl == hit_lbl) {
1063                 ++ci.n_hits_match;
1064                 if (new_dec) ++ci.n_hits_pass_match;
1065               }
1066               if (new_dec) ++ci.n_hits_pass;
1067               if (new_dec)
1068                 pos_match_vec.emplace_back(pos_match{ new_ddphi, new_ddq, hit_out_idx++,
1069                                              sim_lbl == hit_lbl });
1070             } // if cand is saved
1071 #endif
1072             // clang-format on
1073 
1074             if (prop_fail || !dqdphi_presel)
1075               continue;
1076             if (pqueue_size < NEW_MAX_HIT) {
1077               pqueue.push({new_ddphi, hi_orig});
1078               ++pqueue_size;
1079             } else if (new_ddphi < pqueue.top().score) {
1080               pqueue.pop();
1081               pqueue.push({new_ddphi, hi_orig});
1082             }
1083           }  //hi
1084         }    //pi
1085       }      //qi
1086 
1087       dprintf(" PQUEUE (%d)", pqueue_size);
1088 #ifdef RNT_DUMP_MkF_SelHitIdcs
1089       // clang-format off
1090       if (sim_lbls[itrack].is_set()) {
1091         // Find ord number of matched hits.
1092         std::sort(pos_match_vec.begin(), pos_match_vec.end(),
1093                   [](auto &a, auto &b){return a.dphi < b.dphi;});
1094         int pmvs = pos_match_vec.size();
1095 
1096         CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1097         for (int i = 0; i < pmvs; ++i) {
1098           if (pos_match_vec[i].matched) {
1099             ci.ord_first_match = i;
1100             ci.dphi_first_match = pos_match_vec[i].dphi;
1101             ci.dq_first_match = pos_match_vec[i].dq;
1102             break;
1103           }
1104         }
1105       }
1106       // clang-format off
1107 #endif
1108       // Reverse hits so best dphis/scores come first in the hit-index list.
1109       m_XHitSize[itrack] = pqueue_size;
1110       while (pqueue_size) {
1111         --pqueue_size;
1112         m_XHitArr.At(itrack, pqueue_size, 0) = pqueue.top().hit_index;
1113         dprintf("   %d: %f %d", pqueue_size, pqueue.top().score, pqueue.top().hit_index);
1114         pqueue.pop();
1115       }
1116       dprintf("\n");
1117 
1118     }  //itrack
1119   }
1120 
1121   //==============================================================================
1122   // AddBestHit - Best Hit Track Finding
1123   //==============================================================================
1124 
1125   void MkFinder::addBestHit(const LayerOfHits &layer_of_hits, const int N_proc, const FindingFoos &fnd_foos) {
1126     // debug = true;
1127 
1128     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1129 
1130     float minChi2[NN];
1131     int bestHit[NN];
1132     int maxSize = 0;
1133 
1134     // Determine maximum number of hits for tracks in the collection.
1135     for (int it = 0; it < NN; ++it) {
1136       if (it < N_proc) {
1137         if (m_XHitSize[it] > 0) {
1138           maxSize = std::max(maxSize, m_XHitSize[it]);
1139         }
1140       }
1141 
1142       bestHit[it] = -1;
1143       minChi2[it] = getHitSelDynamicChi2Cut(it, iP);
1144     }
1145 
1146     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1147       //fixme what if size is zero???
1148 
1149       mhp.reset();
1150 
1151       //#pragma omp simd doesn't vectorize with current compilers
1152       for (int itrack = 0; itrack < NN; ++itrack) {
1153         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1154           mhp.addInputAt(itrack, layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)));
1155         }
1156       }
1157 
1158       mhp.pack(m_msErr, m_msPar);
1159 
1160       //now compute the chi2 of track state vs hit
1161       MPlexQF outChi2;
1162       MPlexLV tmpPropPar;
1163       clearFailFlag();
1164       (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1165                                      m_Par[iP],
1166                                      m_Chg,
1167                                      m_msErr,
1168                                      m_msPar,
1169                                      outChi2,
1170                                      tmpPropPar,
1171                                      m_FailFlag,
1172                                      N_proc,
1173                                      m_prop_config->finding_intra_layer_pflags,
1174                                      m_prop_config->finding_requires_propagation_to_hit_pos);
1175 
1176       //update best hit in case chi2<minChi2
1177 #pragma omp simd
1178       for (int itrack = 0; itrack < NN; ++itrack) {
1179         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1180           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1181           dprint("chi2=" << chi2 << " minChi2[itrack]=" << minChi2[itrack]);
1182           if (chi2 < minChi2[itrack]) {
1183             minChi2[itrack] = chi2;
1184             bestHit[itrack] = m_XHitArr.At(itrack, hit_cnt, 0);
1185           }
1186         }
1187       }
1188     }  // end loop over hits
1189 
1190     //#pragma omp simd
1191     for (int itrack = 0; itrack < NN; ++itrack) {
1192       if (itrack >= N_proc) {
1193         continue;
1194       }
1195 
1196       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1197         // Why am I doing this?
1198         m_msErr.setDiagonal3x3(itrack, 666);
1199         m_msPar(itrack, 0, 0) = m_Par[iP](itrack, 0, 0);
1200         m_msPar(itrack, 1, 0) = m_Par[iP](itrack, 1, 0);
1201         m_msPar(itrack, 2, 0) = m_Par[iP](itrack, 2, 0);
1202 
1203         // XXXX If not in gap, should get back the old track params. But they are gone ...
1204         // Would actually have to do it right after SelectHitIndices where updated params are still ok.
1205         // Here they got screwed during hit matching.
1206         // So, I'd store them there (into propagated params) and retrieve them here.
1207         // Or we decide not to care ...
1208 
1209         continue;
1210       }
1211 
1212       //fixme decide what to do in case no hit found
1213       if (bestHit[itrack] >= 0) {
1214         const Hit &hit = layer_of_hits.refHit(bestHit[itrack]);
1215         const float chi2 = minChi2[itrack];
1216 
1217         dprint("ADD BEST HIT FOR TRACK #"
1218                << itrack << std::endl
1219                << "prop x=" << m_Par[iP].constAt(itrack, 0, 0) << " y=" << m_Par[iP].constAt(itrack, 1, 0) << std::endl
1220                << "copy in hit #" << bestHit[itrack] << " x=" << hit.position()[0] << " y=" << hit.position()[1]);
1221 
1222         m_msErr.copyIn(itrack, hit.errArray());
1223         m_msPar.copyIn(itrack, hit.posArray());
1224         m_Chi2(itrack, 0, 0) += chi2;
1225 
1226         add_hit(itrack, bestHit[itrack], layer_of_hits.layer_id());
1227       } else {
1228         int fake_hit_idx = Hit::kHitMissIdx;
1229 
1230         if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1231           // YYYYYY Config::store_missed_layers
1232           fake_hit_idx = Hit::kHitEdgeIdx;
1233         } else if (num_all_minus_one_hits(itrack)) {
1234           fake_hit_idx = Hit::kHitStopIdx;
1235         }
1236 
1237         dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != Hit::kHitEdgeIdx)
1238                                           << " r=" << std::hypot(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
1239 
1240         m_msErr.setDiagonal3x3(itrack, 666);
1241         m_msPar(itrack, 0, 0) = m_Par[iP](itrack, 0, 0);
1242         m_msPar(itrack, 1, 0) = m_Par[iP](itrack, 1, 0);
1243         m_msPar(itrack, 2, 0) = m_Par[iP](itrack, 2, 0);
1244         // Don't update chi2
1245 
1246         add_hit(itrack, fake_hit_idx, layer_of_hits.layer_id());
1247       }
1248     }
1249 
1250     // Update the track parameters with this hit. (Note that some calculations
1251     // are already done when computing chi2. Not sure it's worth caching them?)
1252 
1253     dprint("update parameters");
1254     clearFailFlag();
1255     (*fnd_foos.m_update_param_foo)(m_Err[iP],
1256                                    m_Par[iP],
1257                                    m_Chg,
1258                                    m_msErr,
1259                                    m_msPar,
1260                                    m_Err[iC],
1261                                    m_Par[iC],
1262                                    m_FailFlag,
1263                                    N_proc,
1264                                    m_prop_config->finding_intra_layer_pflags,
1265                                    m_prop_config->finding_requires_propagation_to_hit_pos);
1266 
1267     dprint("m_Par[iP](0,0,0)=" << m_Par[iP](0, 0, 0) << " m_Par[iC](0,0,0)=" << m_Par[iC](0, 0, 0));
1268   }
1269 
1270   //=======================================================
1271   // isStripQCompatible : check if prop is consistent with the barrel/endcap strip length
1272   //=======================================================
1273   bool isStripQCompatible(
1274       int itrack, bool isBarrel, const MPlexLS &pErr, const MPlexLV &pPar, const MPlexHS &msErr, const MPlexHV &msPar) {
1275     //check module compatibility via long strip side = L/sqrt(12)
1276     if (isBarrel) {  //check z direction only
1277       const float res = std::abs(msPar.constAt(itrack, 2, 0) - pPar.constAt(itrack, 2, 0));
1278       const float hitHL = sqrt(msErr.constAt(itrack, 2, 2) * 3.f);  //half-length
1279       const float qErr = sqrt(pErr.constAt(itrack, 2, 2));
1280       dprint("qCompat " << hitHL << " + " << 3.f * qErr << " vs " << res);
1281       return hitHL + std::max(3.f * qErr, 0.5f) > res;
1282     } else {  //project on xy, assuming the strip Length >> Width
1283       const float res[2]{msPar.constAt(itrack, 0, 0) - pPar.constAt(itrack, 0, 0),
1284                          msPar.constAt(itrack, 1, 0) - pPar.constAt(itrack, 1, 0)};
1285       const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
1286       const float hitT2inv = 1.f / hitT2;
1287       const float proj[3] = {msErr.constAt(itrack, 0, 0) * hitT2inv,
1288                              msErr.constAt(itrack, 0, 1) * hitT2inv,
1289                              msErr.constAt(itrack, 1, 1) * hitT2inv};
1290       const float qErr =
1291           sqrt(std::abs(pErr.constAt(itrack, 0, 0) * proj[0] + 2.f * pErr.constAt(itrack, 0, 1) * proj[1] +
1292                         pErr.constAt(itrack, 1, 1) * proj[2]));  //take abs to avoid non-pos-def cases
1293       const float resProj =
1294           sqrt(res[0] * proj[0] * res[0] + 2.f * res[1] * proj[1] * res[0] + res[1] * proj[2] * res[1]);
1295       dprint("qCompat " << sqrt(hitT2 * 3.f) << " + " << 3.f * qErr << " vs " << resProj);
1296       return sqrt(hitT2 * 3.f) + std::max(3.f * qErr, 0.5f) > resProj;
1297     }
1298   }
1299 
1300   //=======================================================
1301   // passStripChargePCMfromTrack : apply the slope correction to charge per cm and cut using hit err matrix
1302   //         the raw pcm = charge/L_normal
1303   //         the corrected qCorr = charge/L_path = charge/(L_normal*p/p_zLocal) = pcm*p_zLocal/p
1304   //=======================================================
1305   bool passStripChargePCMfromTrack(
1306       int itrack, bool isBarrel, unsigned int pcm, unsigned int pcmMin, const MPlexLV &pPar, const MPlexHS &msErr) {
1307     //skip the overflow case
1308     if (pcm >= Hit::maxChargePerCM())
1309       return true;
1310 
1311     float qSF;
1312     if (isBarrel) {  //project in x,y, assuming zero-error direction is in this plane
1313       const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
1314       const float hitT2inv = 1.f / hitT2;
1315       const float proj[3] = {msErr.constAt(itrack, 0, 0) * hitT2inv,
1316                              msErr.constAt(itrack, 0, 1) * hitT2inv,
1317                              msErr.constAt(itrack, 1, 1) * hitT2inv};
1318       const bool detXY_OK =
1319           std::abs(proj[0] * proj[2] - proj[1] * proj[1]) < 0.1f;  //check that zero-direction is close
1320       const float cosP = cos(pPar.constAt(itrack, 4, 0));
1321       const float sinP = sin(pPar.constAt(itrack, 4, 0));
1322       const float sinT = std::abs(sin(pPar.constAt(itrack, 5, 0)));
1323       //qSF = sqrt[(px,py)*(1-proj)*(px,py)]/p = sinT*sqrt[(cosP,sinP)*(1-proj)*(cosP,sinP)].
1324       qSF = detXY_OK ? sinT * std::sqrt(std::abs(1.f + cosP * cosP * proj[0] + sinP * sinP * proj[2] -
1325                                                  2.f * cosP * sinP * proj[1]))
1326                      : 1.f;
1327     } else {  //project on z
1328       // p_zLocal/p = p_z/p = cosT
1329       qSF = std::abs(cos(pPar.constAt(itrack, 5, 0)));
1330     }
1331 
1332     const float qCorr = pcm * qSF;
1333     dprint("pcm " << pcm << " * " << qSF << " = " << qCorr << " vs " << pcmMin);
1334     return qCorr > pcmMin;
1335   }
1336 
1337   //==============================================================================
1338   // FindCandidates - Standard Track Finding
1339   //==============================================================================
1340 
1341   void MkFinder::findCandidates(const LayerOfHits &layer_of_hits,
1342                                 std::vector<std::vector<TrackCand>> &tmp_candidates,
1343                                 const int offset,
1344                                 const int N_proc,
1345                                 const FindingFoos &fnd_foos) {
1346     // bool debug = true;
1347 
1348     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1349 
1350     int maxSize = 0;
1351 
1352     // Determine maximum number of hits for tracks in the collection.
1353     for (int it = 0; it < NN; ++it) {
1354       if (it < N_proc) {
1355         if (m_XHitSize[it] > 0) {
1356           maxSize = std::max(maxSize, m_XHitSize[it]);
1357         }
1358       }
1359     }
1360 
1361     dprintf("FindCandidates max hits to process=%d\n", maxSize);
1362 
1363     int nHitsAdded[NN]{};
1364     bool isTooLargeCluster[NN]{false};
1365 
1366     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1367       mhp.reset();
1368 
1369       int charge_pcm[NN];
1370 
1371       //#pragma omp simd doesn't vectorize with current compilers
1372       for (int itrack = 0; itrack < NN; ++itrack) {
1373         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1374           const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
1375           mhp.addInputAt(itrack, hit);
1376           charge_pcm[itrack] = hit.chargePerCM();
1377         }
1378       }
1379 
1380       mhp.pack(m_msErr, m_msPar);
1381 
1382       //now compute the chi2 of track state vs hit
1383       MPlexQF outChi2;
1384       MPlexLV propPar;
1385       clearFailFlag();
1386 
1387       if constexpr (Config::usePropToPlane) {
1388         // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint
1389         MPlexHV norm, dir;
1390         packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc);
1391         kalmanPropagateAndComputeChi2Plane(m_Err[iP],
1392                                            m_Par[iP],
1393                                            m_Chg,
1394                                            m_msErr,
1395                                            m_msPar,
1396                                            norm,
1397                                            dir,
1398                                            outChi2,
1399                                            propPar,
1400                                            m_FailFlag,
1401                                            N_proc,
1402                                            m_prop_config->finding_intra_layer_pflags,
1403                                            m_prop_config->finding_requires_propagation_to_hit_pos);
1404       } else {
1405         (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1406                                        m_Par[iP],
1407                                        m_Chg,
1408                                        m_msErr,
1409                                        m_msPar,
1410                                        outChi2,
1411                                        propPar,
1412                                        m_FailFlag,
1413                                        N_proc,
1414                                        m_prop_config->finding_intra_layer_pflags,
1415                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1416       }
1417 
1418       // Now update the track parameters with this hit (note that some
1419       // calculations are already done when computing chi2, to be optimized).
1420       // 1. This is not needed for candidates the hit is not added to, but it's
1421       // vectorized so doing it serially below should take the same time.
1422       // 2. Still it's a waste of time in case the hit is not added to any of the
1423       // candidates, so check beforehand that at least one cand needs update.
1424       bool oneCandPassCut = false;
1425       for (int itrack = 0; itrack < NN; ++itrack) {
1426         if (itrack >= N_proc) {
1427           continue;
1428         }
1429         float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1430 
1431         if (hit_cnt < m_XHitSize[itrack]) {
1432           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1433           dprint("chi2=" << chi2);
1434           if (chi2 < max_c2) {
1435             bool isCompatible = true;
1436             if (!layer_of_hits.is_pixel()) {
1437               //check module compatibility via long strip side = L/sqrt(12)
1438               isCompatible =
1439                   isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1440 
1441               //rescale strip charge to track parameters and reapply the cut
1442               isCompatible &= passStripChargePCMfromTrack(
1443                   itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1444             }
1445             // Select only SiStrip hits with cluster size < maxClusterSize
1446             if (!layer_of_hits.is_pixel()) {
1447               if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1448                   m_iteration_params->maxClusterSize) {
1449                 isTooLargeCluster[itrack] = true;
1450                 isCompatible = false;
1451               }
1452             }
1453 
1454             if (isCompatible) {
1455               oneCandPassCut = true;
1456               break;
1457             }
1458           }
1459         }
1460       }
1461 
1462       if (oneCandPassCut) {
1463         MPlexQI tmpChg = m_Chg;
1464         clearFailFlag();
1465         (*fnd_foos.m_update_param_foo)(m_Err[iP],
1466                                        m_Par[iP],
1467                                        tmpChg,
1468                                        m_msErr,
1469                                        m_msPar,
1470                                        m_Err[iC],
1471                                        m_Par[iC],
1472                                        m_FailFlag,
1473                                        N_proc,
1474                                        m_prop_config->finding_intra_layer_pflags,
1475                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1476 
1477         dprint("update parameters" << std::endl
1478                                    << "propagated track parameters x=" << m_Par[iP].constAt(0, 0, 0)
1479                                    << " y=" << m_Par[iP].constAt(0, 1, 0) << std::endl
1480                                    << "               hit position x=" << m_msPar.constAt(0, 0, 0)
1481                                    << " y=" << m_msPar.constAt(0, 1, 0) << std::endl
1482                                    << "   updated track parameters x=" << m_Par[iC].constAt(0, 0, 0)
1483                                    << " y=" << m_Par[iC].constAt(0, 1, 0));
1484 
1485         //create candidate with hit in case chi2 < max_c2
1486         //fixme: please vectorize me... (not sure it's possible in this case)
1487         for (int itrack = 0; itrack < NN; ++itrack) {
1488           if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1489             const float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1490             const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1491             dprint("chi2=" << chi2);
1492             if (chi2 < max_c2) {
1493               bool isCompatible = true;
1494               if (!layer_of_hits.is_pixel()) {
1495                 // select only SiStrip hits with cluster size < maxClusterSize
1496                 if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1497                     m_iteration_params->maxClusterSize) {
1498                   // isTooLargeCluster[itrack] = true; -- only in CloneEngine
1499                   isCompatible = false;
1500                 }
1501                 // check module compatibility via long strip side = L/sqrt(12)
1502                 if (isCompatible)
1503                   isCompatible =
1504                     isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1505                 // rescale strip charge to track parameters and reapply the cut
1506                 if (isCompatible && layer_of_hits.layer_info().has_charge()) {
1507                   isCompatible = passStripChargePCMfromTrack(
1508                     itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1509                 }
1510               }
1511 
1512               if (isCompatible) {
1513                 bool hitExists = false;
1514                 int maxHits = m_NFoundHits(itrack, 0, 0);
1515                 if (layer_of_hits.is_pixel()) {
1516                   for (int i = 0; i <= maxHits; ++i) {
1517                     if (i > 2)
1518                       break;
1519                     if (m_HoTArrs[itrack][i].layer == layer_of_hits.layer_id()) {
1520                       hitExists = true;
1521                       break;
1522                     }
1523                   }
1524                 }
1525                 if (hitExists)
1526                   continue;
1527 
1528                 nHitsAdded[itrack]++;
1529                 dprint("chi2 cut passed, creating new candidate");
1530                 // Create a new candidate and fill the tmp_candidates output vector.
1531                 // QQQ only instantiate if it will pass, be better than N_best
1532 
1533                 const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
1534 
1535                 TrackCand newcand;
1536                 copy_out(newcand, itrack, iC);
1537                 newcand.setCharge(tmpChg(itrack, 0, 0));
1538                 newcand.addHitIdx(hit_idx, layer_of_hits.layer_id(), chi2);
1539                 newcand.setScore(getScoreCand(m_steering_params->m_track_scorer,
1540                                               newcand,
1541                                               true /*penalizeTailMissHits*/,
1542                                               true /*inFindCandidates*/));
1543                 newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
1544 
1545                 // To apply a fixed cut instead of dynamic cut for overlap: m_iteration_params->chi2CutOverlap
1546                 if (chi2 < max_c2) {
1547                   CombCandidate &ccand = *newcand.combCandidate();
1548                   ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
1549                       hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
1550                 }
1551 
1552                 dprint("updated track parameters x=" << newcand.parameters()[0] << " y=" << newcand.parameters()[1]
1553                                                      << " z=" << newcand.parameters()[2]
1554                                                      << " pt=" << 1. / newcand.parameters()[3]);
1555 
1556                 tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
1557               }
1558             }
1559           }
1560         }
1561       }  //end if (oneCandPassCut)
1562 
1563     }  //end loop over hits
1564 
1565     //now add invalid hit
1566     //fixme: please vectorize me...
1567     for (int itrack = 0; itrack < NN; ++itrack) {
1568       // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
1569       // and then merged back afterwards.
1570       if (itrack >= N_proc || m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1571         continue;
1572       }
1573 
1574       int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
1575                           (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
1576                              ? Hit::kHitMissIdx
1577                              : Hit::kHitStopIdx;
1578 
1579       if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1580         // YYYYYY m_iteration_params->store_missed_layers
1581         fake_hit_idx = Hit::kHitEdgeIdx;
1582       }
1583       //now add fake hit for tracks that passsed through inactive modules
1584       else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
1585         fake_hit_idx = Hit::kHitInGapIdx;
1586       }
1587       //now add fake hit for cases where hit cluster size is larger than maxClusterSize
1588       else if (isTooLargeCluster[itrack] == true && nHitsAdded[itrack] == 0) {
1589         fake_hit_idx = Hit::kHitMaxClusterIdx;
1590       }
1591 
1592       dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != Hit::kHitEdgeIdx)
1593                                         << " r=" << std::hypot(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
1594 
1595       // QQQ as above, only create and add if score better
1596       TrackCand newcand;
1597       copy_out(newcand, itrack, iP);
1598       newcand.addHitIdx(fake_hit_idx, layer_of_hits.layer_id(), 0.);
1599       newcand.setScore(getScoreCand(
1600           m_steering_params->m_track_scorer, newcand, true /*penalizeTailMissHits*/, true /*inFindCandidates*/));
1601       // Only relevant when we actually add a hit
1602       // newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
1603       tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
1604     }
1605   }
1606 
1607   //==============================================================================
1608   // FindCandidatesCloneEngine - Clone Engine Track Finding
1609   //==============================================================================
1610 
1611   void MkFinder::findCandidatesCloneEngine(const LayerOfHits &layer_of_hits,
1612                                            CandCloner &cloner,
1613                                            const int offset,
1614                                            const int N_proc,
1615                                            const FindingFoos &fnd_foos) {
1616     // bool debug = true;
1617 
1618     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1619 
1620     int maxSize = 0;
1621 
1622     // Determine maximum number of hits for tracks in the collection.
1623 #pragma omp simd
1624     for (int it = 0; it < NN; ++it) {
1625       if (it < N_proc) {
1626         if (m_XHitSize[it] > 0) {
1627           maxSize = std::max(maxSize, m_XHitSize[it]);
1628         }
1629       }
1630     }
1631 
1632     dprintf("FindCandidatesCloneEngine max hits to process=%d\n", maxSize);
1633 
1634     int nHitsAdded[NN]{};
1635     bool isTooLargeCluster[NN]{false};
1636 
1637     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1638       mhp.reset();
1639 
1640       int charge_pcm[NN];
1641 
1642       //#pragma omp simd doesn't vectorize with current compilers
1643       for (int itrack = 0; itrack < NN; ++itrack) {
1644         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1645           const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
1646           mhp.addInputAt(itrack, hit);
1647           charge_pcm[itrack] = hit.chargePerCM();
1648         }
1649       }
1650 
1651       mhp.pack(m_msErr, m_msPar);
1652 
1653       //now compute the chi2 of track state vs hit
1654       MPlexQF outChi2;
1655       MPlexLV propPar;
1656       clearFailFlag();
1657 
1658       if constexpr (Config::usePropToPlane) {
1659         // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint
1660         MPlexHV norm, dir;
1661         packModuleNormDir(layer_of_hits, hit_cnt, norm, dir, N_proc);
1662         kalmanPropagateAndComputeChi2Plane(m_Err[iP],
1663                                            m_Par[iP],
1664                                            m_Chg,
1665                                            m_msErr,
1666                                            m_msPar,
1667                                            norm,
1668                                            dir,
1669                                            outChi2,
1670                                            propPar,
1671                                            m_FailFlag,
1672                                            N_proc,
1673                                            m_prop_config->finding_intra_layer_pflags,
1674                                            m_prop_config->finding_requires_propagation_to_hit_pos);
1675       } else {
1676         (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1677                                        m_Par[iP],
1678                                        m_Chg,
1679                                        m_msErr,
1680                                        m_msPar,
1681                                        outChi2,
1682                                        propPar,
1683                                        m_FailFlag,
1684                                        N_proc,
1685                                        m_prop_config->finding_intra_layer_pflags,
1686                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1687       }
1688 
1689       //#pragma omp simd  // DOES NOT VECTORIZE AS IT IS NOW
1690       for (int itrack = 0; itrack < NN; ++itrack) {
1691         // We can be in failed state from the initial propagation before selectHitIndices
1692         // and there hit_count for track is set to -1 and WSR state to Failed, handled below.
1693         // Or we might have hit it here in propagate-to-hit.
1694         // PROP-FAIL-ENABLE FailFlag check to be enabled when propagation failure
1695         // detection is properly implemented in propagate-to-R/Z.
1696         if (/*!m_FailFlag[itrack] &&*/ itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1697           // make sure the hit was in the compatiblity window for the candidate
1698           const float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1699           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1700           // XXX-NUM-ERR assert(chi2 >= 0);
1701 
1702           dprintf("  chi2=%.3f (%.3f)  trkIdx=%d hitIdx=%d\n", chi2, max_c2, itrack,  m_XHitArr.At(itrack, hit_cnt, 0));
1703           if (chi2 < max_c2) {
1704             bool isCompatible = true;
1705             if (!layer_of_hits.is_pixel()) {
1706               // select only SiStrip hits with cluster size < maxClusterSize
1707               if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1708                   m_iteration_params->maxClusterSize) {
1709                 isTooLargeCluster[itrack] = true;
1710                 isCompatible = false;
1711               }
1712               // check module compatibility via long strip side = L/sqrt(12)
1713               if (isCompatible)
1714                 isCompatible =
1715                   isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1716               // rescale strip charge to track parameters and reapply the cut
1717               if (isCompatible && layer_of_hits.layer_info().has_charge()) {
1718                 isCompatible = passStripChargePCMfromTrack(
1719                   itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1720               }
1721             }
1722 
1723             if (isCompatible) {
1724               CombCandidate &ccand = cloner.combCandWithOriginalIndex(m_SeedIdx(itrack, 0, 0));
1725               bool hitExists = false;
1726               int maxHits = m_NFoundHits(itrack, 0, 0);
1727               if (layer_of_hits.is_pixel()) {
1728                 for (int i = 0; i <= maxHits; ++i) {
1729                   if (i > 2)
1730                     break;
1731                   if (ccand.hot(i).layer == layer_of_hits.layer_id()) {
1732                     hitExists = true;
1733                     break;
1734                   }
1735                 }
1736               }
1737               if (hitExists)
1738                 continue;
1739 
1740               nHitsAdded[itrack]++;
1741               const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
1742 
1743               // Register hit for overlap consideration, if chi2 cut is passed
1744               // To apply a fixed cut instead of dynamic cut for overlap: m_iteration_params->chi2CutOverlap
1745               if (chi2 < max_c2) {
1746                 ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
1747                     hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
1748               }
1749 
1750               IdxChi2List tmpList;
1751               tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
1752               tmpList.hitIdx = hit_idx;
1753               tmpList.module = layer_of_hits.refHit(hit_idx).detIDinLayer();
1754               tmpList.nhits = m_NFoundHits(itrack, 0, 0) + 1;
1755               tmpList.ntailholes = 0;
1756               tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
1757               tmpList.nholes = num_all_minus_one_hits(itrack);
1758               tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
1759               tmpList.chi2 = m_Chi2(itrack, 0, 0) + chi2;
1760               tmpList.chi2_hit = chi2;
1761               tmpList.score = getScoreStruct(m_steering_params->m_track_scorer, tmpList);
1762               cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
1763 
1764               dprint("  adding hit with hit_cnt=" << hit_cnt << " for trkIdx=" << tmpList.trkIdx
1765                                                   << " orig Seed=" << m_Label(itrack, 0, 0));
1766 
1767 #ifdef RNT_DUMP_MkF_SelHitIdcs
1768               if (rnt_shi.f_h_remap[itrack] >= 0) {
1769                 CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1770                 ci.assignIdxChi2List(tmpList);
1771               }
1772 #endif
1773             }
1774           }
1775         }
1776       }
1777 
1778     }  //end loop over hits
1779 
1780     //now add invalid hit
1781     for (int itrack = 0; itrack < NN; ++itrack) {
1782       dprint("num_all_minus_one_hits(" << itrack << ")=" << num_all_minus_one_hits(itrack));
1783 
1784       // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
1785       // and then merged back afterwards.
1786       if (itrack >= N_proc || m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1787         continue;
1788       }
1789 
1790       // int fake_hit_idx = num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand ? -1 : -2;
1791       int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
1792                           (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
1793                              ? Hit::kHitMissIdx
1794                              : Hit::kHitStopIdx;
1795 
1796       if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1797         fake_hit_idx = Hit::kHitEdgeIdx;
1798       }
1799       //now add fake hit for tracks that passsed through inactive modules
1800       else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
1801         fake_hit_idx = Hit::kHitInGapIdx;
1802       }
1803       //now add fake hit for cases where hit cluster size is larger than maxClusterSize
1804       else if (isTooLargeCluster[itrack] == true && nHitsAdded[itrack] == 0) {
1805         fake_hit_idx = Hit::kHitMaxClusterIdx;
1806       }
1807 
1808       // PROP-FAIL-ENABLE The following to be enabled when propagation failure
1809       // detection is properly implemented in propagate-to-R/Z.
1810       // // Override for failed propagation, this trumps all other cases.
1811       // if (m_XWsrResult[itrack].m_wsr == WSR_Failed) {
1812       //   fake_hit_idx = Hit::kHitStopIdx;
1813       // }
1814 
1815       IdxChi2List tmpList;
1816       tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
1817       tmpList.hitIdx = fake_hit_idx;
1818       tmpList.module = -1;
1819       tmpList.nhits = m_NFoundHits(itrack, 0, 0);
1820       tmpList.ntailholes = (fake_hit_idx == Hit::kHitMissIdx ? m_NTailMinusOneHits(itrack, 0, 0) + 1
1821                                                              : m_NTailMinusOneHits(itrack, 0, 0));
1822       tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
1823       tmpList.nholes = num_inside_minus_one_hits(itrack);
1824       tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
1825       tmpList.chi2 = m_Chi2(itrack, 0, 0);
1826       tmpList.chi2_hit = 0;
1827       tmpList.score = getScoreStruct(m_steering_params->m_track_scorer, tmpList);
1828       cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
1829       dprint("adding invalid hit " << fake_hit_idx);
1830     }
1831   }
1832 
1833   //==============================================================================
1834   // UpdateWithLoadedHit
1835   //==============================================================================
1836 
1837   void MkFinder::updateWithLoadedHit(int N_proc, const LayerOfHits &layer_of_hits, const FindingFoos &fnd_foos) {
1838     // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here
1839     // for propagation to the hit.
1840     clearFailFlag();
1841     if constexpr (Config::usePropToPlane) {
1842       MPlexHV norm, dir;
1843       packModuleNormDir(layer_of_hits, 0, norm, dir, N_proc);
1844       kalmanPropagateAndUpdatePlane(m_Err[iP],
1845                                     m_Par[iP],
1846                                     m_Chg,
1847                                     m_msErr,
1848                                     m_msPar,
1849                                     norm,
1850                                     dir,
1851                                     m_Err[iC],
1852                                     m_Par[iC],
1853                                     m_FailFlag,
1854                                     N_proc,
1855                                     m_prop_config->finding_inter_layer_pflags,
1856                                     m_prop_config->finding_requires_propagation_to_hit_pos);
1857     } else {
1858       (*fnd_foos.m_update_param_foo)(m_Err[iP],
1859                                      m_Par[iP],
1860                                      m_Chg,
1861                                      m_msErr,
1862                                      m_msPar,
1863                                      m_Err[iC],
1864                                      m_Par[iC],
1865                                      m_FailFlag,
1866                                      N_proc,
1867                                      m_prop_config->finding_inter_layer_pflags,
1868                                      m_prop_config->finding_requires_propagation_to_hit_pos);
1869     }
1870 
1871     // PROP-FAIL-ENABLE The following to be enabled when propagation failure
1872     // detection is properly implemented in propagate-to-R/Z.
1873     // for (int i = 0; i < N_proc; ++i) {
1874     //   if (m_FailFlag[i]) {
1875     //     dprintf("MkFinder::updateWithLoadedHit fail in update, recovering.\n");
1876     //     m_Err[iC].copySlot(i, m_Err[iP]);
1877     //     m_Par[iC].copySlot(i, m_Par[iP]);
1878     //   }
1879     // }
1880   }
1881 
1882   void MkFinder::chi2OfLoadedHit(int N_proc, const FindingFoos &fnd_foos) {
1883     // We expect input in iC slots from above function.
1884     // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here
1885     // for propagation to the hit.
1886     clearFailFlag();
1887     (*fnd_foos.m_compute_chi2_foo)(m_Err[iC],
1888                                    m_Par[iC],
1889                                    m_Chg,
1890                                    m_msErr,
1891                                    m_msPar,
1892                                    m_Chi2,
1893                                    m_Par[iP],
1894                                    m_FailFlag,
1895                                    N_proc,
1896                                    m_prop_config->finding_inter_layer_pflags,
1897                                    m_prop_config->finding_requires_propagation_to_hit_pos);
1898 
1899     // PROP-FAIL-ENABLE .... removed here
1900   }
1901 
1902   //==============================================================================
1903   // CopyOutParErr
1904   //==============================================================================
1905 
1906   void MkFinder::copyOutParErr(std::vector<CombCandidate> &seed_cand_vec, int N_proc, bool outputProp) const {
1907     const int iO = outputProp ? iP : iC;
1908 
1909     for (int i = 0; i < NN; ++i) {
1910       if (i < N_proc) {
1911         TrackCand &cand = seed_cand_vec[m_SeedIdx(i, 0, 0)][m_CandIdx(i, 0, 0)];
1912 
1913         // Set the track state to the updated parameters
1914         m_Err[iO].copyOut(i, cand.errors_nc().Array());
1915         m_Par[iO].copyOut(i, cand.parameters_nc().Array());
1916         cand.setCharge(m_Chg(i, 0, 0));
1917 
1918         dprint((outputProp ? "propagated" : "updated")
1919                << " track parameters x=" << cand.parameters()[0] << " y=" << cand.parameters()[1]
1920                << " z=" << cand.parameters()[2] << " pt=" << 1. / cand.parameters()[3] << " posEta=" << cand.posEta());
1921       }
1922     }
1923   }
1924 
1925   //==============================================================================
1926   // Backward Fit hack
1927   //==============================================================================
1928 
1929   void MkFinder::bkFitInputTracks(TrackVec &cands, int beg, int end) {
1930     // Uses HitOnTrack vector from Track directly + a local cursor array to current hit.
1931 
1932     MatriplexTrackPacker mtp(&cands[beg]);
1933 
1934     int itrack = 0;
1935 
1936     for (int i = beg; i < end; ++i, ++itrack) {
1937       const Track &trk = cands[i];
1938 
1939       m_Chg(itrack, 0, 0) = trk.charge();
1940       m_CurHit[itrack] = trk.nTotalHits() - 1;
1941       m_HoTArr[itrack] = trk.getHitsOnTrackArray();
1942 
1943       mtp.addInput(trk);
1944     }
1945 
1946     m_Chi2.setVal(0);
1947 
1948     mtp.pack(m_Err[iC], m_Par[iC]);
1949 
1950     m_Err[iC].scale(100.0f);
1951   }
1952 
1953   void MkFinder::bkFitInputTracks(EventOfCombCandidates &eocss, int beg, int end) {
1954     // Could as well use HotArrays from tracks directly + a local cursor array to last hit.
1955 
1956     // XXXX - shall we assume only TrackCand-zero is needed and that we can freely
1957     // bork the HoTNode array?
1958 
1959     MatriplexTrackPacker mtp(&eocss[beg][0]);
1960 
1961     int itrack = 0;
1962 
1963     for (int i = beg; i < end; ++i, ++itrack) {
1964       const TrackCand &trk = eocss[i][0];
1965 
1966       m_Chg(itrack, 0, 0) = trk.charge();
1967       m_CurNode[itrack] = trk.lastCcIndex();
1968       m_HoTNodeArr[itrack] = trk.combCandidate()->hotsData();
1969 
1970       // XXXX Need TrackCand* to update num-hits. Unless I collect info elsewhere
1971       // and fix it in BkFitOutputTracks.
1972       m_TrkCand[itrack] = &eocss[i][0];
1973 
1974       mtp.addInput(trk);
1975     }
1976 
1977     m_Chi2.setVal(0);
1978 
1979     mtp.pack(m_Err[iC], m_Par[iC]);
1980 
1981     m_Err[iC].scale(100.0f);
1982   }
1983 
1984   //------------------------------------------------------------------------------
1985 
1986   void MkFinder::bkFitOutputTracks(TrackVec &cands, int beg, int end, bool outputProp) {
1987     // Only copy out track params / errors / chi2, all the rest is ok.
1988 
1989     const int iO = outputProp ? iP : iC;
1990 
1991     int itrack = 0;
1992     for (int i = beg; i < end; ++i, ++itrack) {
1993       Track &trk = cands[i];
1994 
1995       m_Err[iO].copyOut(itrack, trk.errors_nc().Array());
1996       m_Par[iO].copyOut(itrack, trk.parameters_nc().Array());
1997 
1998       trk.setChi2(m_Chi2(itrack, 0, 0));
1999       if (isFinite(trk.chi2())) {
2000         trk.setScore(getScoreCand(m_steering_params->m_track_scorer, trk));
2001       }
2002     }
2003   }
2004 
2005   void MkFinder::bkFitOutputTracks(EventOfCombCandidates &eocss, int beg, int end, bool outputProp) {
2006     // Only copy out track params / errors / chi2, all the rest is ok.
2007 
2008     // XXXX - where will rejected hits get removed?
2009 
2010     const int iO = outputProp ? iP : iC;
2011 
2012     int itrack = 0;
2013     for (int i = beg; i < end; ++i, ++itrack) {
2014       TrackCand &trk = eocss[i][0];
2015 
2016       m_Err[iO].copyOut(itrack, trk.errors_nc().Array());
2017       m_Par[iO].copyOut(itrack, trk.parameters_nc().Array());
2018 
2019       trk.setChi2(m_Chi2(itrack, 0, 0));
2020       if (isFinite(trk.chi2())) {
2021         trk.setScore(getScoreCand(m_steering_params->m_track_scorer, trk));
2022       }
2023     }
2024   }
2025 
2026   //------------------------------------------------------------------------------
2027 
2028 #if defined(DEBUG_BACKWARD_FIT) || defined(DEBUG_BACKWARD_FIT_BH)
2029   namespace {
2030     float e2s(float x) { return 1e4 * std::sqrt(x); }
2031   }  // namespace
2032 #endif
2033 
2034   void MkFinder::bkFitFitTracksBH(const EventOfHits &eventofhits,
2035                                   const SteeringParams &st_par,
2036                                   const int N_proc,
2037                                   bool chiDebug) {
2038     // Prototyping final backward fit.
2039     // This works with track-finding indices, before remapping.
2040     //
2041     // Layers should be collected during track finding and list all layers that have actual hits.
2042     // Then we could avoid checking which layers actually do have hits.
2043 
2044     MPlexQF tmp_chi2;
2045     float tmp_err[6] = {666, 0, 666, 0, 0, 666};
2046     float tmp_pos[3];
2047 
2048     for (auto lp_iter = st_par.m_layer_plan.rbegin(); lp_iter != st_par.m_layer_plan.rend(); ++lp_iter) {
2049       const int layer = lp_iter->m_layer;
2050 
2051       const LayerOfHits &L = eventofhits[layer];
2052       const LayerInfo &LI = L.layer_info();
2053 
2054       int count = 0;
2055       for (int i = 0; i < N_proc; ++i) {
2056         while (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].index < 0)
2057           --m_CurHit[i];
2058 
2059         if (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].layer == layer) {
2060           // Skip the overlap hits -- if they exist.
2061           // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
2062           // which is *before* in the reverse iteration that we are doing here.
2063           // 2. Seed-hit merging can result in more than two hits per layer.
2064           while (m_CurHit[i] > 0 && m_HoTArr[i][m_CurHit[i] - 1].layer == layer)
2065             --m_CurHit[i];
2066 
2067           const Hit &hit = L.refHit(m_HoTArr[i][m_CurHit[i]].index);
2068           m_msErr.copyIn(i, hit.errArray());
2069           m_msPar.copyIn(i, hit.posArray());
2070           ++count;
2071           --m_CurHit[i];
2072         } else {
2073           tmp_pos[0] = m_Par[iC](i, 0, 0);
2074           tmp_pos[1] = m_Par[iC](i, 1, 0);
2075           tmp_pos[2] = m_Par[iC](i, 2, 0);
2076           m_msErr.copyIn(i, tmp_err);
2077           m_msPar.copyIn(i, tmp_pos);
2078         }
2079       }
2080 
2081       if (count == 0)
2082         continue;
2083 
2084       // ZZZ Could add missing hits here, only if there are any actual matches.
2085 
2086       if (LI.is_barrel()) {
2087         propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
2088 
2089         kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params | KFO_Local_Cov,
2090                         m_Err[iP],
2091                         m_Par[iP],
2092                         m_msErr,
2093                         m_msPar,
2094                         m_Err[iC],
2095                         m_Par[iC],
2096                         tmp_chi2,
2097                         N_proc);
2098       } else {
2099         propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
2100 
2101         kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
2102                               m_Err[iP],
2103                               m_Par[iP],
2104                               m_msErr,
2105                               m_msPar,
2106                               m_Err[iC],
2107                               m_Par[iC],
2108                               tmp_chi2,
2109                               N_proc);
2110       }
2111 
2112       //fixup invpt sign and charge
2113       for (int n = 0; n < NN; ++n) {
2114         if (n < N_proc && m_Par[iC].At(n, 3, 0) < 0) {
2115           m_Chg.At(n, 0, 0) = -m_Chg.At(n, 0, 0);
2116           m_Par[iC].At(n, 3, 0) = -m_Par[iC].At(n, 3, 0);
2117         }
2118       }
2119 
2120 #ifdef DEBUG_BACKWARD_FIT_BH
2121       // Dump per hit chi2
2122       for (int i = 0; i < N_proc; ++i) {
2123         float r_h = std::hypot(m_msPar.At(i, 0, 0), m_msPar.At(i, 1, 0));
2124         float r_t = std::hypot(m_Par[iC].At(i, 0, 0), m_Par[iC].At(i, 1, 0));
2125 
2126         // if ((std::isnan(tmp_chi2[i]) || std::isnan(r_t)))
2127         // if ( ! std::isnan(tmp_chi2[i]) && tmp_chi2[i] > 0) // && tmp_chi2[i] > 30)
2128         if (chiDebug) {
2129           int ti = iP;
2130           printf(
2131               "CHIHIT %3d %10g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g "
2132               "%10g %10g %10g %10g %11.5g %11.5g\n",
2133               layer,
2134               tmp_chi2[i],
2135               m_msPar.At(i, 0, 0),
2136               m_msPar.At(i, 1, 0),
2137               m_msPar.At(i, 2, 0),
2138               r_h,  // x_h y_h z_h r_h -- hit pos
2139               e2s(m_msErr.At(i, 0, 0)),
2140               e2s(m_msErr.At(i, 1, 1)),
2141               e2s(m_msErr.At(i, 2, 2)),  // ex_h ey_h ez_h -- hit errors
2142               m_Par[ti].At(i, 0, 0),
2143               m_Par[ti].At(i, 1, 0),
2144               m_Par[ti].At(i, 2, 0),
2145               r_t,  // x_t y_t z_t r_t -- track pos
2146               e2s(m_Err[ti].At(i, 0, 0)),
2147               e2s(m_Err[ti].At(i, 1, 1)),
2148               e2s(m_Err[ti].At(i, 2, 2)),  // ex_t ey_t ez_t -- track errors
2149               1.0f / m_Par[ti].At(i, 3, 0),
2150               m_Par[ti].At(i, 4, 0),
2151               m_Par[ti].At(i, 5, 0),                                     // pt, phi, theta
2152               std::atan2(m_msPar.At(i, 1, 0), m_msPar.At(i, 0, 0)),      // phi_h
2153               std::atan2(m_Par[ti].At(i, 1, 0), m_Par[ti].At(i, 0, 0)),  // phi_t
2154               1e4f * std::hypot(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
2155                                 m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
2156               1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
2157               // e2s((m_msErr.At(i,0,0) + m_msErr.At(i,1,1)) / (r_h * r_h)),     // ephi_h
2158               // e2s((m_Err[ti].At(i,0,0) + m_Err[ti].At(i,1,1)) / (r_t * r_t))  // ephi_t
2159           );
2160         }
2161       }
2162 #endif
2163 
2164       // update chi2
2165       m_Chi2.add(tmp_chi2);
2166     }
2167   }
2168 
2169   //------------------------------------------------------------------------------
2170 
2171   void MkFinder::print_par_err(int corp, int mslot) const {
2172 #ifdef DEBUG
2173     printf("Parameters:\n");
2174     for (int i = 0; i < 6; ++i) {
2175       printf("  %12.4g", m_Par[corp].constAt(mslot, i, 0));
2176     }
2177     printf("\nError matrix\n");
2178     for (int i = 0; i < 6; ++i) {
2179       for (int j = 0; j < 6; ++j) {
2180         printf("  %12.4g", m_Err[corp].constAt(mslot, i, j));
2181       }
2182       printf("\n");
2183     }
2184 #endif
2185   }
2186 
2187   void MkFinder::bkFitFitTracks(const EventOfHits &eventofhits,
2188                                 const SteeringParams &st_par,
2189                                 const int N_proc,
2190                                 bool chiDebug) {
2191     // Prototyping final backward fit.
2192     // This works with track-finding indices, before remapping.
2193     //
2194     // Layers should be collected during track finding and list all layers that have actual hits.
2195     // Then we could avoid checking which layers actually do have hits.
2196 
2197     // bool debug = true;
2198 
2199     MPlexQF tmp_chi2;
2200     MPlexQI no_mat_effs;
2201     float tmp_err[6] = {666, 0, 666, 0, 0, 666};
2202     float tmp_pos[3];
2203 
2204 #if defined(DEBUG_PROP_UPDATE)
2205     const int DSLOT = 0;
2206     printf("bkfit entry, track in slot %d\n", DSLOT);
2207     print_par_err(iC, DSLOT);
2208 #endif
2209 
2210     for (auto lp_iter = st_par.make_iterator(SteeringParams::IT_BkwFit); lp_iter.is_valid(); ++lp_iter) {
2211       const int layer = lp_iter.layer();
2212 
2213       const LayerOfHits &L = eventofhits[layer];
2214       const LayerInfo &LI = L.layer_info();
2215 
2216 #if defined(DEBUG_BACKWARD_FIT)
2217       const Hit *last_hit_ptr[NN];
2218 #endif
2219 
2220       no_mat_effs.setVal(0);
2221       int done_count = 0;
2222       int here_count = 0;
2223       for (int i = 0; i < N_proc; ++i) {
2224         while (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.index < 0) {
2225           m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2226         }
2227 
2228         if (m_CurNode[i] < 0)
2229           ++done_count;
2230 
2231         if (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.layer == layer) {
2232           // Skip the overlap hits -- if they exist.
2233           // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
2234           // which is *before* in the reverse iteration that we are doing here.
2235           // 2. Seed-hit merging can result in more than two hits per layer.
2236           // while (m_CurHit[i] > 0 && m_HoTArr[ i ][ m_CurHit[i] - 1 ].layer == layer) --m_CurHit[i];
2237           while (m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx >= 0 &&
2238                  m_HoTNodeArr[i][m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx].m_hot.layer == layer)
2239             m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2240 
2241           const Hit &hit = L.refHit(m_HoTNodeArr[i][m_CurNode[i]].m_hot.index);
2242 
2243 #ifdef DEBUG_BACKWARD_FIT
2244           last_hit_ptr[i] = &hit;
2245 #endif
2246           m_msErr.copyIn(i, hit.errArray());
2247           m_msPar.copyIn(i, hit.posArray());
2248           ++here_count;
2249 
2250           m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2251         } else {
2252 #ifdef DEBUG_BACKWARD_FIT
2253           last_hit_ptr[i] = nullptr;
2254 #endif
2255           no_mat_effs[i] = 1;
2256           tmp_pos[0] = m_Par[iC](i, 0, 0);
2257           tmp_pos[1] = m_Par[iC](i, 1, 0);
2258           tmp_pos[2] = m_Par[iC](i, 2, 0);
2259           m_msErr.copyIn(i, tmp_err);
2260           m_msPar.copyIn(i, tmp_pos);
2261         }
2262       }
2263 
2264       if (done_count == N_proc)
2265         break;
2266       if (here_count == 0)
2267         continue;
2268 
2269       // ZZZ Could add missing hits here, only if there are any actual matches.
2270 
2271       clearFailFlag();
2272 
2273       // PROP-FAIL-ENABLE Once always "copy input to output on fail" is removed from
2274       // propagateToR one might want to enable this for barrel or endcap or both.
2275       if (LI.is_barrel()) {
2276         propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
2277 
2278         kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params | KFO_Local_Cov,
2279                         m_Err[iP],
2280                         m_Par[iP],
2281                         m_msErr,
2282                         m_msPar,
2283                         m_Err[iC],
2284                         m_Par[iC],
2285                         tmp_chi2,
2286                         N_proc);
2287       } else {
2288         propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
2289 
2290         kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
2291                               m_Err[iP],
2292                               m_Par[iP],
2293                               m_msErr,
2294                               m_msPar,
2295                               m_Err[iC],
2296                               m_Par[iC],
2297                               tmp_chi2,
2298                               N_proc);
2299       }
2300 
2301 #if defined(DEBUG_PROP_UPDATE)
2302       printf("\nbkfit at layer %d, track in slot %d -- fail=%d, had hit=%d (%g, %g, %g)\n",
2303              LI.layer_id(),
2304              DSLOT,
2305              m_FailFlag[DSLOT],
2306              1 - no_mat_effs[DSLOT],
2307              m_msPar(DSLOT, 0, 0),
2308              m_msPar(DSLOT, 1, 0),
2309              m_msPar(DSLOT, 2, 0));
2310       printf("Propagated:\n");
2311       print_par_err(iP, DSLOT);
2312       printf("Updated:\n");
2313       print_par_err(iC, DSLOT);
2314 #endif
2315 
2316       // Fixup for failed propagation or invpt sign and charge.
2317       for (int i = 0; i < NN; ++i) {
2318         // PROP-FAIL-ENABLE The following to be enabled when propagation failure
2319         // detection is properly implemented in propagate-to-R/Z.
2320         // 1. The following code was only expecting barrel state to be restored.
2321         //      auto barrel_pf(m_prop_config->backward_fit_pflags);
2322         //      barrel_pf.copy_input_state_on_fail = true;
2323         // 2. There is also check on chi2, commented out to keep physics changes minimal.
2324         /*
2325         if (m_FailFlag[i] && LI.is_barrel()) {
2326           // Barrel pflags are set to include PF_copy_input_state_on_fail.
2327           // Endcap errors are immaterial here (relevant for fwd search), with prop error codes
2328           // one could do other things.
2329           // Are there also fail conditions in KalmanUpdate?
2330 #ifdef DEBUG
2331           if (debug && g_debug) {
2332             dprintf("MkFinder::bkFitFitTracks prop fail: chi2=%f, layer=%d, label=%d. Recovering.\n",
2333                     tmp_chi2[i], LI.layer_id(), m_Label[i]);
2334             print_par_err(iC, i);
2335           }
2336 #endif
2337           m_Err[iC].copySlot(i, m_Err[iP]);
2338           m_Par[iC].copySlot(i, m_Par[iP]);
2339         } else if (tmp_chi2[i] > 200 || tmp_chi2[i] < 0) {
2340 #ifdef DEBUG
2341           if (debug && g_debug) {
2342             dprintf("MkFinder::bkFitFitTracks chi2 fail: chi2=%f, layer=%d, label=%d. Recovering.\n",
2343                     tmp_chi2[i], LI.layer_id(), m_Label[i]);
2344             print_par_err(iC, i);
2345           }
2346 #endif
2347           // Go back to propagated state (at the current hit, the previous one is lost).
2348           m_Err[iC].copySlot(i, m_Err[iP]);
2349           m_Par[iC].copySlot(i, m_Par[iP]);
2350         }
2351         */
2352         // Fixup invpt sign and charge.
2353         if (i < N_proc && m_Par[iC].At(i, 3, 0) < 0) {
2354           m_Chg.At(i, 0, 0) = -m_Chg.At(i, 0, 0);
2355           m_Par[iC].At(i, 3, 0) = -m_Par[iC].At(i, 3, 0);
2356         }
2357       }
2358 
2359 #if defined(DEBUG_BACKWARD_FIT)
2360       // clang-format off
2361       bool debug = true;
2362       const char beg_cur_sep = '/'; // set to ' ' root parsable printouts
2363       for (int i = 0; i < N_proc; ++i) {
2364         if (chiDebug && last_hit_ptr[i]) {
2365           TrackCand &bb = *m_TrkCand[i];
2366           int ti = iP;
2367           float chi = tmp_chi2.At(i, 0, 0);
2368           float chi_prnt = std::isfinite(chi) ? chi : -9;
2369 
2370 #if defined(MKFIT_STANDALONE)
2371           const MCHitInfo &mchi = m_event->simHitsInfo_[last_hit_ptr[i]->mcHitID()];
2372 
2373           dprintf("BKF_OVERLAP %d %d %d %d %d %d %d "
2374                   "%f%c%f %f %f%c%f %f %f %f %d %d %d %d "
2375                   "%f %f %f %f %f\n",
2376               m_event->evtID(),
2377 #else
2378           dprintf("BKF_OVERLAP %d %d %d %d %d %d "
2379                   "%f%c%f %f %f%c%f %f %f %f %d %d %d "
2380                   "%f %f %f %f %f\n",
2381 #endif
2382               bb.label(), (int)bb.prodType(), bb.isFindable(),
2383               layer, L.is_stereo(), L.is_barrel(),
2384               bb.pT(), beg_cur_sep, 1.0f / m_Par[ti].At(i, 3, 0),
2385               bb.posEta(),
2386               bb.posPhi(), beg_cur_sep, std::atan2(m_Par[ti].At(i, 1, 0), m_Par[ti].At(i, 0, 0)),
2387               std::hypot(m_Par[ti].At(i, 0, 0), m_Par[ti].At(i, 1, 0)),
2388               m_Par[ti].At(i, 2, 0),
2389               chi_prnt,
2390               std::isnan(chi), std::isfinite(chi), chi > 0,
2391 #if defined(MKFIT_STANDALONE)
2392               mchi.mcTrackID(),
2393 #endif
2394               // The following three can get negative / prouce nans in e2s.
2395               // std::abs the args for FPE hunt.
2396               e2s(std::abs(m_Err[ti].At(i, 0, 0))),
2397               e2s(std::abs(m_Err[ti].At(i, 1, 1))),
2398               e2s(std::abs(m_Err[ti].At(i, 2, 2))),  // sx_t sy_t sz_t -- track errors
2399               1e4f * std::hypot(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
2400                                 m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
2401               1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
2402           );
2403         }
2404       }
2405       // clang-format on
2406 #endif
2407 
2408       // update chi2
2409       m_Chi2.add(tmp_chi2);
2410     }
2411   }
2412 
2413   //------------------------------------------------------------------------------
2414 
2415   void MkFinder::bkFitPropTracksToPCA(const int N_proc) {
2416     propagateTracksToPCAZ(N_proc, m_prop_config->pca_prop_pflags);
2417   }
2418 
2419 }  // end namespace mkfit