Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-18 03:42:20

0001 #include "MkFinder.h"
0002 
0003 #include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
0004 #include "RecoTracker/MkFitCore/interface/IterationConfig.h"
0005 #include "CandCloner.h"
0006 #include "FindingFoos.h"
0007 #include "KalmanUtilsMPlex.h"
0008 #include "MatriplexPackers.h"
0009 #include "MiniPropagators.h"
0010 
0011 //#define DEBUG
0012 #include "Debug.h"
0013 
0014 #if defined(MKFIT_STANDALONE)
0015 #include "RecoTracker/MkFitCore/standalone/Event.h"
0016 #endif
0017 
0018 #ifdef RNT_DUMP_MkF_SelHitIdcs
0019 // declares struct RntIfc_selectHitIndices rnt_shi in unnamed namespace;
0020 #include "RecoTracker/MkFitCore/standalone/RntDumper/MkFinder_selectHitIndices.icc"
0021 #endif
0022 
0023 #include <vdt/atan2.h>
0024 #include <vdt/sin.h>
0025 #include <vdt/tan.h>
0026 
0027 #include <algorithm>
0028 #include <queue>
0029 
0030 namespace mkfit {
0031 
0032   void MkFinder::setup(const PropagationConfig &pc,
0033                        const IterationConfig &ic,
0034                        const IterationParams &ip,
0035                        const IterationLayerConfig &ilc,
0036                        const SteeringParams &sp,
0037                        const std::vector<bool> *ihm,
0038                        const Event *ev,
0039                        int region,
0040                        bool infwd) {
0041     m_prop_config = &pc;
0042     m_iteration_config = &ic;
0043     m_iteration_params = &ip;
0044     m_iteration_layer_config = &ilc;
0045     m_steering_params = &sp;
0046     m_iteration_hit_mask = ihm;
0047     m_event = ev;
0048     m_current_region = region;
0049     m_in_fwd = infwd;
0050   }
0051 
0052   void MkFinder::setup_bkfit(const PropagationConfig &pc, const SteeringParams &sp, const Event *ev) {
0053     m_prop_config = &pc;
0054     m_steering_params = &sp;
0055     m_event = ev;
0056   }
0057 
0058   void MkFinder::release() {
0059     m_prop_config = nullptr;
0060     m_iteration_config = nullptr;
0061     m_iteration_params = nullptr;
0062     m_iteration_layer_config = nullptr;
0063     m_steering_params = nullptr;
0064     m_iteration_hit_mask = nullptr;
0065     m_event = nullptr;
0066     m_current_region = -1;
0067     m_in_fwd = true;
0068   }
0069 
0070   void MkFinder::begin_layer(const LayerOfHits &layer_of_hits) {
0071 #ifdef RNT_DUMP_MkF_SelHitIdcs
0072     const LayerOfHits &L = layer_of_hits;
0073     const LayerInfo &LI = L.layer_info();
0074     rnt_shi.ResetH();
0075     rnt_shi.ResetF();
0076     *rnt_shi.h = {m_event->evtID(),
0077                   m_iteration_config->m_iteration_index,
0078                   m_iteration_config->m_track_algorithm,
0079                   m_current_region,
0080                   L.layer_id(),
0081                   L.is_barrel() ? LI.rin() : LI.zmin(),
0082                   LI.is_barrel() ? LI.rout() : LI.zmax(),
0083                   L.is_barrel(),
0084                   L.is_pixel(),
0085                   L.is_stereo()};
0086     *rnt_shi.f = *rnt_shi.h;
0087 #endif
0088   }
0089 
0090   void MkFinder::end_layer() {
0091 #ifdef RNT_DUMP_MkF_SelHitIdcs
0092     rnt_shi.FillH();
0093     rnt_shi.FillF();
0094 #endif
0095   }
0096 
0097   //==============================================================================
0098   // Input / Output TracksAndHitIdx
0099   //==============================================================================
0100 
0101   void MkFinder::inputTracksAndHitIdx(const std::vector<Track> &tracks, int beg, int end, bool inputProp) {
0102     // Assign track parameters to initial state and copy hit values in.
0103 
0104     // This might not be true for the last chunk!
0105     // assert(end - beg == NN);
0106 
0107     const int iI = inputProp ? iP : iC;
0108 
0109     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0110       copy_in(tracks[i], imp, iI);
0111     }
0112   }
0113 
0114   void MkFinder::inputTracksAndHitIdx(
0115       const std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool inputProp, int mp_offset) {
0116     // Assign track parameters to initial state and copy hit values in.
0117 
0118     // This might not be true for the last chunk!
0119     // assert(end - beg == NN);
0120 
0121     const int iI = inputProp ? iP : iC;
0122 
0123     for (int i = beg, imp = mp_offset; i < end; ++i, ++imp) {
0124       copy_in(tracks[idxs[i]], imp, iI);
0125     }
0126   }
0127 
0128   void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
0129                                       const std::vector<std::pair<int, int>> &idxs,
0130                                       int beg,
0131                                       int end,
0132                                       bool inputProp) {
0133     // Assign track parameters to initial state and copy hit values in.
0134 
0135     // This might not be true for the last chunk!
0136     // assert(end - beg == NN);
0137 
0138     const int iI = inputProp ? iP : iC;
0139 
0140     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0141       const TrackCand &trk = tracks[idxs[i].first][idxs[i].second];
0142 
0143       copy_in(trk, imp, iI);
0144 
0145       m_SeedIdx(imp, 0, 0) = idxs[i].first;
0146       m_CandIdx(imp, 0, 0) = idxs[i].second;
0147       m_SeedOriginIdx[imp] = tracks[idxs[i].first].seed_origin_index();
0148     }
0149   }
0150 
0151   void MkFinder::inputTracksAndHits(const std::vector<CombCandidate> &tracks,
0152                                     const LayerOfHits &layer_of_hits,
0153                                     const std::vector<UpdateIndices> &idxs,
0154                                     int beg,
0155                                     int end,
0156                                     bool inputProp) {
0157     // Assign track parameters to initial state and copy hit values in.
0158 
0159     // This might not be true for the last chunk!
0160     // assert(end - beg == NN);
0161 
0162     const int iI = inputProp ? iP : iC;
0163 
0164     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0165       const TrackCand &trk = tracks[idxs[i].seed_idx][idxs[i].cand_idx];
0166 
0167       copy_in(trk, imp, iI);
0168 
0169       m_SeedIdx(imp, 0, 0) = idxs[i].seed_idx;
0170       m_CandIdx(imp, 0, 0) = idxs[i].cand_idx;
0171       m_SeedOriginIdx[imp] = tracks[idxs[i].seed_idx].seed_origin_index();
0172 
0173       // Reuse selectHitIndices() arrays -- used also in packModuleNormDir()
0174       m_XHitArr(imp, 0, 0) = idxs[i].hit_idx;
0175       m_XHitSize(imp, 0, 0) = 1;
0176 
0177       const Hit &hit = layer_of_hits.refHit(idxs[i].hit_idx);
0178       m_msErr.copyIn(imp, hit.errArray());
0179       m_msPar.copyIn(imp, hit.posArray());
0180     }
0181   }
0182 
0183   void MkFinder::inputOverlapHits(const LayerOfHits &layer_of_hits,
0184                                   const std::vector<UpdateIndices> &idxs,
0185                                   int beg,
0186                                   int end) {
0187     // Copy overlap hit values in.
0188 
0189     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0190       const Hit &hit = layer_of_hits.refHit(idxs[i].ovlp_idx);
0191       m_msErr.copyIn(imp, hit.errArray());
0192       m_msPar.copyIn(imp, hit.posArray());
0193     }
0194   }
0195 
0196   void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
0197                                       const std::vector<std::pair<int, IdxChi2List>> &idxs,
0198                                       int beg,
0199                                       int end,
0200                                       bool inputProp) {
0201     // Assign track parameters to initial state and copy hit values in.
0202 
0203     // This might not be true for the last chunk!
0204     // assert(end - beg == NN);
0205 
0206     const int iI = inputProp ? iP : iC;
0207 
0208     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0209       const TrackCand &trk = tracks[idxs[i].first][idxs[i].second.trkIdx];
0210 
0211       copy_in(trk, imp, iI);
0212 
0213       m_SeedIdx(imp, 0, 0) = idxs[i].first;
0214       m_CandIdx(imp, 0, 0) = idxs[i].second.trkIdx;
0215       m_SeedOriginIdx[imp] = tracks[idxs[i].first].seed_origin_index();
0216     }
0217   }
0218 
0219   void MkFinder::outputTracksAndHitIdx(std::vector<Track> &tracks, int beg, int end, bool outputProp) const {
0220     // Copies requested track parameters into Track objects.
0221     // The tracks vector should be resized to allow direct copying.
0222 
0223     const int iO = outputProp ? iP : iC;
0224 
0225     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0226       copy_out(tracks[i], imp, iO);
0227     }
0228   }
0229 
0230   void MkFinder::outputTracksAndHitIdx(
0231       std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool outputProp) const {
0232     // Copies requested track parameters into Track objects.
0233     // The tracks vector should be resized to allow direct copying.
0234 
0235     const int iO = outputProp ? iP : iC;
0236 
0237     for (int i = beg, imp = 0; i < end; ++i, ++imp) {
0238       copy_out(tracks[idxs[i]], imp, iO);
0239     }
0240   }
0241 
0242   void MkFinder::packModuleNormDirPnt(
0243       const LayerOfHits &layer_of_hits, int hit_cnt, MPlexHV &norm, MPlexHV &dir, MPlexHV &pnt, int N_proc) const {
0244     for (int itrack = 0; itrack < NN; ++itrack) {
0245       if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
0246         const auto &hit = layer_of_hits.refHit(m_XHitArr.constAt(itrack, hit_cnt, 0));
0247         unsigned int mid = hit.detIDinLayer();
0248         const ModuleInfo &mi = layer_of_hits.layer_info().module_info(mid);
0249         norm.At(itrack, 0, 0) = mi.zdir[0];
0250         norm.At(itrack, 1, 0) = mi.zdir[1];
0251         norm.At(itrack, 2, 0) = mi.zdir[2];
0252         dir.At(itrack, 0, 0) = mi.xdir[0];
0253         dir.At(itrack, 1, 0) = mi.xdir[1];
0254         dir.At(itrack, 2, 0) = mi.xdir[2];
0255         pnt.At(itrack, 0, 0) = mi.pos[0];
0256         pnt.At(itrack, 1, 0) = mi.pos[1];
0257         pnt.At(itrack, 2, 0) = mi.pos[2];
0258         //std::cout << "packModuleNormDirPnt id=" << hit_cnt << " norm=(" << mi.zdir[0] << ", " << mi.zdir[1] << ", " << mi.zdir[2] << ") - dir=(" << mi.xdir[0] << ", " << mi.xdir[1] << ", " << mi.xdir[2] << ") -  pnt=(" << mi.pos[0] << ", " << mi.pos[1] << ", " << mi.pos[2] << ")" << std::endl;
0259       }
0260     }
0261   }
0262 
0263   //==============================================================================
0264   // getHitSelDynamicWindows
0265   //==============================================================================
0266   // From HitSelectionWindows.h: track-related config on hit selection windows
0267 
0268   void MkFinder::getHitSelDynamicWindows(
0269       const float invpt, const float theta, float &min_dq, float &max_dq, float &min_dphi, float &max_dphi) {
0270     float max_invpt = std::min(invpt, 10.0f);  // => pT>0.1 GeV
0271 
0272     enum SelWinParameters_e { dp_sf = 0, dp_0, dp_1, dp_2, dq_sf, dq_0, dq_1, dq_2 };
0273     auto &v = m_iteration_layer_config->get_window_params(m_in_fwd, true);
0274 
0275     if (!v.empty()) {
0276       // dq hit selection window
0277       const float this_dq = v[dq_sf] * (v[dq_0] * max_invpt + v[dq_1] * theta + v[dq_2]);
0278       // In case value is below 0 (bad window derivation or other reasons), leave original limits
0279       if (this_dq > 0.f) {
0280         min_dq = this_dq;
0281         max_dq = 2.0f * min_dq;
0282       }
0283 
0284       // dphi hit selection window
0285       const float this_dphi = v[dp_sf] * (v[dp_0] * max_invpt + v[dp_1] * theta + v[dp_2]);
0286       // In case value is too low (bad window derivation or other reasons), leave original limits
0287       if (this_dphi > min_dphi) {
0288         min_dphi = this_dphi;
0289         max_dphi = 2.0f * min_dphi;
0290       }
0291     }
0292   }
0293 
0294   //==============================================================================
0295   // getHitSelDynamicChi2Cut
0296   //==============================================================================
0297   // From HitSelectionWindows.h: track-related config on hit selection windows
0298 
0299   inline float MkFinder::getHitSelDynamicChi2Cut(const int itrk, const int ipar) {
0300     const float minChi2Cut = m_iteration_params->chi2Cut_min;
0301     const float invpt = m_Par[ipar].At(itrk, 3, 0);
0302     const float theta = std::abs(m_Par[ipar].At(itrk, 5, 0) - Const::PIOver2);
0303 
0304     float max_invpt = std::min(invpt, 10.0f);  // => pT>0.1 GeV
0305 
0306     enum SelWinParameters_e { c2_sf = 8, c2_0, c2_1, c2_2 };
0307     auto &v = m_iteration_layer_config->get_window_params(m_in_fwd, true);
0308 
0309     if (!v.empty()) {
0310       float this_c2 = v[c2_sf] * (v[c2_0] * max_invpt + v[c2_1] * theta + v[c2_2]);
0311       // In case value is too low (bad window derivation or other reasons), leave original limits
0312       if (this_c2 > minChi2Cut)
0313         return this_c2;
0314     }
0315     return minChi2Cut;
0316   }
0317 
0318   //==============================================================================
0319   // SelectHitIndices
0320   //==============================================================================
0321 
0322   void MkFinder::selectHitIndices(const LayerOfHits &layer_of_hits, const int N_proc, bool fill_binsearch_only) {
0323     // bool debug = true;
0324     using bidx_t = LayerOfHits::bin_index_t;
0325     using bcnt_t = LayerOfHits::bin_content_t;
0326     const LayerOfHits &L = layer_of_hits;
0327     const IterationLayerConfig &ILC = *m_iteration_layer_config;
0328 
0329     const int iI = iP;
0330     const float nSigmaPhi = 3;
0331     const float nSigmaZ = 3;
0332     const float nSigmaR = 3;
0333 
0334     dprintf("LayerOfHits::SelectHitIndices %s layer=%d N_proc=%d\n",
0335             L.is_barrel() ? "barrel" : "endcap",
0336             L.layer_id(),
0337             N_proc);
0338 
0339     float dqv[NN], dphiv[NN], qv[NN], phiv[NN];
0340     bidx_t qb1v[NN], qb2v[NN], qbv[NN], pb1v[NN], pb2v[NN];
0341 
0342     const auto assignbins = [&](int itrack,
0343                                 float q,
0344                                 float dq,
0345                                 float phi,
0346                                 float dphi,
0347                                 float min_dq,
0348                                 float max_dq,
0349                                 float min_dphi,
0350                                 float max_dphi) {
0351       dphi = std::clamp(std::abs(dphi), min_dphi, max_dphi);
0352       dq = std::clamp(dq, min_dq, max_dq);
0353       //
0354       qv[itrack] = q;
0355       phiv[itrack] = phi;
0356       dphiv[itrack] = dphi;
0357       dqv[itrack] = dq;
0358       //
0359       qbv[itrack] = L.qBinChecked(q);
0360       qb1v[itrack] = L.qBinChecked(q - dq);
0361       qb2v[itrack] = L.qBinChecked(q + dq) + 1;
0362       pb1v[itrack] = L.phiBinChecked(phi - dphi);
0363       pb2v[itrack] = L.phiMaskApply(L.phiBin(phi + dphi) + 1);
0364     };
0365 
0366     const auto calcdphi2 = [&](int itrack, float dphidx, float dphidy) {
0367       return dphidx * dphidx * m_Err[iI].constAt(itrack, 0, 0) + dphidy * dphidy * m_Err[iI].constAt(itrack, 1, 1) +
0368              2 * dphidx * dphidy * m_Err[iI].constAt(itrack, 0, 1);
0369     };
0370 
0371     const auto calcdphi = [&](float dphi2, float min_dphi) {
0372       return std::max(nSigmaPhi * std::sqrt(std::abs(dphi2)), min_dphi);
0373     };
0374 
0375     if (L.is_barrel()) {
0376       // Pull out the part of the loop that vectorizes with icc and gcc
0377       // In llvm16 clang issues a warning that it can't vectorize
0378       // the loop.  Unfortunately, there doesn't seem to be a
0379       // pragma to suppress the warning, so we ifdef it out.  This
0380       // should be rechecked if llvm vectorization improves.
0381 #if !defined(__clang__)
0382 #pragma omp simd
0383 #endif
0384       for (int itrack = 0; itrack < NN; ++itrack) {
0385         if (itrack >= N_proc)
0386           continue;
0387         m_XHitSize[itrack] = 0;
0388 
0389         float min_dq = ILC.min_dq();
0390         float max_dq = ILC.max_dq();
0391         float min_dphi = ILC.min_dphi();
0392         float max_dphi = ILC.max_dphi();
0393 
0394         const float invpt = m_Par[iI].At(itrack, 3, 0);
0395         const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
0396         getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
0397 
0398         const float x = m_Par[iI].constAt(itrack, 0, 0);
0399         const float y = m_Par[iI].constAt(itrack, 1, 0);
0400         const float r2 = x * x + y * y;
0401         const float dphidx = -y / r2, dphidy = x / r2;
0402         const float dphi2 = calcdphi2(itrack, dphidx, dphidy);
0403 #ifdef HARD_CHECK
0404         assert(dphi2 >= 0);
0405 #endif
0406 
0407         const float phi = getPhi(x, y);
0408         float dphi = calcdphi(dphi2, min_dphi);
0409 
0410         const float z = m_Par[iI].constAt(itrack, 2, 0);
0411         const float dz = std::abs(nSigmaZ * std::sqrt(m_Err[iI].constAt(itrack, 2, 2)));
0412         const float edgeCorr = std::abs(0.5f * (L.layer_info().rout() - L.layer_info().rin()) /
0413                                         vdt::fast_tanf(m_Par[iI].constAt(itrack, 5, 0)));
0414         // XXX-NUM-ERR above, m_Err(2,2) gets negative!
0415 
0416         m_XWsrResult[itrack] = L.is_within_z_sensitive_region(z, std::sqrt(dz * dz + edgeCorr * edgeCorr));
0417         assignbins(itrack, z, dz, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
0418 
0419         // Relax propagation-fail detection to be in line with pre-43145.
0420         if (m_FailFlag[itrack] && r2 >= sqr(L.layer_info().rin())) {
0421           m_FailFlag[itrack] = 0;
0422         }
0423       }
0424     } else  // endcap
0425     {
0426       //layer half-thikness for dphi spread calculation; only for very restrictive iters
0427       const float layerD = std::abs(L.layer_info().zmax() - L.layer_info().zmin()) * 0.5f *
0428                            (m_iteration_params->maxConsecHoles == 0 || m_iteration_params->maxHolesPerCand == 0);
0429       // Pull out the part of the loop that vectorizes with icc and gcc
0430 #if !defined(__clang__)
0431 #pragma omp simd
0432 #endif
0433       for (int itrack = 0; itrack < NN; ++itrack) {
0434         m_XHitSize[itrack] = 0;
0435 
0436         float min_dq = ILC.min_dq();
0437         float max_dq = ILC.max_dq();
0438         float min_dphi = ILC.min_dphi();
0439         float max_dphi = ILC.max_dphi();
0440 
0441         const float invpt = m_Par[iI].At(itrack, 3, 0);
0442         const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
0443         getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
0444 
0445         const float x = m_Par[iI].constAt(itrack, 0, 0);
0446         const float y = m_Par[iI].constAt(itrack, 1, 0);
0447         const float r2 = x * x + y * y;
0448         const float r2Inv = 1.f / r2;
0449         const float dphidx = -y * r2Inv, dphidy = x * r2Inv;
0450         const float phi = getPhi(x, y);
0451         const float tanT = vdt::fast_tanf(m_Par[iI].At(itrack, 5, 0));
0452         const float dphi2 = calcdphi2(itrack, dphidx, dphidy)
0453                             //range from finite layer thickness
0454                             + std::pow(layerD * tanT * vdt::fast_sinf(m_Par[iI].At(itrack, 4, 0) - phi), 2) * r2Inv;
0455 #ifdef HARD_CHECK
0456         assert(dphi2 >= 0);
0457 #endif
0458 
0459         float dphi = calcdphi(dphi2, min_dphi);
0460 
0461         const float r = std::sqrt(r2);
0462         const float dr = nSigmaR * std::sqrt(std::abs(x * x * m_Err[iI].constAt(itrack, 0, 0) +
0463                                                       y * y * m_Err[iI].constAt(itrack, 1, 1) +
0464                                                       2 * x * y * m_Err[iI].constAt(itrack, 0, 1)) /
0465                                              r2);
0466         const float edgeCorr = std::abs(0.5f * (L.layer_info().zmax() - L.layer_info().zmin()) * tanT);
0467 
0468         m_XWsrResult[itrack] = L.is_within_r_sensitive_region(r, std::sqrt(dr * dr + edgeCorr * edgeCorr));
0469         assignbins(itrack, r, dr, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
0470       }
0471     }
0472 
0473 #ifdef RNT_DUMP_MkF_SelHitIdcs
0474     if (fill_binsearch_only) {
0475       // XXX loop over good indices (prepared in V2) and put in V1 BinSearch results
0476       for (auto i : rnt_shi.f_h_idcs) {
0477         CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[i]];
0478         ci.bso = BinSearch({phiv[i],
0479                             dphiv[i],
0480                             qv[i],
0481                             dqv[i],
0482                             pb1v[i],
0483                             pb2v[i],
0484                             qb1v[i],
0485                             qb2v[i],
0486                             m_XWsrResult[i].m_wsr,
0487                             m_XWsrResult[i].m_in_gap,
0488                             false});
0489       }
0490       return;
0491     }
0492 #endif
0493 
0494     // Vectorizing this makes it run slower!
0495     //#pragma omp simd
0496     for (int itrack = 0; itrack < NN; ++itrack) {
0497       if (itrack >= N_proc) {
0498         continue;
0499       }
0500       // PROP-FAIL-ENABLE The following to be enabled when propagation failure
0501       // detection is properly implemented in propagate-to-R/Z.
0502       if (m_FailFlag[itrack]) {
0503         m_XWsrResult[itrack].m_wsr = WSR_Failed;
0504         continue;
0505       }
0506 
0507       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
0508         continue;
0509       }
0510 
0511       const bidx_t qb = qbv[itrack];
0512       const bidx_t qb1 = qb1v[itrack];
0513       const bidx_t qb2 = qb2v[itrack];
0514       const bidx_t pb1 = pb1v[itrack];
0515       const bidx_t pb2 = pb2v[itrack];
0516 
0517       const float q = qv[itrack];
0518       const float phi = phiv[itrack];
0519       const float dphi = dphiv[itrack];
0520       const float dq = dqv[itrack];
0521 
0522       // clang-format off
0523       dprintf("  %2d/%2d: %6.3f %6.3f %6.6f %7.5f %3u %3u %4u %4u\n",
0524               L.layer_id(), itrack, q, phi, dq, dphi,
0525               qb1, qb2, pb1, pb2);
0526       // clang-format on
0527 
0528 #if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
0529       const auto ngr = [](float f) { return isFinite(f) ? f : -999.0f; };
0530 
0531       const int seed_lbl = m_event->currentSeed(m_SeedOriginIdx[itrack]).label();
0532       Event::SimLabelFromHits slfh = m_event->simLabelForCurrentSeed(m_SeedOriginIdx[itrack]);
0533       const int seed_mcid = (slfh.is_set() && slfh.good_frac() > 0.7f) ? slfh.label : -999999;
0534 #endif
0535 
0536       for (bidx_t qi = qb1; qi != qb2; ++qi) {
0537         for (bidx_t pi = pb1; pi != pb2; pi = L.phiMaskApply(pi + 1)) {
0538           // Limit to central Q-bin
0539           if (qi == qb && L.isBinDead(pi, qi) == true) {
0540             dprint("dead module for track in layer=" << L.layer_id() << " qb=" << qi << " pi=" << pi << " q=" << q
0541                                                      << " phi=" << phi);
0542             m_XWsrResult[itrack].m_in_gap = true;
0543           }
0544 
0545           // MT: The following line is the biggest hog (4% total run time).
0546           // This comes from cache misses, I presume.
0547           // It might make sense to make first loop to extract bin indices
0548           // and issue prefetches at the same time.
0549           // Then enter vectorized loop to actually collect the hits in proper order.
0550 
0551           //SK: ~20x1024 bin sizes give mostly 1 hit per bin. Commented out for 128 bins or less
0552           // #pragma nounroll
0553           auto pbi = L.phiQBinContent(pi, qi);
0554           for (bcnt_t hi = pbi.begin(); hi < pbi.end(); ++hi) {
0555             // MT: Access into m_hit_zs and m_hit_phis is 1% run-time each.
0556 
0557             const unsigned int hi_orig = L.getOriginalHitIndex(hi);
0558 
0559             if (m_iteration_hit_mask && (*m_iteration_hit_mask)[hi_orig]) {
0560               dprintf(
0561                   "Yay, denying masked hit on layer %u, hi %u, orig idx %u\n", L.layer_info().layer_id(), hi, hi_orig);
0562               continue;
0563             }
0564 
0565             if (Config::usePhiQArrays) {
0566               if (m_XHitSize[itrack] >= MPlexHitIdxMax)
0567                 break;
0568 
0569               const float ddq = std::abs(q - L.hit_q(hi));
0570               const float ddphi = cdist(std::abs(phi - L.hit_phi(hi)));
0571 
0572               // clang-format off
0573               dprintf("     SHI %3u %4u %5u  %6.3f %6.3f %6.4f %7.5f   %s\n",
0574                       qi, pi, hi, L.hit_q(hi), L.hit_phi(hi),
0575                       ddq, ddphi, (ddq < dq && ddphi < dphi) ? "PASS" : "FAIL");
0576               // clang-format on
0577 
0578 #if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
0579               // clang-format off
0580               MPlexQF thisOutChi2;
0581               {
0582                 const MCHitInfo &mchinfo = m_event->simHitsInfo_[L.refHit(hi).mcHitID()];
0583                 int mchid = mchinfo.mcTrackID();
0584                 int st_isfindable = 0;
0585                 int st_label = -999999;
0586                 int st_prodtype = 0;
0587                 int st_nhits = -1;
0588                 int st_charge = 0;
0589                 float st_r = -999.;
0590                 float st_z = -999.;
0591                 float st_pt = -999.;
0592                 float st_eta = -999.;
0593                 float st_phi = -999.;
0594                 if (mchid >= 0) {
0595                   Track simtrack = m_event->simTracks_[mchid];
0596                   st_isfindable = (int)simtrack.isFindable();
0597                   st_label = simtrack.label();
0598                   st_prodtype = (int)simtrack.prodType();
0599                   st_pt = simtrack.pT();
0600                   st_eta = simtrack.momEta();
0601                   st_phi = simtrack.momPhi();
0602                   st_nhits = simtrack.nTotalHits();
0603                   st_charge = simtrack.charge();
0604                   st_r = simtrack.posR();
0605                   st_z = simtrack.z();
0606                 }
0607 
0608                 const Hit &thishit = L.refHit(hi_orig);
0609                 m_msErr.copyIn(itrack, thishit.errArray());
0610                 m_msPar.copyIn(itrack, thishit.posArray());
0611 
0612                 MPlexQI propFail;
0613                 MPlexLV tmpPropPar;
0614                 const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(L.is_barrel());
0615                 (*fnd_foos.m_compute_chi2_foo)(m_Err[iI],
0616                                                m_Par[iI],
0617                                                m_Chg,
0618                                                m_msErr,
0619                                                m_msPar,
0620                                                thisOutChi2,
0621                                                tmpPropPar,
0622                                                propFail,
0623                                                N_proc,
0624                                                m_prop_config->finding_intra_layer_pflags,
0625                                                m_prop_config->finding_requires_propagation_to_hit_pos);
0626 
0627                 float hx = thishit.x();
0628                 float hy = thishit.y();
0629                 float hz = thishit.z();
0630                 float hr = hipo(hx, hy);
0631                 float hphi = vdt::fast_atan2f(hy, hx);
0632                 float hex = ngr( std::sqrt(thishit.exx()) );
0633                 float hey = ngr( std::sqrt(thishit.eyy()) );
0634                 float hez = ngr( std::sqrt(thishit.ezz()) );
0635                 float her = ngr( std::sqrt(
0636                     (hx * hx * thishit.exx() + hy * hy * thishit.eyy() + 2.0f * hx * hy * m_msErr.At(itrack, 0, 1)) /
0637                     (hr * hr)) );
0638                 float hephi = ngr( std::sqrt(thishit.ephi()) );
0639                 float hchi2 = ngr( thisOutChi2[itrack] );
0640                 float tx = m_Par[iI].At(itrack, 0, 0);
0641                 float ty = m_Par[iI].At(itrack, 1, 0);
0642                 float tz = m_Par[iI].At(itrack, 2, 0);
0643                 float tr = hipo(tx, ty);
0644                 float tphi = std::atan2(ty, tx);
0645                 // float tchi2 = ngr( m_Chi2(itrack, 0, 0) ); // unused
0646                 float tex = ngr( std::sqrt(m_Err[iI].At(itrack, 0, 0)) );
0647                 float tey = ngr( std::sqrt(m_Err[iI].At(itrack, 1, 1)) );
0648                 float tez = ngr( std::sqrt(m_Err[iI].At(itrack, 2, 2)) );
0649                 float ter = ngr( std::sqrt(
0650                     (tx * tx * tex * tex + ty * ty * tey * tey + 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
0651                     (tr * tr)) );
0652                 float tephi = ngr( std::sqrt(
0653                     (ty * ty * tex * tex + tx * tx * tey * tey - 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
0654                     (tr * tr * tr * tr)) );
0655                 float ht_dxy = hipo(hx - tx, hy - ty);
0656                 float ht_dz = hz - tz;
0657                 float ht_dphi = cdist(std::abs(hphi - tphi));
0658 
0659                 static bool first = true;
0660                 if (first) {
0661                   printf(
0662                       "HITWINDOWSEL "
0663                       "evt_id/I:track_algo/I:"
0664                       "lyr_id/I:lyr_isbrl/I:hit_idx/I:"
0665                       "trk_cnt/I:trk_idx/I:trk_label/I:"
0666                       "trk_pt/F:trk_eta/F:trk_mphi/F:trk_chi2/F:"
0667                       "nhits/I:"
0668                       "seed_idx/I:seed_label/I:seed_mcid/I:"
0669                       "hit_mcid/I:"
0670                       "st_isfindable/I:st_prodtype/I:st_label/I:"
0671                       "st_pt/F:st_eta/F:st_phi/F:"
0672                       "st_nhits/I:st_charge/I:st_r/F:st_z/F:"
0673                       "trk_q/F:hit_q/F:dq_trkhit/F:dq_cut/F:trk_phi/F:hit_phi/F:dphi_trkhit/F:dphi_cut/F:"
0674                       "t_x/F:t_y/F:t_r/F:t_phi/F:t_z/F:"
0675                       "t_ex/F:t_ey/F:t_er/F:t_ephi/F:t_ez/F:"
0676                       "h_x/F:h_y/F:h_r/F:h_phi/F:h_z/F:"
0677                       "h_ex/F:h_ey/F:h_er/F:h_ephi/F:h_ez/F:"
0678                       "ht_dxy/F:ht_dz/F:ht_dphi/F:"
0679                       "h_chi2/F"
0680                       "\n");
0681                   first = false;
0682                 }
0683 
0684                 if (!(std::isnan(phi)) && !(std::isnan(getEta(m_Par[iI].At(itrack, 5, 0))))) {
0685                   //|| std::isnan(ter) || std::isnan(her) || std::isnan(m_Chi2(itrack, 0, 0)) || std::isnan(hchi2)))
0686                   printf("HITWINDOWSEL "
0687                          "%d %d"
0688                          "%d %d %d "
0689                          "%d %d %d "
0690                          "%6.3f %6.3f %6.3f %6.3f "
0691                          "%d "
0692                          "%d %d %d "
0693                          "%d "
0694                          "%d %d %d "
0695                          "%6.3f %6.3f %6.3f "
0696                          "%d %d %6.3f %6.3f "
0697                          "%6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f "
0698                          "%6.3f %6.3f %6.3f %6.3f %6.3f "
0699                          "%6.6f %6.6f %6.6f %6.6f %6.6f "
0700                          "%6.3f %6.3f %6.3f %6.3f %6.3f "
0701                          "%6.6f %6.6f %6.6f %6.6f %6.6f "
0702                          "%6.3f %6.3f %6.3f "
0703                          "%6.3f"
0704                          "\n",
0705                          m_event->evtID(), m_iteration_config->m_track_algorithm,
0706                          L.layer_id(), L.is_barrel(), hi_orig,
0707                          itrack, m_CandIdx(itrack, 0, 0), m_Label(itrack, 0, 0),
0708                          1.0f / m_Par[iI].At(itrack, 3, 0), getEta(m_Par[iI].At(itrack, 5, 0)), m_Par[iI].At(itrack, 4, 0), m_Chi2(itrack, 0, 0),
0709                          m_NFoundHits(itrack, 0, 0),
0710                          m_SeedOriginIdx(itrack, 0, 0), seed_lbl, seed_mcid,
0711                          mchid,
0712                          st_isfindable, st_prodtype, st_label,
0713                          st_pt, st_eta, st_phi,
0714                          st_nhits, st_charge, st_r, st_z,
0715                          q, L.hit_q(hi), ddq, dq, phi, L.hit_phi(hi), ddphi, dphi,
0716                          tx, ty, tr, tphi, tz,
0717                          tex, tey, ter, tephi, tez,
0718                          hx, hy, hr, hphi, hz,
0719                          hex, hey, her, hephi, hez,
0720                          ht_dxy, ht_dz, ht_dphi,
0721                          hchi2);
0722                 }
0723               }
0724               // clang-format on
0725 #endif
0726 
0727               if (ddq >= dq)
0728                 continue;
0729               if (ddphi >= dphi)
0730                 continue;
0731 
0732               // MT: Removing extra check gives full efficiency ...
0733               //     and means our error estimations are wrong!
0734               // Avi says we should have *minimal* search windows per layer.
0735               // Also ... if bins are sufficiently small, we do not need the extra
0736               // checks, see above.
0737               m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
0738             } else {
0739               // MT: The following check alone makes more sense with spiral traversal,
0740               // we'd be taking in closest hits first.
0741 
0742               // Hmmh -- there used to be some more checks here.
0743               // Or, at least, the phi binning was much smaller and no further checks were done.
0744               assert(false && "this code has not been used in a while -- see comments in code");
0745 
0746               if (m_XHitSize[itrack] < MPlexHitIdxMax) {
0747                 m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
0748               }
0749             }
0750           }  //hi
0751         }  //pi
0752       }  //qi
0753     }  //itrack
0754   }
0755 
0756   //==============================================================================
0757   // SelectHitIndicesV2
0758   //==============================================================================
0759 
0760   void MkFinder::selectHitIndicesV2(const LayerOfHits &layer_of_hits, const int N_proc) {
0761     // bool debug = true;
0762     using bidx_t = LayerOfHits::bin_index_t;
0763     using bcnt_t = LayerOfHits::bin_content_t;
0764     const LayerOfHits &L = layer_of_hits;
0765     const LayerInfo &LI = L.layer_info();
0766 
0767     const int iI = iP;
0768 
0769     dprintf("LayerOfHits::SelectHitIndicesV2 %s layer=%d N_proc=%d\n",
0770             L.is_barrel() ? "barrel" : "endcap",
0771             L.layer_id(),
0772             N_proc);
0773 
0774 #ifdef RNT_DUMP_MkF_SelHitIdcs
0775     rnt_shi.InnerIdcsReset(N_proc);
0776     Event::SimLabelFromHits sim_lbls[NN];
0777     for (int i = 0; i < N_proc; ++i) {
0778       sim_lbls[i] = m_event->simLabelForCurrentSeed(m_SeedOriginIdx[i]);
0779       if (m_FailFlag[i]) {
0780         rnt_shi.RegisterFailedProp(i, m_Par[1 - iI], m_Par[iI], m_event, m_SeedOriginIdx[i]);
0781       } else if (sim_lbls[i].is_set()) {
0782         /* CandInfo &ci = */ rnt_shi.RegisterGoodProp(i, m_Par[iI], m_event, m_SeedOriginIdx[i]);
0783       }  // else ... could do something about the bad seeds ... probably better to collect elsewhere.
0784     }
0785     // Get BinSearch result from V1. Note -- it can clear m_FailFlag for some cands!
0786     auto ff_stash = m_FailFlag;
0787     selectHitIndices(layer_of_hits, N_proc, true);
0788     m_FailFlag = ff_stash;
0789 #endif
0790 
0791     constexpr int NEW_MAX_HIT = 6;  // 4 - 6 give about the same # of tracks in quality-val
0792     constexpr float DDPHI_PRESEL_FAC = 2.0f;
0793     constexpr float DDQ_PRESEL_FAC = 1.2f;
0794     constexpr float PHI_BIN_EXTRA_FAC = 2.75f;
0795     constexpr float Q_BIN_EXTRA_FAC = 1.6f;
0796 
0797     namespace mp = mini_propagators;
0798     struct Bins {
0799       MPlexQUH q0, q1, q2, p1, p2;
0800       mp::InitialStatePlex isp;
0801       mp::StatePlex sp1, sp2;
0802       int n_proc;
0803 
0804       MPlexQF dphi_track, dq_track;  // 3 sigma track errors at initial state
0805 
0806       // debug & ntuple dump -- to be local in functions
0807       MPlexQF phi_c, dphi;
0808       MPlexQF q_c, qmin, qmax;
0809 
0810       Bins(const MPlexLV &par, const MPlexQI &chg, int np = NN) : isp(par, chg), n_proc(np) {}
0811 
0812       void prop_to_limits(const LayerInfo &li) {
0813         // Positions 1 and 2 should really be by "propagation order", 1 is the closest/
0814         // This should also work for backward propagation so not exactly trivial.
0815         // Also, do not really need propagation to center.
0816         if (li.is_barrel()) {
0817           isp.propagate_to_r(mp::PA_Exact, li.rin(), sp1, true, n_proc);
0818           isp.propagate_to_r(mp::PA_Exact, li.rout(), sp2, true, n_proc);
0819         } else {
0820           isp.propagate_to_z(mp::PA_Exact, li.zmin(), sp1, true, n_proc);
0821           isp.propagate_to_z(mp::PA_Exact, li.zmax(), sp2, true, n_proc);
0822         }
0823       }
0824 
0825       void find_bin_ranges(const LayerInfo &li, const LayerOfHits &loh, const MPlexLS &err) {
0826         // Below made members for debugging
0827         // MPlexQF phi_c, dphi_min, dphi_max;
0828         // phi_c = mp::fast_atan2(isp.y, isp.x);  // calculated below as difference
0829 
0830         // Matriplex::min_max(sp1.dphi, sp2.dphi, dphi_min, dphi_max);
0831         // the above is wrong: dalpha is not dphi --> renamed variable in State
0832         MPlexQF xp1, xp2, pmin, pmax;
0833         xp1 = mp::fast_atan2(sp1.y, sp1.x);
0834         xp2 = mp::fast_atan2(sp2.y, sp2.x);
0835         Matriplex::min_max(xp1, xp2, pmin, pmax);
0836         // Matriplex::min_max(mp::fast_atan2(sp1.y, sp1.x), smp::fast_atan2(sp2.y, sp2.x), pmin, pmax);
0837         MPlexQF dp = pmax - pmin;
0838         phi_c = 0.5f * (pmax + pmin);
0839         for (int ii = 0; ii < NN; ++ii) {
0840           if (ii < n_proc) {
0841             if (dp[ii] > Const::PI) {
0842               std::swap(pmax[ii], pmin[ii]);
0843               dp[ii] = Const::TwoPI - dp[ii];
0844               phi_c[ii] = Const::PI - phi_c[ii];
0845             }
0846             dphi[ii] = 0.5f * dp[ii];
0847             // printf("phic: %f  p1: %f  p2: %f   pmin: %f  pmax: %f   dphi: %f\n",
0848             //       phi_c[ii], xp1[ii], xp2[ii], pmin[ii], pmax[ii], dphi[ii]);
0849           }
0850         }
0851 
0852         const auto calc_err_xy = [&](const MPlexQF &x, const MPlexQF &y) {
0853           return x * x * err.ReduceFixedIJ(0, 0) + y * y * err.ReduceFixedIJ(1, 1) +
0854                  2.0f * x * y * err.ReduceFixedIJ(0, 1);
0855         };
0856 
0857         // Calculate dphi_track, dq_track differs for barrel/endcap
0858         MPlexQF r2_c = isp.x * isp.x + isp.y * isp.y;
0859         MPlexQF r2inv_c = 1.0f / r2_c;
0860         MPlexQF dphidx_c = -isp.y * r2inv_c;
0861         MPlexQF dphidy_c = isp.x * r2inv_c;
0862         dphi_track = 3.0f * calc_err_xy(dphidx_c, dphidy_c).abs().sqrt();
0863 
0864         // MPlexQF qmin, qmax;
0865         if (li.is_barrel()) {
0866           Matriplex::min_max(sp1.z, sp2.z, qmin, qmax);
0867           q_c = isp.z;
0868           dq_track = 3.0f * err.ReduceFixedIJ(2, 2).abs().sqrt();
0869         } else {
0870           Matriplex::min_max(Matriplex::hypot(sp1.x, sp1.y), Matriplex::hypot(sp2.x, sp2.y), qmin, qmax);
0871           q_c = Matriplex::sqrt(r2_c);
0872           dq_track = 3.0f * (r2inv_c * calc_err_xy(isp.x, isp.y).abs()).sqrt();
0873         }
0874 
0875         for (int i = 0; i < p1.kTotSize; ++i) {
0876           // Clamp crazy sizes. This actually only happens when prop-fail flag is set.
0877           // const float dphi_clamp = 0.1;
0878           // if (dphi_min[i] > 0.0f || dphi_min[i] < -dphi_clamp) dphi_min[i] = -dphi_clamp;
0879           // if (dphi_max[i] < 0.0f || dphi_max[i] > dphi_clampf) dphi_max[i] = dphi_clamp;
0880           p1[i] = loh.phiBinChecked(pmin[i] - dphi_track[i] - PHI_BIN_EXTRA_FAC * 0.0123f);
0881           p2[i] = loh.phiBinChecked(pmax[i] + dphi_track[i] + PHI_BIN_EXTRA_FAC * 0.0123f);
0882 
0883           q0[i] = loh.qBinChecked(q_c[i]);
0884           q1[i] = loh.qBinChecked(qmin[i] - dq_track[i] - Q_BIN_EXTRA_FAC * 0.5f * li.q_bin());
0885           q2[i] = loh.qBinChecked(qmax[i] + dq_track[i] + Q_BIN_EXTRA_FAC * 0.5f * li.q_bin()) + 1;
0886         }
0887       }
0888     };
0889 
0890     Bins B(m_Par[iI], m_Chg, N_proc);
0891     B.prop_to_limits(LI);
0892     B.find_bin_ranges(LI, L, m_Err[iI]);
0893 
0894     for (int i = 0; i < NN; ++i) {
0895       if (i < N_proc) {
0896         m_XHitSize[i] = 0;
0897         // Notify failure. Ideally should be detected before selectHitIndices().
0898         if (m_FailFlag[i]) {
0899           m_XWsrResult[i].m_wsr = WSR_Failed;
0900         } else {
0901           if (LI.is_barrel()) {
0902             m_XWsrResult[i] = L.is_within_z_sensitive_region(B.q_c[i], 0.5f * (B.q2[i] - B.q1[i]));
0903           } else {
0904             m_XWsrResult[i] = L.is_within_r_sensitive_region(B.q_c[i], 0.5f * (B.q2[i] - B.q1[i]));
0905           }
0906         }
0907       }
0908     }
0909 
0910     // for (int i = 0; i < N_proc; ++i) {
0911     //   printf("BinCheck %c %+8.6f %+8.6f | %3d %3d - %3d %3d ||  | %2d %2d - %2d %2d\n", LI.is_barrel() ? 'B' : 'E',
0912     //          B.phi[i], B.dphi[i], B.p1[i], B.p2[i], pb1v[i], pb2v[i],
0913     //          B.q[i], B.dq[i], B.q1[i], B.q2[i], qb1v[i], qb2v[i]);
0914     // }
0915 
0916 #ifdef RNT_DUMP_MkF_SelHitIdcs
0917     for (auto i : rnt_shi.f_h_idcs) {
0918       CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[i]];
0919       ci.bsn = BinSearch({B.phi_c[i],
0920                           B.dphi[i],
0921                           B.q_c[i],
0922                           0.5f * (B.q2[i] - B.q1[i]),
0923                           B.p1[i],
0924                           B.p2[i],
0925                           B.q1[i],
0926                           B.q2[i],
0927                           m_XWsrResult[i].m_wsr,
0928                           m_XWsrResult[i].m_in_gap,
0929                           false});
0930       ci.ps_min = statep2propstate(B.sp1, i);
0931       ci.ps_max = statep2propstate(B.sp2, i);
0932     }
0933 #endif
0934 
0935     struct PQE {
0936       float score;
0937       unsigned int hit_index;
0938     };
0939     auto pqe_cmp = [](const PQE &a, const PQE &b) { return a.score < b.score; };
0940     std::priority_queue<PQE, std::vector<PQE>, decltype(pqe_cmp)> pqueue(pqe_cmp);
0941     int pqueue_size = 0;
0942 
0943     // Vectorizing this makes it run slower!
0944     //#pragma omp simd
0945     for (int itrack = 0; itrack < NN; ++itrack) {
0946       if (itrack >= N_proc) {
0947         continue;
0948       }
0949 
0950       if (m_FailFlag[itrack]) {
0951         m_XWsrResult[itrack].m_wsr = WSR_Failed;
0952         continue;
0953       }
0954 
0955       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
0956         continue;
0957       }
0958 
0959       // New binning -- known to be too restrictive, so scaled up. Probably esp. in stereo layers.
0960       // Also, could take track covariance dphi / dq extras + known tilt stuff.
0961       const bidx_t qb = B.q0[itrack];
0962       const bidx_t qb1 = B.q1[itrack];
0963       const bidx_t qb2 = B.q2[itrack];
0964       const bidx_t pb1 = B.p1[itrack];
0965       const bidx_t pb2 = B.p2[itrack];
0966 
0967       // clang-format off
0968       dprintf("  %2d/%2d: %6.3f %6.3f %6.6f %7.5f %3u %3u %4u %4u\n",
0969               L.layer_id(), itrack, B.q_c[itrack], B.phi_c[itrack],
0970               B.qmax[itrack] - B.qmin[itrack], B.dphi[itrack],
0971               qb1, qb2, pb1, pb2);
0972 #ifdef RNT_DUMP_MkF_SelHitIdcs
0973       int hit_out_idx = 0;
0974       // phi-sorted position of matched hits
0975       struct pos_match { float dphi, dq; int idx; bool matched; };
0976       std::vector<pos_match> pos_match_vec;
0977 #endif
0978       // clang-format on
0979 
0980       mp::InitialState mp_is(m_Par[iI], m_Chg, itrack);
0981       mp::State mp_s;
0982 
0983       for (bidx_t qi = qb1; qi != qb2; ++qi) {
0984         for (bidx_t pi = pb1; pi != pb2; pi = L.phiMaskApply(pi + 1)) {
0985           // Limit to central Q-bin
0986           if (qi == qb && L.isBinDead(pi, qi) == true) {
0987             dprint("dead module for track in layer=" << L.layer_id() << " qb=" << qi << " pi=" << pi
0988                                                      << " q=" << B.q_c[itrack] << " phi=" << B.phi_c[itrack]);
0989             m_XWsrResult[itrack].m_in_gap = true;
0990           }
0991 
0992           // It might make sense to make first loop to extract bin indices
0993           // and issue prefetches at the same time.
0994           // Then enter vectorized loop to actually collect the hits in proper order.
0995 
0996           //SK: ~20x1024 bin sizes give mostly 1 hit per bin. Commented out for 128 bins or less
0997           // #pragma nounroll
0998           auto pbi = L.phiQBinContent(pi, qi);
0999           for (bcnt_t hi = pbi.begin(); hi < pbi.end(); ++hi) {
1000             // MT: Access into m_hit_zs and m_hit_phis is 1% run-time each.
1001 
1002             const unsigned int hi_orig = L.getOriginalHitIndex(hi);
1003 
1004             if (m_iteration_hit_mask && (*m_iteration_hit_mask)[hi_orig]) {
1005               dprintf(
1006                   "Yay, denying masked hit on layer %u, hi %u, orig idx %u\n", L.layer_info().layer_id(), hi, hi_orig);
1007               continue;
1008             }
1009 
1010             float new_q, new_phi, new_ddphi, new_ddq;
1011             bool prop_fail;
1012 
1013             if (L.is_barrel()) {
1014               const Hit &hit = L.refHit(hi_orig);
1015               unsigned int mid = hit.detIDinLayer();
1016               const ModuleInfo &mi = LI.module_info(mid);
1017 
1018               // Original condition, for phase2
1019               // if (L.layer_id() >= 4 && L.layer_id() <= 9 && std::abs(mp_is.z) > 10.f) {
1020 
1021               // This could work well instead of prop-to-r, too. Limit to 0.05 rad, 2.85 deg.
1022               if (std::abs(mi.zdir(2)) > 0.05f) {
1023                 prop_fail = mp_is.propagate_to_plane(mp::PA_Line, mi, mp_s, true);
1024                 new_q = mp_s.z;
1025                 /*
1026                 // This for calculating ddq on the dector plane, along the "strip" direction.
1027                 // NOTE -- should take full covariance and project it onto ydir.
1028                 SVector3 ydir = mi.calc_ydir();
1029                 new_ddq = (mp_s.x - mi.pos(0)) * ydir(0) +
1030                           (mp_s.y - mi.pos(1)) * ydir(1) +
1031                           (mp_s.z - mi.pos(2)) * ydir(2);
1032                 new_ddq = std::abs(new_ddq);
1033                 */
1034                 new_ddq = std::abs(new_q - L.hit_q(hi));
1035                 // dq from z direction is actually projected, so just take plain dz.
1036 
1037               } else {
1038                 prop_fail = mp_is.propagate_to_r(mp::PA_Exact, L.hit_qbar(hi), mp_s, true);
1039                 new_q = mp_s.z;
1040                 new_ddq = std::abs(new_q - L.hit_q(hi));
1041               }
1042             } else {
1043               prop_fail = mp_is.propagate_to_z(mp::PA_Exact, L.hit_qbar(hi), mp_s, true);
1044               new_q = hipo(mp_s.x, mp_s.y);
1045               new_ddq = std::abs(new_q - L.hit_q(hi));
1046             }
1047 
1048             new_phi = vdt::fast_atan2f(mp_s.y, mp_s.x);
1049             new_ddphi = cdist(std::abs(new_phi - L.hit_phi(hi)));
1050             bool dqdphi_presel = new_ddq < B.dq_track[itrack] + DDQ_PRESEL_FAC * L.hit_q_half_length(hi) &&
1051                                  new_ddphi < B.dphi_track[itrack] + DDPHI_PRESEL_FAC * 0.0123f;
1052 
1053             // clang-format off
1054             dprintf("     SHI %3u %4u %5u  %6.3f %6.3f %6.4f %7.5f  PROP-%s  %s\n",
1055                     qi, pi, hi, L.hit_q(hi), L.hit_phi(hi),
1056                     new_ddq, new_ddphi, prop_fail ? "FAIL" : "OK", dqdphi_presel ? "PASS" : "REJECT");
1057 #ifdef RNT_DUMP_MkF_SelHitIdcs
1058             if (rnt_shi.f_h_remap[itrack] >= 0) {
1059               int sim_lbl = sim_lbls[itrack].label;
1060               const Hit &thishit = L.refHit(hi_orig);
1061               m_msErr.copyIn(itrack, thishit.errArray());
1062               m_msPar.copyIn(itrack, thishit.posArray());
1063 
1064               MPlexQF thisOutChi2;
1065               MPlexQI propFail(0);
1066               MPlexLV tmpPropPar;
1067               const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(L.is_barrel());
1068               (*fnd_foos.m_compute_chi2_foo)(
1069                 m_Err[iI], m_Par[iI], m_Chg, m_msErr, m_msPar,
1070                 thisOutChi2, tmpPropPar, propFail, N_proc,
1071                 m_prop_config->finding_intra_layer_pflags,
1072                 m_prop_config->finding_requires_propagation_to_hit_pos);
1073               float hchi2 = thisOutChi2[itrack];
1074 
1075               CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1076 
1077               const MCHitInfo &mchinfo = m_event->simHitsInfo_[L.refHit(hi_orig).mcHitID()];
1078               int hit_lbl = mchinfo.mcTrackID();
1079               ci.hmi.emplace_back(HitMatchInfo{ HitInfo
1080                 { hit2pos(thishit),
1081                   new_q, L.hit_q_half_length(hi), L.hit_qbar(hi), new_phi,
1082                   hit_lbl },
1083                 state2pos(mp_s), state2mom(mp_s),
1084                 new_ddq, new_ddphi, hchi2, (int) hi_orig,
1085                 (sim_lbl == hit_lbl), dqdphi_presel, !prop_fail,
1086                 false, IdxChi2List()
1087               });
1088               ci.hmi.back().ic2list.reset(); // zero initialize
1089 
1090               bool new_dec = dqdphi_presel && !prop_fail;
1091               ++ci.n_all_hits;
1092               if (sim_lbl == hit_lbl) {
1093                 ++ci.n_hits_match;
1094                 if (new_dec) ++ci.n_hits_pass_match;
1095               }
1096               if (new_dec) ++ci.n_hits_pass;
1097               if (new_dec)
1098                 pos_match_vec.emplace_back(pos_match{ new_ddphi, new_ddq, hit_out_idx++,
1099                                              sim_lbl == hit_lbl });
1100             } // if cand is saved
1101 #endif
1102             // clang-format on
1103 
1104             if (prop_fail || !dqdphi_presel)
1105               continue;
1106             if (pqueue_size < NEW_MAX_HIT) {
1107               pqueue.push({new_ddphi, hi_orig});
1108               ++pqueue_size;
1109             } else if (new_ddphi < pqueue.top().score) {
1110               pqueue.pop();
1111               pqueue.push({new_ddphi, hi_orig});
1112             }
1113           }  //hi
1114         }  //pi
1115       }  //qi
1116 
1117       dprintf(" PQUEUE (%d)", pqueue_size);
1118 #ifdef RNT_DUMP_MkF_SelHitIdcs
1119       // clang-format off
1120       if (sim_lbls[itrack].is_set()) {
1121         // Find ord number of matched hits.
1122         std::sort(pos_match_vec.begin(), pos_match_vec.end(),
1123                   [](auto &a, auto &b){return a.dphi < b.dphi;});
1124         int pmvs = pos_match_vec.size();
1125 
1126         CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1127         for (int i = 0; i < pmvs; ++i) {
1128           if (pos_match_vec[i].matched) {
1129             ci.ord_first_match = i;
1130             ci.dphi_first_match = pos_match_vec[i].dphi;
1131             ci.dq_first_match = pos_match_vec[i].dq;
1132             break;
1133           }
1134         }
1135       }
1136       // clang-format off
1137 #endif
1138       // Reverse hits so best dphis/scores come first in the hit-index list.
1139       m_XHitSize[itrack] = pqueue_size;
1140       while (pqueue_size) {
1141         --pqueue_size;
1142         m_XHitArr.At(itrack, pqueue_size, 0) = pqueue.top().hit_index;
1143         dprintf("   %d: %f %d", pqueue_size, pqueue.top().score, pqueue.top().hit_index);
1144         pqueue.pop();
1145       }
1146       dprintf("\n");
1147 
1148     }  //itrack
1149   }
1150 
1151   //==============================================================================
1152   // AddBestHit - Best Hit Track Finding
1153   //==============================================================================
1154 
1155   void MkFinder::addBestHit(const LayerOfHits &layer_of_hits, const int N_proc, const FindingFoos &fnd_foos) {
1156     // debug = true;
1157 
1158     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1159 
1160     float minChi2[NN];
1161     int bestHit[NN];
1162     int maxSize = 0;
1163 
1164     // Determine maximum number of hits for tracks in the collection.
1165     for (int it = 0; it < NN; ++it) {
1166       if (it < N_proc) {
1167         if (m_XHitSize[it] > 0) {
1168           maxSize = std::max(maxSize, m_XHitSize[it]);
1169         }
1170       }
1171 
1172       bestHit[it] = -1;
1173       minChi2[it] = getHitSelDynamicChi2Cut(it, iP);
1174     }
1175 
1176     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1177       //fixme what if size is zero???
1178 
1179       mhp.reset();
1180 
1181       //#pragma omp simd doesn't vectorize with current compilers
1182       for (int itrack = 0; itrack < NN; ++itrack) {
1183         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1184           mhp.addInputAt(itrack, layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)));
1185         }
1186       }
1187 
1188       mhp.pack(m_msErr, m_msPar);
1189 
1190       //now compute the chi2 of track state vs hit
1191       MPlexQF outChi2;
1192       MPlexLV tmpPropPar;
1193       clearFailFlag();
1194       (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1195                                      m_Par[iP],
1196                                      m_Chg,
1197                                      m_msErr,
1198                                      m_msPar,
1199                                      outChi2,
1200                                      tmpPropPar,
1201                                      m_FailFlag,
1202                                      N_proc,
1203                                      m_prop_config->finding_intra_layer_pflags,
1204                                      m_prop_config->finding_requires_propagation_to_hit_pos);
1205 
1206       //update best hit in case chi2<minChi2
1207 #pragma omp simd
1208       for (int itrack = 0; itrack < NN; ++itrack) {
1209         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1210           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1211           dprint("chi2=" << chi2 << " minChi2[itrack]=" << minChi2[itrack]);
1212           if (chi2 < minChi2[itrack]) {
1213             minChi2[itrack] = chi2;
1214             bestHit[itrack] = m_XHitArr.At(itrack, hit_cnt, 0);
1215           }
1216         }
1217       }
1218     }  // end loop over hits
1219 
1220     //#pragma omp simd
1221     for (int itrack = 0; itrack < NN; ++itrack) {
1222       if (itrack >= N_proc) {
1223         continue;
1224       }
1225 
1226       if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1227         // Why am I doing this?
1228         m_msErr.setDiagonal3x3(itrack, 666);
1229         m_msPar(itrack, 0, 0) = m_Par[iP](itrack, 0, 0);
1230         m_msPar(itrack, 1, 0) = m_Par[iP](itrack, 1, 0);
1231         m_msPar(itrack, 2, 0) = m_Par[iP](itrack, 2, 0);
1232 
1233         // XXXX If not in gap, should get back the old track params. But they are gone ...
1234         // Would actually have to do it right after SelectHitIndices where updated params are still ok.
1235         // Here they got screwed during hit matching.
1236         // So, I'd store them there (into propagated params) and retrieve them here.
1237         // Or we decide not to care ...
1238 
1239         continue;
1240       }
1241 
1242       //fixme decide what to do in case no hit found
1243       if (bestHit[itrack] >= 0) {
1244         const Hit &hit = layer_of_hits.refHit(bestHit[itrack]);
1245         const float chi2 = minChi2[itrack];
1246 
1247         dprint("ADD BEST HIT FOR TRACK #"
1248                << itrack << std::endl
1249                << "prop x=" << m_Par[iP].constAt(itrack, 0, 0) << " y=" << m_Par[iP].constAt(itrack, 1, 0) << std::endl
1250                << "copy in hit #" << bestHit[itrack] << " x=" << hit.position()[0] << " y=" << hit.position()[1]);
1251 
1252         m_msErr.copyIn(itrack, hit.errArray());
1253         m_msPar.copyIn(itrack, hit.posArray());
1254         m_Chi2(itrack, 0, 0) += chi2;
1255 
1256         add_hit(itrack, bestHit[itrack], layer_of_hits.layer_id());
1257       } else {
1258         int fake_hit_idx = Hit::kHitMissIdx;
1259 
1260         if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1261           // YYYYYY Config::store_missed_layers
1262           fake_hit_idx = Hit::kHitEdgeIdx;
1263         } else if (num_all_minus_one_hits(itrack)) {
1264           fake_hit_idx = Hit::kHitStopIdx;
1265         }
1266 
1267         dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != Hit::kHitEdgeIdx)
1268                                           << " r=" << hipo(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
1269 
1270         m_msErr.setDiagonal3x3(itrack, 666);
1271         m_msPar(itrack, 0, 0) = m_Par[iP](itrack, 0, 0);
1272         m_msPar(itrack, 1, 0) = m_Par[iP](itrack, 1, 0);
1273         m_msPar(itrack, 2, 0) = m_Par[iP](itrack, 2, 0);
1274         // Don't update chi2
1275 
1276         add_hit(itrack, fake_hit_idx, layer_of_hits.layer_id());
1277       }
1278     }
1279 
1280     // Update the track parameters with this hit. (Note that some calculations
1281     // are already done when computing chi2. Not sure it's worth caching them?)
1282 
1283     dprint("update parameters");
1284     clearFailFlag();
1285     (*fnd_foos.m_update_param_foo)(m_Err[iP],
1286                                    m_Par[iP],
1287                                    m_Chg,
1288                                    m_msErr,
1289                                    m_msPar,
1290                                    m_Err[iC],
1291                                    m_Par[iC],
1292                                    m_FailFlag,
1293                                    N_proc,
1294                                    m_prop_config->finding_intra_layer_pflags,
1295                                    m_prop_config->finding_requires_propagation_to_hit_pos);
1296 
1297     dprint("m_Par[iP](0,0,0)=" << m_Par[iP](0, 0, 0) << " m_Par[iC](0,0,0)=" << m_Par[iC](0, 0, 0));
1298   }
1299 
1300   //=======================================================
1301   // isStripQCompatible : check if prop is consistent with the barrel/endcap strip length
1302   //=======================================================
1303   bool isStripQCompatible(
1304       int itrack, bool isBarrel, const MPlexLS &pErr, const MPlexLV &pPar, const MPlexHS &msErr, const MPlexHV &msPar) {
1305     //check module compatibility via long strip side = L/sqrt(12)
1306     if (isBarrel) {  //check z direction only
1307       const float res = std::abs(msPar.constAt(itrack, 2, 0) - pPar.constAt(itrack, 2, 0));
1308       const float hitHL = sqrt(msErr.constAt(itrack, 2, 2) * 3.f);  //half-length
1309       const float qErr = sqrt(pErr.constAt(itrack, 2, 2));
1310       dprint("qCompat " << hitHL << " + " << 3.f * qErr << " vs " << res);
1311       return hitHL + std::max(3.f * qErr, 0.5f) > res;
1312     } else {  //project on xy, assuming the strip Length >> Width
1313       const float res[2]{msPar.constAt(itrack, 0, 0) - pPar.constAt(itrack, 0, 0),
1314                          msPar.constAt(itrack, 1, 0) - pPar.constAt(itrack, 1, 0)};
1315       const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
1316       const float hitT2inv = 1.f / hitT2;
1317       const float proj[3] = {msErr.constAt(itrack, 0, 0) * hitT2inv,
1318                              msErr.constAt(itrack, 0, 1) * hitT2inv,
1319                              msErr.constAt(itrack, 1, 1) * hitT2inv};
1320       const float qErr =
1321           sqrt(std::abs(pErr.constAt(itrack, 0, 0) * proj[0] + 2.f * pErr.constAt(itrack, 0, 1) * proj[1] +
1322                         pErr.constAt(itrack, 1, 1) * proj[2]));  //take abs to avoid non-pos-def cases
1323       const float resProj =
1324           sqrt(res[0] * proj[0] * res[0] + 2.f * res[1] * proj[1] * res[0] + res[1] * proj[2] * res[1]);
1325       dprint("qCompat " << sqrt(hitT2 * 3.f) << " + " << 3.f * qErr << " vs " << resProj);
1326       return sqrt(hitT2 * 3.f) + std::max(3.f * qErr, 0.5f) > resProj;
1327     }
1328   }
1329 
1330   //=======================================================
1331   // passStripChargePCMfromTrack : apply the slope correction to charge per cm and cut using hit err matrix
1332   //         the raw pcm = charge/L_normal
1333   //         the corrected qCorr = charge/L_path = charge/(L_normal*p/p_zLocal) = pcm*p_zLocal/p
1334   //=======================================================
1335   bool passStripChargePCMfromTrack(
1336       int itrack, bool isBarrel, unsigned int pcm, unsigned int pcmMin, const MPlexLV &pPar, const MPlexHS &msErr) {
1337     //skip the overflow case
1338     if (pcm >= Hit::maxChargePerCM())
1339       return true;
1340 
1341     float qSF;
1342     if (isBarrel) {  //project in x,y, assuming zero-error direction is in this plane
1343       const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
1344       const float hitT2inv = 1.f / hitT2;
1345       const float proj[3] = {msErr.constAt(itrack, 0, 0) * hitT2inv,
1346                              msErr.constAt(itrack, 0, 1) * hitT2inv,
1347                              msErr.constAt(itrack, 1, 1) * hitT2inv};
1348       const bool detXY_OK =
1349           std::abs(proj[0] * proj[2] - proj[1] * proj[1]) < 0.1f;  //check that zero-direction is close
1350       float sinP;
1351       float cosP;
1352       vdt::fast_sincosf(pPar.constAt(itrack, 4, 0), sinP, cosP);
1353       const float sinT = std::abs(vdt::fast_sinf(pPar.constAt(itrack, 5, 0)));
1354       //qSF = sqrt[(px,py)*(1-proj)*(px,py)]/p = sinT*sqrt[(cosP,sinP)*(1-proj)*(cosP,sinP)].
1355       qSF = detXY_OK ? sinT * std::sqrt(std::abs(1.f + cosP * cosP * proj[0] + sinP * sinP * proj[2] -
1356                                                  2.f * cosP * sinP * proj[1]))
1357                      : 1.f;
1358     } else {  //project on z
1359       // p_zLocal/p = p_z/p = cosT
1360       qSF = std::abs(vdt::fast_cosf(pPar.constAt(itrack, 5, 0)));
1361     }
1362 
1363     const float qCorr = pcm * qSF;
1364     dprint("pcm " << pcm << " * " << qSF << " = " << qCorr << " vs " << pcmMin);
1365     return qCorr > pcmMin;
1366   }
1367 
1368   //==============================================================================
1369   // FindCandidates - Standard Track Finding
1370   //==============================================================================
1371 
1372   void MkFinder::findCandidates(const LayerOfHits &layer_of_hits,
1373                                 std::vector<std::vector<TrackCand>> &tmp_candidates,
1374                                 const int offset,
1375                                 const int N_proc,
1376                                 const FindingFoos &fnd_foos) {
1377     // bool debug = true;
1378 
1379     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1380 
1381     int maxSize = 0;
1382 
1383     // Determine maximum number of hits for tracks in the collection.
1384     for (int it = 0; it < NN; ++it) {
1385       if (it < N_proc) {
1386         if (m_XHitSize[it] > 0) {
1387           maxSize = std::max(maxSize, m_XHitSize[it]);
1388         }
1389       }
1390     }
1391 
1392     dprintf("FindCandidates max hits to process=%d\n", maxSize);
1393 
1394     int nHitsAdded[NN]{};
1395     bool isTooLargeCluster[NN]{false};
1396 
1397     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1398       mhp.reset();
1399 
1400       int charge_pcm[NN];
1401 
1402       //#pragma omp simd doesn't vectorize with current compilers
1403       for (int itrack = 0; itrack < NN; ++itrack) {
1404         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1405           const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
1406           mhp.addInputAt(itrack, hit);
1407           charge_pcm[itrack] = hit.chargePerCM();
1408         }
1409       }
1410 
1411       mhp.pack(m_msErr, m_msPar);
1412 
1413       //now compute the chi2 of track state vs hit
1414       MPlexQF outChi2;
1415       MPlexLV propPar;
1416       clearFailFlag();
1417 
1418       if /*constexpr*/ (Config::usePropToPlane) {
1419         // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint
1420     MPlexHV norm, dir, pnt;
1421     packModuleNormDirPnt(layer_of_hits, hit_cnt, norm, dir, pnt, N_proc);
1422         kalmanPropagateAndComputeChi2Plane(m_Err[iP],
1423                                            m_Par[iP],
1424                                            m_Chg,
1425                                            m_msErr,
1426                                            m_msPar,
1427                                                  norm,
1428                                            dir,
1429                                            pnt,
1430                                            outChi2,
1431                                            propPar,
1432                                            m_FailFlag,
1433                                            N_proc,
1434                                            m_prop_config->finding_intra_layer_pflags,
1435                          m_prop_config->finding_requires_propagation_to_hit_pos);
1436       } else {
1437         (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1438                                        m_Par[iP],
1439                                        m_Chg,
1440                                        m_msErr,
1441                                        m_msPar,
1442                                        outChi2,
1443                                        propPar,
1444                                        m_FailFlag,
1445                                        N_proc,
1446                                        m_prop_config->finding_intra_layer_pflags,
1447                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1448       }
1449 
1450       // Now update the track parameters with this hit (note that some
1451       // calculations are already done when computing chi2, to be optimized).
1452       // 1. This is not needed for candidates the hit is not added to, but it's
1453       // vectorized so doing it serially below should take the same time.
1454       // 2. Still it's a waste of time in case the hit is not added to any of the
1455       // candidates, so check beforehand that at least one cand needs update.
1456       bool oneCandPassCut = false;
1457       for (int itrack = 0; itrack < NN; ++itrack) {
1458         if (itrack >= N_proc) {
1459           continue;
1460         }
1461         float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1462 
1463         if (hit_cnt < m_XHitSize[itrack]) {
1464           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1465           dprint("chi2=" << chi2);
1466           if (chi2 < max_c2) {
1467             bool isCompatible = true;
1468             if (!layer_of_hits.is_pixel()) {
1469               //check module compatibility via long strip side = L/sqrt(12)
1470               isCompatible =
1471                   isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1472 
1473               //rescale strip charge to track parameters and reapply the cut
1474           if (isCompatible && layer_of_hits.layer_info().has_charge()) {
1475                 isCompatible = passStripChargePCMfromTrack(
1476                   itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1477           }
1478         }
1479             // Select only SiStrip hits with cluster size < maxClusterSize
1480             if (!layer_of_hits.is_pixel()) {
1481               if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1482                   m_iteration_params->maxClusterSize) {
1483                 isTooLargeCluster[itrack] = true;
1484                 isCompatible = false;
1485               }
1486             }
1487 
1488             if (isCompatible) {
1489               oneCandPassCut = true;
1490               break;
1491             }
1492           }
1493         }
1494       }
1495 
1496       if (oneCandPassCut) {
1497         MPlexQI tmpChg = m_Chg;
1498         clearFailFlag();
1499         (*fnd_foos.m_update_param_foo)(m_Err[iP],
1500                                        m_Par[iP],
1501                                        tmpChg,
1502                                        m_msErr,
1503                                        m_msPar,
1504                                        m_Err[iC],
1505                                        m_Par[iC],
1506                                        m_FailFlag,
1507                                        N_proc,
1508                                        m_prop_config->finding_intra_layer_pflags,
1509                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1510 
1511         dprint("update parameters" << std::endl
1512                                    << "propagated track parameters x=" << m_Par[iP].constAt(0, 0, 0)
1513                                    << " y=" << m_Par[iP].constAt(0, 1, 0) << std::endl
1514                                    << "               hit position x=" << m_msPar.constAt(0, 0, 0)
1515                                    << " y=" << m_msPar.constAt(0, 1, 0) << std::endl
1516                                    << "   updated track parameters x=" << m_Par[iC].constAt(0, 0, 0)
1517                                    << " y=" << m_Par[iC].constAt(0, 1, 0));
1518 
1519         //create candidate with hit in case chi2 < max_c2
1520         //fixme: please vectorize me... (not sure it's possible in this case)
1521         for (int itrack = 0; itrack < NN; ++itrack) {
1522           if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1523             const float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1524             const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1525             dprint("chi2=" << chi2);
1526             if (chi2 < max_c2) {
1527               bool isCompatible = true;
1528               if (!layer_of_hits.is_pixel()) {
1529                 // select only SiStrip hits with cluster size < maxClusterSize
1530                 if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1531                     m_iteration_params->maxClusterSize) {
1532                   // isTooLargeCluster[itrack] = true; -- only in CloneEngine
1533                   isCompatible = false;
1534                 }
1535                 // check module compatibility via long strip side = L/sqrt(12)
1536                 if (isCompatible)
1537                   isCompatible =
1538                     isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1539                 // rescale strip charge to track parameters and reapply the cut
1540                 if (isCompatible && layer_of_hits.layer_info().has_charge()) {
1541                   isCompatible = passStripChargePCMfromTrack(
1542                     itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1543                 }
1544               }
1545 
1546               if (isCompatible) {
1547                 bool hitExists = false;
1548                 int maxHits = m_NFoundHits(itrack, 0, 0);
1549                 if (layer_of_hits.is_pixel()) {
1550                   for (int i = 0; i <= maxHits; ++i) {
1551                     if (i > 2)
1552                       break;
1553                     if (m_HoTArrs[itrack][i].layer == layer_of_hits.layer_id()) {
1554                       hitExists = true;
1555                       break;
1556                     }
1557                   }
1558                 }
1559                 if (hitExists)
1560                   continue;
1561 
1562                 nHitsAdded[itrack]++;
1563                 dprint("chi2 cut passed, creating new candidate");
1564                 // Create a new candidate and fill the tmp_candidates output vector.
1565                 // QQQ only instantiate if it will pass, be better than N_best
1566 
1567                 const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
1568 
1569                 TrackCand newcand;
1570                 copy_out(newcand, itrack, iC);
1571                 newcand.setCharge(tmpChg(itrack, 0, 0));
1572                 newcand.addHitIdx(hit_idx, layer_of_hits.layer_id(), chi2);
1573                 newcand.setScore(getScoreCand(m_steering_params->m_track_scorer,
1574                                               newcand,
1575                                               true /*penalizeTailMissHits*/,
1576                                               true /*inFindCandidates*/));
1577                 newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
1578 
1579                 // To apply a fixed cut instead of dynamic cut for overlap: m_iteration_params->chi2CutOverlap
1580                 if (chi2 < max_c2) {
1581                   CombCandidate &ccand = *newcand.combCandidate();
1582                   ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
1583                       hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
1584                 }
1585 
1586                 dprint("updated track parameters x=" << newcand.parameters()[0] << " y=" << newcand.parameters()[1]
1587                                                      << " z=" << newcand.parameters()[2]
1588                                                      << " pt=" << 1. / newcand.parameters()[3]);
1589 
1590                 tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
1591               }
1592             }
1593           }
1594         }
1595       }  //end if (oneCandPassCut)
1596 
1597     }  //end loop over hits
1598 
1599     //now add invalid hit
1600     //fixme: please vectorize me...
1601     for (int itrack = 0; itrack < NN; ++itrack) {
1602       // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
1603       // and then merged back afterwards.
1604       if (itrack >= N_proc || m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1605         continue;
1606       }
1607 
1608       int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
1609                           (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
1610                              ? Hit::kHitMissIdx
1611                              : Hit::kHitStopIdx;
1612 
1613       if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1614         // YYYYYY m_iteration_params->store_missed_layers
1615         fake_hit_idx = Hit::kHitEdgeIdx;
1616       }
1617       //now add fake hit for tracks that passsed through inactive modules
1618       else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
1619         fake_hit_idx = Hit::kHitInGapIdx;
1620       }
1621       //now add fake hit for cases where hit cluster size is larger than maxClusterSize
1622       else if (isTooLargeCluster[itrack] == true && nHitsAdded[itrack] == 0) {
1623         fake_hit_idx = Hit::kHitMaxClusterIdx;
1624       }
1625 
1626       dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != Hit::kHitEdgeIdx)
1627                                         << " r=" << hipo(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
1628 
1629       // QQQ as above, only create and add if score better
1630       TrackCand newcand;
1631       copy_out(newcand, itrack, iP);
1632       newcand.addHitIdx(fake_hit_idx, layer_of_hits.layer_id(), 0.);
1633       newcand.setScore(getScoreCand(
1634           m_steering_params->m_track_scorer, newcand, true /*penalizeTailMissHits*/, true /*inFindCandidates*/));
1635       // Only relevant when we actually add a hit
1636       // newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
1637       tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
1638     }
1639   }
1640 
1641   //==============================================================================
1642   // FindCandidatesCloneEngine - Clone Engine Track Finding
1643   //==============================================================================
1644 
1645   void MkFinder::findCandidatesCloneEngine(const LayerOfHits &layer_of_hits,
1646                                            CandCloner &cloner,
1647                                            const int offset,
1648                                            const int N_proc,
1649                                            const FindingFoos &fnd_foos) {
1650     // bool debug = true;
1651 
1652     MatriplexHitPacker mhp(layer_of_hits.hitArray());
1653 
1654     int maxSize = 0;
1655 
1656     // Determine maximum number of hits for tracks in the collection.
1657 #pragma omp simd
1658     for (int it = 0; it < NN; ++it) {
1659       if (it < N_proc) {
1660         if (m_XHitSize[it] > 0) {
1661           maxSize = std::max(maxSize, m_XHitSize[it]);
1662         }
1663       }
1664     }
1665 
1666     dprintf("FindCandidatesCloneEngine max hits to process=%d\n", maxSize);
1667 
1668     int nHitsAdded[NN]{};
1669     bool isTooLargeCluster[NN]{false};
1670 
1671     for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
1672       mhp.reset();
1673 
1674       int charge_pcm[NN];
1675 
1676       //#pragma omp simd doesn't vectorize with current compilers
1677       for (int itrack = 0; itrack < NN; ++itrack) {
1678         if (itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1679           const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
1680           mhp.addInputAt(itrack, hit);
1681           charge_pcm[itrack] = hit.chargePerCM();
1682         }
1683       }
1684 
1685       mhp.pack(m_msErr, m_msPar);
1686 
1687       //now compute the chi2 of track state vs hit
1688       MPlexQF outChi2;
1689       MPlexLV propPar;
1690       clearFailFlag();
1691 
1692       if /*constexpr*/ (Config::usePropToPlane) {
1693         // Maybe could use 2 matriplex packers ... ModuleInfo has 3 * SVector3 and uint
1694         MPlexHV norm, dir, pnt;
1695         packModuleNormDirPnt(layer_of_hits, hit_cnt, norm, dir, pnt, N_proc);
1696         kalmanPropagateAndComputeChi2Plane(m_Err[iC],
1697                                      m_Par[iC],
1698                                      m_Chg,
1699                                      m_msErr,
1700                                      m_msPar,
1701                                      norm, dir, pnt,
1702                                      outChi2,
1703                                      propPar,
1704                                      m_FailFlag,
1705                                      N_proc,
1706                                      m_prop_config->finding_intra_layer_pflags,
1707                                      m_prop_config->finding_requires_propagation_to_hit_pos);
1708       } else {
1709         (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
1710                                        m_Par[iP],
1711                                        m_Chg,
1712                                        m_msErr,
1713                                        m_msPar,
1714                                        outChi2,
1715                                        propPar,
1716                                        m_FailFlag,
1717                                        N_proc,
1718                                        m_prop_config->finding_intra_layer_pflags,
1719                                        m_prop_config->finding_requires_propagation_to_hit_pos);
1720       }
1721 
1722       //#pragma omp simd  // DOES NOT VECTORIZE AS IT IS NOW
1723       for (int itrack = 0; itrack < NN; ++itrack) {
1724         // We can be in failed state from the initial propagation before selectHitIndices
1725         // and there hit_count for track is set to -1 and WSR state to Failed, handled below.
1726         // Or we might have hit it here in propagate-to-hit.
1727         // PROP-FAIL-ENABLE FailFlag check to be enabled when propagation failure
1728         // detection is properly implemented in propagate-to-R/Z.
1729         if (/*!m_FailFlag[itrack] &&*/ itrack < N_proc && hit_cnt < m_XHitSize[itrack]) {
1730           // make sure the hit was in the compatiblity window for the candidate
1731           const float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
1732           const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
1733           // XXX-NUM-ERR assert(chi2 >= 0);
1734 
1735           dprintf("  chi2=%.3f (%.3f)  trkIdx=%d hitIdx=%d\n", chi2, max_c2, itrack,  m_XHitArr.At(itrack, hit_cnt, 0));
1736           if (chi2 < max_c2) {
1737             bool isCompatible = true;
1738             if (!layer_of_hits.is_pixel()) {
1739               // select only SiStrip hits with cluster size < maxClusterSize
1740               if (layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0)).spanRows() >=
1741                   m_iteration_params->maxClusterSize) {
1742                 isTooLargeCluster[itrack] = true;
1743                 isCompatible = false;
1744               }
1745               // check module compatibility via long strip side = L/sqrt(12)
1746               if (isCompatible)
1747                 isCompatible =
1748                   isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
1749               // rescale strip charge to track parameters and reapply the cut
1750               if (isCompatible && layer_of_hits.layer_info().has_charge()) {
1751                 isCompatible = passStripChargePCMfromTrack(
1752                   itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
1753               }
1754             }
1755 
1756             if (isCompatible) {
1757               CombCandidate &ccand = cloner.combCandWithOriginalIndex(m_SeedIdx(itrack, 0, 0));
1758               bool hitExists = false;
1759               int maxHits = m_NFoundHits(itrack, 0, 0);
1760               if (layer_of_hits.is_pixel()) {
1761                 for (int i = 0; i <= maxHits; ++i) {
1762                   if (i > 2)
1763                     break;
1764                   if (ccand.hot(i).layer == layer_of_hits.layer_id()) {
1765                     hitExists = true;
1766                     break;
1767                   }
1768                 }
1769               }
1770               if (hitExists)
1771                 continue;
1772 
1773               nHitsAdded[itrack]++;
1774               const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
1775 
1776               // Register hit for overlap consideration, if chi2 cut is passed
1777               // To apply a fixed cut instead of dynamic cut for overlap: m_iteration_params->chi2CutOverlap
1778               if (chi2 < max_c2) {
1779                 ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
1780                     hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
1781               }
1782 
1783               IdxChi2List tmpList;
1784               tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
1785               tmpList.hitIdx = hit_idx;
1786               tmpList.module = layer_of_hits.refHit(hit_idx).detIDinLayer();
1787               tmpList.nhits = m_NFoundHits(itrack, 0, 0) + 1;
1788               tmpList.ntailholes = 0;
1789               tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
1790               tmpList.nholes = num_all_minus_one_hits(itrack);
1791               tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
1792               tmpList.chi2 = m_Chi2(itrack, 0, 0) + chi2;
1793               tmpList.chi2_hit = chi2;
1794               tmpList.score = getScoreStruct(m_steering_params->m_track_scorer, tmpList);
1795               cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
1796 
1797               dprint("  adding hit with hit_cnt=" << hit_cnt << " for trkIdx=" << tmpList.trkIdx
1798                                                   << " orig Seed=" << m_Label(itrack, 0, 0));
1799 
1800 #ifdef RNT_DUMP_MkF_SelHitIdcs
1801               if (rnt_shi.f_h_remap[itrack] >= 0) {
1802                 CandInfo &ci = (*rnt_shi.ci)[rnt_shi.f_h_remap[itrack]];
1803                 ci.assignIdxChi2List(tmpList);
1804               }
1805 #endif
1806             }
1807           }
1808         }
1809       }
1810 
1811     }  //end loop over hits
1812 
1813     //now add invalid hit
1814     for (int itrack = 0; itrack < NN; ++itrack) {
1815       dprint("num_all_minus_one_hits(" << itrack << ")=" << num_all_minus_one_hits(itrack));
1816 
1817       // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
1818       // and then merged back afterwards.
1819       if (itrack >= N_proc || m_XWsrResult[itrack].m_wsr == WSR_Outside) {
1820         continue;
1821       }
1822 
1823       // int fake_hit_idx = num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand ? -1 : -2;
1824       int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
1825                           (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
1826                              ? Hit::kHitMissIdx
1827                              : Hit::kHitStopIdx;
1828 
1829       if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
1830         fake_hit_idx = Hit::kHitEdgeIdx;
1831       }
1832       //now add fake hit for tracks that passsed through inactive modules
1833       else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
1834         fake_hit_idx = Hit::kHitInGapIdx;
1835       }
1836       //now add fake hit for cases where hit cluster size is larger than maxClusterSize
1837       else if (isTooLargeCluster[itrack] == true && nHitsAdded[itrack] == 0) {
1838         fake_hit_idx = Hit::kHitMaxClusterIdx;
1839       }
1840 
1841       // PROP-FAIL-ENABLE The following to be enabled when propagation failure
1842       // detection is properly implemented in propagate-to-R/Z.
1843       // // Override for failed propagation, this trumps all other cases.
1844       // if (m_XWsrResult[itrack].m_wsr == WSR_Failed) {
1845       //   fake_hit_idx = Hit::kHitStopIdx;
1846       // }
1847 
1848       IdxChi2List tmpList;
1849       tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
1850       tmpList.hitIdx = fake_hit_idx;
1851       tmpList.module = -1;
1852       tmpList.nhits = m_NFoundHits(itrack, 0, 0);
1853       tmpList.ntailholes = (fake_hit_idx == Hit::kHitMissIdx ? m_NTailMinusOneHits(itrack, 0, 0) + 1
1854                                                              : m_NTailMinusOneHits(itrack, 0, 0));
1855       tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
1856       tmpList.nholes = num_inside_minus_one_hits(itrack);
1857       tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
1858       tmpList.chi2 = m_Chi2(itrack, 0, 0);
1859       tmpList.chi2_hit = 0;
1860       tmpList.score = getScoreStruct(m_steering_params->m_track_scorer, tmpList);
1861       cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
1862       dprint("adding invalid hit " << fake_hit_idx);
1863     }
1864   }
1865 
1866   //==============================================================================
1867   // UpdateWithLoadedHit
1868   //==============================================================================
1869 
1870   void MkFinder::updateWithLoadedHit(int N_proc, const LayerOfHits &layer_of_hits, const FindingFoos &fnd_foos) {
1871     // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here
1872     // for propagation to the hit.
1873     clearFailFlag();
1874     if /*constexpr*/ (Config::usePropToPlane) {
1875       MPlexHV norm, dir, pnt;
1876       packModuleNormDirPnt(layer_of_hits, 0, norm, dir, pnt, N_proc);
1877       kalmanPropagateAndUpdatePlane(m_Err[iP],
1878                                    m_Par[iP],
1879                                    m_Chg,
1880                                    m_msErr,
1881                                    m_msPar,
1882                                    norm, dir, pnt,
1883                                    m_Err[iC],
1884                                    m_Par[iC],
1885                                    m_FailFlag,
1886                                    N_proc,
1887                                    m_prop_config->finding_inter_layer_pflags,
1888                                    m_prop_config->finding_requires_propagation_to_hit_pos);
1889     } else {
1890       (*fnd_foos.m_update_param_foo)(m_Err[iP],
1891                                      m_Par[iP],
1892                                      m_Chg,
1893                                      m_msErr,
1894                                      m_msPar,
1895                                      m_Err[iC],
1896                                      m_Par[iC],
1897                                      m_FailFlag,
1898                                      N_proc,
1899                                      m_prop_config->finding_inter_layer_pflags,
1900                                      m_prop_config->finding_requires_propagation_to_hit_pos);
1901     }
1902 
1903     // PROP-FAIL-ENABLE The following to be enabled when propagation failure
1904     // detection is properly implemented in propagate-to-R/Z.
1905     // for (int i = 0; i < N_proc; ++i) {
1906     //   if (m_FailFlag[i]) {
1907     //     dprintf("MkFinder::updateWithLoadedHit fail in update, recovering.\n");
1908     //     m_Err[iC].copySlot(i, m_Err[iP]);
1909     //     m_Par[iC].copySlot(i, m_Par[iP]);
1910     //   }
1911     // }
1912   }
1913 
1914   void MkFinder::chi2OfLoadedHit(int N_proc, const FindingFoos &fnd_foos) {
1915     // We expect input in iC slots from above function.
1916     // See comment in MkBuilder::find_tracks_in_layer() about intra / inter flags used here
1917     // for propagation to the hit.
1918     clearFailFlag();
1919     (*fnd_foos.m_compute_chi2_foo)(m_Err[iC],
1920                                    m_Par[iC],
1921                                    m_Chg,
1922                                    m_msErr,
1923                                    m_msPar,
1924                                    m_Chi2,
1925                                    m_Par[iP],
1926                                    m_FailFlag,
1927                                    N_proc,
1928                                    m_prop_config->finding_inter_layer_pflags,
1929                                    m_prop_config->finding_requires_propagation_to_hit_pos);
1930 
1931     // PROP-FAIL-ENABLE .... removed here
1932   }
1933 
1934   //==============================================================================
1935   // CopyOutParErr
1936   //==============================================================================
1937 
1938   void MkFinder::copyOutParErr(std::vector<CombCandidate> &seed_cand_vec, int N_proc, bool outputProp) const {
1939     const int iO = outputProp ? iP : iC;
1940 
1941     for (int i = 0; i < NN; ++i) {
1942       if (i < N_proc) {
1943         TrackCand &cand = seed_cand_vec[m_SeedIdx(i, 0, 0)][m_CandIdx(i, 0, 0)];
1944 
1945         // Set the track state to the updated parameters
1946         m_Err[iO].copyOut(i, cand.errors_nc().Array());
1947         m_Par[iO].copyOut(i, cand.parameters_nc().Array());
1948         cand.setCharge(m_Chg(i, 0, 0));
1949 
1950         dprint((outputProp ? "propagated" : "updated")
1951                << " track parameters x=" << cand.parameters()[0] << " y=" << cand.parameters()[1]
1952                << " z=" << cand.parameters()[2] << " pt=" << 1. / cand.parameters()[3] << " posEta=" << cand.posEta());
1953       }
1954     }
1955   }
1956 
1957   //==============================================================================
1958   // Backward Fit hack
1959   //==============================================================================
1960 
1961   void MkFinder::bkFitInputTracks(TrackVec &cands, int beg, int end) {
1962     // Uses HitOnTrack vector from Track directly + a local cursor array to current hit.
1963 
1964     MatriplexTrackPacker mtp(&cands[beg]);
1965 
1966     int itrack = 0;
1967 
1968     for (int i = beg; i < end; ++i, ++itrack) {
1969       const Track &trk = cands[i];
1970 
1971       m_Chg(itrack, 0, 0) = trk.charge();
1972       m_CurHit[itrack] = trk.nTotalHits() - 1;
1973       m_HoTArr[itrack] = trk.getHitsOnTrackArray();
1974 
1975       mtp.addInput(trk);
1976     }
1977 
1978     m_Chi2.setVal(0);
1979 
1980     mtp.pack(m_Err[iC], m_Par[iC]);
1981 
1982     m_Err[iC].scale(100.0f);
1983   }
1984 
1985   void MkFinder::bkFitInputTracks(EventOfCombCandidates &eocss, int beg, int end) {
1986     // Could as well use HotArrays from tracks directly + a local cursor array to last hit.
1987 
1988     // XXXX - shall we assume only TrackCand-zero is needed and that we can freely
1989     // bork the HoTNode array?
1990 
1991     MatriplexTrackPacker mtp(&eocss[beg][0]);
1992 
1993     int itrack = 0;
1994 
1995     for (int i = beg; i < end; ++i, ++itrack) {
1996       const TrackCand &trk = eocss[i][0];
1997 
1998       m_Chg(itrack, 0, 0) = trk.charge();
1999       m_CurNode[itrack] = trk.lastCcIndex();
2000       m_HoTNodeArr[itrack] = trk.combCandidate()->hotsData();
2001 
2002       // XXXX Need TrackCand* to update num-hits. Unless I collect info elsewhere
2003       // and fix it in BkFitOutputTracks.
2004       m_TrkCand[itrack] = &eocss[i][0];
2005 
2006       mtp.addInput(trk);
2007     }
2008 
2009     m_Chi2.setVal(0);
2010 
2011     mtp.pack(m_Err[iC], m_Par[iC]);
2012 
2013     m_Err[iC].scale(100.0f);
2014   }
2015 
2016   //------------------------------------------------------------------------------
2017 
2018   void MkFinder::bkFitOutputTracks(TrackVec &cands, int beg, int end, bool outputProp) {
2019     // Only copy out track params / errors / chi2, all the rest is ok.
2020 
2021     const int iO = outputProp ? iP : iC;
2022 
2023     int itrack = 0;
2024     for (int i = beg; i < end; ++i, ++itrack) {
2025       Track &trk = cands[i];
2026 
2027       m_Err[iO].copyOut(itrack, trk.errors_nc().Array());
2028       m_Par[iO].copyOut(itrack, trk.parameters_nc().Array());
2029 
2030       trk.setChi2(m_Chi2(itrack, 0, 0));
2031       if (isFinite(trk.chi2())) {
2032         trk.setScore(getScoreCand(m_steering_params->m_track_scorer, trk));
2033       }
2034     }
2035   }
2036 
2037   void MkFinder::bkFitOutputTracks(EventOfCombCandidates &eocss, int beg, int end, bool outputProp) {
2038     // Only copy out track params / errors / chi2, all the rest is ok.
2039 
2040     // XXXX - where will rejected hits get removed?
2041 
2042     const int iO = outputProp ? iP : iC;
2043 
2044     int itrack = 0;
2045     for (int i = beg; i < end; ++i, ++itrack) {
2046       TrackCand &trk = eocss[i][0];
2047 
2048       m_Err[iO].copyOut(itrack, trk.errors_nc().Array());
2049       m_Par[iO].copyOut(itrack, trk.parameters_nc().Array());
2050 
2051       trk.setChi2(m_Chi2(itrack, 0, 0));
2052       if (isFinite(trk.chi2())) {
2053         trk.setScore(getScoreCand(m_steering_params->m_track_scorer, trk));
2054       }
2055     }
2056   }
2057 
2058   //------------------------------------------------------------------------------
2059 
2060 #if defined(DEBUG_BACKWARD_FIT) || defined(DEBUG_BACKWARD_FIT_BH)
2061   namespace {
2062     float e2s(float x) { return 1e4 * std::sqrt(x); }
2063   }  // namespace
2064 #endif
2065 
2066   void MkFinder::bkFitFitTracksBH(const EventOfHits &eventofhits,
2067                                   const SteeringParams &st_par,
2068                                   const int N_proc,
2069                                   bool chiDebug) {
2070     // Prototyping final backward fit.
2071     // This works with track-finding indices, before remapping.
2072     //
2073     // Layers should be collected during track finding and list all layers that have actual hits.
2074     // Then we could avoid checking which layers actually do have hits.
2075 
2076     MPlexQF tmp_chi2;
2077     float tmp_err[6] = {666, 0, 666, 0, 0, 666};
2078     float tmp_pos[3];
2079 
2080     for (auto lp_iter = st_par.m_layer_plan.rbegin(); lp_iter != st_par.m_layer_plan.rend(); ++lp_iter) {
2081       const int layer = lp_iter->m_layer;
2082 
2083       const LayerOfHits &L = eventofhits[layer];
2084       const LayerInfo &LI = L.layer_info();
2085 
2086       int count = 0;
2087       for (int i = 0; i < N_proc; ++i) {
2088         while (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].index < 0)
2089           --m_CurHit[i];
2090 
2091         if (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].layer == layer) {
2092           // Skip the overlap hits -- if they exist.
2093           // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
2094           // which is *before* in the reverse iteration that we are doing here.
2095           // 2. Seed-hit merging can result in more than two hits per layer.
2096           while (m_CurHit[i] > 0 && m_HoTArr[i][m_CurHit[i] - 1].layer == layer)
2097             --m_CurHit[i];
2098 
2099           const Hit &hit = L.refHit(m_HoTArr[i][m_CurHit[i]].index);
2100           m_msErr.copyIn(i, hit.errArray());
2101           m_msPar.copyIn(i, hit.posArray());
2102           ++count;
2103           --m_CurHit[i];
2104         } else {
2105           tmp_pos[0] = m_Par[iC](i, 0, 0);
2106           tmp_pos[1] = m_Par[iC](i, 1, 0);
2107           tmp_pos[2] = m_Par[iC](i, 2, 0);
2108           m_msErr.copyIn(i, tmp_err);
2109           m_msPar.copyIn(i, tmp_pos);
2110         }
2111       }
2112 
2113       if (count == 0)
2114         continue;
2115 
2116       // ZZZ Could add missing hits here, only if there are any actual matches.
2117 
2118       if (LI.is_barrel()) {
2119         propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
2120 
2121         kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params | KFO_Local_Cov,
2122                         m_Err[iP],
2123                         m_Par[iP],
2124                         m_msErr,
2125                         m_msPar,
2126                         m_Err[iC],
2127                         m_Par[iC],
2128                         tmp_chi2,
2129                         N_proc);
2130       } else {
2131         propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
2132 
2133         kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
2134                               m_Err[iP],
2135                               m_Par[iP],
2136                               m_msErr,
2137                               m_msPar,
2138                               m_Err[iC],
2139                               m_Par[iC],
2140                               tmp_chi2,
2141                               N_proc);
2142       }
2143 
2144       //fixup invpt sign and charge
2145       for (int n = 0; n < NN; ++n) {
2146         if (n < N_proc && m_Par[iC].At(n, 3, 0) < 0) {
2147           m_Chg.At(n, 0, 0) = -m_Chg.At(n, 0, 0);
2148           m_Par[iC].At(n, 3, 0) = -m_Par[iC].At(n, 3, 0);
2149         }
2150       }
2151 
2152 #ifdef DEBUG_BACKWARD_FIT_BH
2153       // Dump per hit chi2
2154       for (int i = 0; i < N_proc; ++i) {
2155         float r_h = hipo(m_msPar.At(i, 0, 0), m_msPar.At(i, 1, 0));
2156         float r_t = hipo(m_Par[iC].At(i, 0, 0), m_Par[iC].At(i, 1, 0));
2157 
2158         // if ((std::isnan(tmp_chi2[i]) || std::isnan(r_t)))
2159         // if ( ! std::isnan(tmp_chi2[i]) && tmp_chi2[i] > 0) // && tmp_chi2[i] > 30)
2160         if (chiDebug) {
2161           int ti = iP;
2162           printf(
2163               "CHIHIT %3d %10g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g "
2164               "%10g %10g %10g %10g %11.5g %11.5g\n",
2165               layer,
2166               tmp_chi2[i],
2167               m_msPar.At(i, 0, 0),
2168               m_msPar.At(i, 1, 0),
2169               m_msPar.At(i, 2, 0),
2170               r_h,  // x_h y_h z_h r_h -- hit pos
2171               e2s(m_msErr.At(i, 0, 0)),
2172               e2s(m_msErr.At(i, 1, 1)),
2173               e2s(m_msErr.At(i, 2, 2)),  // ex_h ey_h ez_h -- hit errors
2174               m_Par[ti].At(i, 0, 0),
2175               m_Par[ti].At(i, 1, 0),
2176               m_Par[ti].At(i, 2, 0),
2177               r_t,  // x_t y_t z_t r_t -- track pos
2178               e2s(m_Err[ti].At(i, 0, 0)),
2179               e2s(m_Err[ti].At(i, 1, 1)),
2180               e2s(m_Err[ti].At(i, 2, 2)),  // ex_t ey_t ez_t -- track errors
2181               1.0f / m_Par[ti].At(i, 3, 0),
2182               m_Par[ti].At(i, 4, 0),
2183               m_Par[ti].At(i, 5, 0),                                     // pt, phi, theta
2184               std::atan2(m_msPar.At(i, 1, 0), m_msPar.At(i, 0, 0)),      // phi_h
2185               std::atan2(m_Par[ti].At(i, 1, 0), m_Par[ti].At(i, 0, 0)),  // phi_t
2186               1e4f * hipo(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
2187                                 m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
2188               1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
2189               // e2s((m_msErr.At(i,0,0) + m_msErr.At(i,1,1)) / (r_h * r_h)),     // ephi_h
2190               // e2s((m_Err[ti].At(i,0,0) + m_Err[ti].At(i,1,1)) / (r_t * r_t))  // ephi_t
2191           );
2192         }
2193       }
2194 #endif
2195 
2196       // update chi2
2197       m_Chi2.add(tmp_chi2);
2198     }
2199   }
2200 
2201   //------------------------------------------------------------------------------
2202 
2203   void MkFinder::print_par_err(int corp, int mslot) const {
2204 #ifdef DEBUG
2205     printf("Parameters:\n");
2206     for (int i = 0; i < 6; ++i) {
2207       printf("  %12.4g", m_Par[corp].constAt(mslot, i, 0));
2208     }
2209     printf("\nError matrix\n");
2210     for (int i = 0; i < 6; ++i) {
2211       for (int j = 0; j < 6; ++j) {
2212         printf("  %12.4g", m_Err[corp].constAt(mslot, i, j));
2213       }
2214       printf("\n");
2215     }
2216 #endif
2217   }
2218 
2219   void MkFinder::bkFitFitTracks(const EventOfHits &eventofhits,
2220                                 const SteeringParams &st_par,
2221                                 const int N_proc,
2222                                 bool chiDebug) {
2223     // Prototyping final backward fit.
2224     // This works with track-finding indices, before remapping.
2225     //
2226     // Layers should be collected during track finding and list all layers that have actual hits.
2227     // Then we could avoid checking which layers actually do have hits.
2228 
2229     // bool debug = true;
2230 
2231     MPlexQF tmp_chi2;
2232     MPlexQI no_mat_effs;
2233     float tmp_err[6] = {666, 0, 666, 0, 0, 666};
2234     float tmp_pos[3];
2235 
2236 #if defined(DEBUG_PROP_UPDATE)
2237     const int DSLOT = 0;
2238     printf("bkfit entry, track in slot %d\n", DSLOT);
2239     print_par_err(iC, DSLOT);
2240 #endif
2241 
2242     for (auto lp_iter = st_par.make_iterator(SteeringParams::IT_BkwFit); lp_iter.is_valid(); ++lp_iter) {
2243       const int layer = lp_iter.layer();
2244 
2245       const LayerOfHits &L = eventofhits[layer];
2246       const LayerInfo &LI = L.layer_info();
2247 
2248 #if defined(DEBUG_BACKWARD_FIT)
2249       const Hit *last_hit_ptr[NN];
2250 #endif
2251 
2252       no_mat_effs.setVal(0);
2253       int done_count = 0;
2254       int here_count = 0;
2255       for (int i = 0; i < N_proc; ++i) {
2256         while (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.index < 0) {
2257           m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2258         }
2259 
2260         if (m_CurNode[i] < 0)
2261           ++done_count;
2262 
2263         if (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.layer == layer) {
2264           // Skip the overlap hits -- if they exist.
2265           // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
2266           // which is *before* in the reverse iteration that we are doing here.
2267           // 2. Seed-hit merging can result in more than two hits per layer.
2268           // while (m_CurHit[i] > 0 && m_HoTArr[ i ][ m_CurHit[i] - 1 ].layer == layer) --m_CurHit[i];
2269           while (m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx >= 0 &&
2270                  m_HoTNodeArr[i][m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx].m_hot.layer == layer)
2271             m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2272 
2273           const Hit &hit = L.refHit(m_HoTNodeArr[i][m_CurNode[i]].m_hot.index);
2274 
2275 #ifdef DEBUG_BACKWARD_FIT
2276           last_hit_ptr[i] = &hit;
2277 #endif
2278           m_msErr.copyIn(i, hit.errArray());
2279           m_msPar.copyIn(i, hit.posArray());
2280           ++here_count;
2281 
2282           m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
2283         } else {
2284 #ifdef DEBUG_BACKWARD_FIT
2285           last_hit_ptr[i] = nullptr;
2286 #endif
2287           no_mat_effs[i] = 1;
2288           tmp_pos[0] = m_Par[iC](i, 0, 0);
2289           tmp_pos[1] = m_Par[iC](i, 1, 0);
2290           tmp_pos[2] = m_Par[iC](i, 2, 0);
2291           m_msErr.copyIn(i, tmp_err);
2292           m_msPar.copyIn(i, tmp_pos);
2293         }
2294       }
2295 
2296       if (done_count == N_proc)
2297         break;
2298       if (here_count == 0)
2299         continue;
2300 
2301       // ZZZ Could add missing hits here, only if there are any actual matches.
2302 
2303       clearFailFlag();
2304 
2305       // PROP-FAIL-ENABLE Once always "copy input to output on fail" is removed from
2306       // propagateToR one might want to enable this for barrel or endcap or both.
2307       if (LI.is_barrel()) {
2308         propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
2309 
2310         kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params | KFO_Local_Cov,
2311                         m_Err[iP],
2312                         m_Par[iP],
2313                         m_msErr,
2314                         m_msPar,
2315                         m_Err[iC],
2316                         m_Par[iC],
2317                         tmp_chi2,
2318                         N_proc);
2319       } else {
2320         propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
2321 
2322         kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
2323                               m_Err[iP],
2324                               m_Par[iP],
2325                               m_msErr,
2326                               m_msPar,
2327                               m_Err[iC],
2328                               m_Par[iC],
2329                               tmp_chi2,
2330                               N_proc);
2331       }
2332 
2333 #if defined(DEBUG_PROP_UPDATE)
2334       printf("\nbkfit at layer %d, track in slot %d -- fail=%d, had hit=%d (%g, %g, %g)\n",
2335              LI.layer_id(),
2336              DSLOT,
2337              m_FailFlag[DSLOT],
2338              1 - no_mat_effs[DSLOT],
2339              m_msPar(DSLOT, 0, 0),
2340              m_msPar(DSLOT, 1, 0),
2341              m_msPar(DSLOT, 2, 0));
2342       printf("Propagated:\n");
2343       print_par_err(iP, DSLOT);
2344       printf("Updated:\n");
2345       print_par_err(iC, DSLOT);
2346 #endif
2347 
2348       // Fixup for failed propagation or invpt sign and charge.
2349       for (int i = 0; i < NN; ++i) {
2350         // PROP-FAIL-ENABLE The following to be enabled when propagation failure
2351         // detection is properly implemented in propagate-to-R/Z.
2352         // 1. The following code was only expecting barrel state to be restored.
2353         //      auto barrel_pf(m_prop_config->backward_fit_pflags);
2354         //      barrel_pf.copy_input_state_on_fail = true;
2355         // 2. There is also check on chi2, commented out to keep physics changes minimal.
2356         /*
2357         if (m_FailFlag[i] && LI.is_barrel()) {
2358           // Barrel pflags are set to include PF_copy_input_state_on_fail.
2359           // Endcap errors are immaterial here (relevant for fwd search), with prop error codes
2360           // one could do other things.
2361           // Are there also fail conditions in KalmanUpdate?
2362 #ifdef DEBUG
2363           if (debug && g_debug) {
2364             dprintf("MkFinder::bkFitFitTracks prop fail: chi2=%f, layer=%d, label=%d. Recovering.\n",
2365                     tmp_chi2[i], LI.layer_id(), m_Label[i]);
2366             print_par_err(iC, i);
2367           }
2368 #endif
2369           m_Err[iC].copySlot(i, m_Err[iP]);
2370           m_Par[iC].copySlot(i, m_Par[iP]);
2371         } else if (tmp_chi2[i] > 200 || tmp_chi2[i] < 0) {
2372 #ifdef DEBUG
2373           if (debug && g_debug) {
2374             dprintf("MkFinder::bkFitFitTracks chi2 fail: chi2=%f, layer=%d, label=%d. Recovering.\n",
2375                     tmp_chi2[i], LI.layer_id(), m_Label[i]);
2376             print_par_err(iC, i);
2377           }
2378 #endif
2379           // Go back to propagated state (at the current hit, the previous one is lost).
2380           m_Err[iC].copySlot(i, m_Err[iP]);
2381           m_Par[iC].copySlot(i, m_Par[iP]);
2382         }
2383         */
2384         // Fixup invpt sign and charge.
2385         if (i < N_proc && m_Par[iC].At(i, 3, 0) < 0) {
2386           m_Chg.At(i, 0, 0) = -m_Chg.At(i, 0, 0);
2387           m_Par[iC].At(i, 3, 0) = -m_Par[iC].At(i, 3, 0);
2388         }
2389       }
2390 
2391 #if defined(DEBUG_BACKWARD_FIT)
2392       // clang-format off
2393       bool debug = true;
2394       const char beg_cur_sep = '/'; // set to ' ' root parsable printouts
2395       for (int i = 0; i < N_proc; ++i) {
2396         if (chiDebug && last_hit_ptr[i]) {
2397           TrackCand &bb = *m_TrkCand[i];
2398           int ti = iP;
2399           float chi = tmp_chi2.At(i, 0, 0);
2400           float chi_prnt = std::isfinite(chi) ? chi : -9;
2401 
2402 #if defined(MKFIT_STANDALONE)
2403           const MCHitInfo &mchi = m_event->simHitsInfo_[last_hit_ptr[i]->mcHitID()];
2404 
2405           dprintf("BKF_OVERLAP %d %d %d %d %d %d %d "
2406                   "%f%c%f %f %f%c%f %f %f %f %d %d %d %d "
2407                   "%f %f %f %f %f\n",
2408               m_event->evtID(),
2409 #else
2410           dprintf("BKF_OVERLAP %d %d %d %d %d %d "
2411                   "%f%c%f %f %f%c%f %f %f %f %d %d %d "
2412                   "%f %f %f %f %f\n",
2413 #endif
2414               bb.label(), (int)bb.prodType(), bb.isFindable(),
2415               layer, L.is_stereo(), L.is_barrel(),
2416               bb.pT(), beg_cur_sep, 1.0f / m_Par[ti].At(i, 3, 0),
2417               bb.posEta(),
2418               bb.posPhi(), beg_cur_sep, std::atan2(m_Par[ti].At(i, 1, 0), m_Par[ti].At(i, 0, 0)),
2419               hipo(m_Par[ti].At(i, 0, 0), m_Par[ti].At(i, 1, 0)),
2420               m_Par[ti].At(i, 2, 0),
2421               chi_prnt,
2422               std::isnan(chi), std::isfinite(chi), chi > 0,
2423 #if defined(MKFIT_STANDALONE)
2424               mchi.mcTrackID(),
2425 #endif
2426               // The following three can get negative / prouce nans in e2s.
2427               // std::abs the args for FPE hunt.
2428               e2s(std::abs(m_Err[ti].At(i, 0, 0))),
2429               e2s(std::abs(m_Err[ti].At(i, 1, 1))),
2430               e2s(std::abs(m_Err[ti].At(i, 2, 2))),  // sx_t sy_t sz_t -- track errors
2431               1e4f * hipo(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
2432                                 m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
2433               1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
2434           );
2435         }
2436       }
2437       // clang-format on
2438 #endif
2439 
2440       // update chi2
2441       m_Chi2.add(tmp_chi2);
2442     }
2443   }
2444 
2445   //------------------------------------------------------------------------------
2446 
2447   void MkFinder::bkFitPropTracksToPCA(const int N_proc) {
2448     propagateTracksToPCAZ(N_proc, m_prop_config->pca_prop_pflags);
2449   }
2450 
2451 }  // end namespace mkfit