Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-13 22:52:46

0001 #include <alpaka/alpaka.hpp>
0002 
0003 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0004 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0007 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h"
0008 
0009 #include "vertexFinder.h"
0010 #include "clusterTracksDBSCAN.h"
0011 #include "clusterTracksIterative.h"
0012 #include "clusterTracksByDensity.h"
0013 #include "fitVertices.h"
0014 #include "sortByPt2.h"
0015 #include "splitVertices.h"
0016 
0017 #undef PIXVERTEX_DEBUG_PRODUCE
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0019   namespace vertexFinder {
0020     using namespace cms::alpakatools;
0021     // reject outlier tracks that contribute more than this to the chi2 of the vertex fit
0022     constexpr float maxChi2ForFirstFit = 50.f;
0023     constexpr float maxChi2ForFinalFit = 5000.f;
0024 
0025     // split vertices with a chi2/NDoF greater than this
0026     constexpr float maxChi2ForSplit = 9.f;
0027 
0028     template <typename TrackerTraits>
0029     class LoadTracks {
0030     public:
0031       ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0032                                     reco::TrackSoAConstView<TrackerTraits> tracks_view,
0033                                     VtxSoAView data,
0034                                     TrkSoAView trkdata,
0035                                     WsSoAView ws,
0036                                     float ptMin,
0037                                     float ptMax) const {
0038         auto const* quality = tracks_view.quality();
0039         using helper = TracksUtilities<TrackerTraits>;
0040 
0041         for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) {
0042           [[maybe_unused]] auto nHits = helper::nHits(tracks_view, idx);
0043           ALPAKA_ASSERT_ACC(nHits >= 3);
0044 
0045           // initialize the track data
0046           trkdata[idx].idv() = -1;
0047 
0048           // do not use triplets
0049           if (reco::isTriplet(tracks_view, idx))
0050             continue;
0051 
0052           // use only "high purity" track
0053           if (quality[idx] < ::pixelTrack::Quality::highPurity)
0054             continue;
0055 
0056           auto pt = tracks_view[idx].pt();
0057           // pT min cut
0058           if (pt < ptMin)
0059             continue;
0060 
0061           // clamp pT to the pTmax
0062           pt = std::min<float>(pt, ptMax);
0063 
0064           // load the track data into the workspace
0065           auto it = alpaka::atomicAdd(acc, &ws.ntrks(), 1u, alpaka::hierarchy::Blocks{});
0066           ws[it].itrk() = idx;
0067           ws[it].zt() = reco::zip(tracks_view, idx);
0068           ws[it].ezt2() = tracks_view[idx].covariance()(14);
0069           ws[it].ptt2() = pt * pt;
0070         }
0071       }
0072     };
0073 
0074 // #define THREE_KERNELS
0075 #ifndef THREE_KERNELS
0076     class VertexFinderOneKernel {
0077     public:
0078       ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0079                                     VtxSoAView data,
0080                                     TrkSoAView trkdata,
0081                                     WsSoAView ws,
0082                                     bool doSplit,
0083                                     int minT,      // min number of neighbours to be "seed"
0084                                     float eps,     // max absolute distance to cluster
0085                                     float errmax,  // max error to be "seed"
0086                                     float chi2max  // max normalized distance to cluster,
0087       ) const {
0088         clusterTracksByDensity(acc, data, trkdata, ws, minT, eps, errmax, chi2max);
0089         alpaka::syncBlockThreads(acc);
0090         fitVertices(acc, data, trkdata, ws, maxChi2ForFirstFit);
0091         alpaka::syncBlockThreads(acc);
0092         if (doSplit) {
0093           splitVertices(acc, data, trkdata, ws, maxChi2ForSplit);
0094           alpaka::syncBlockThreads(acc);
0095           fitVertices(acc, data, trkdata, ws, maxChi2ForFinalFit);
0096           alpaka::syncBlockThreads(acc);
0097         }
0098         sortByPt2(acc, data, trkdata, ws);
0099       }
0100     };
0101 #else
0102     class VertexFinderKernel1 {
0103     public:
0104       ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0105                                     VtxSoAView data,
0106                                     WsSoAView ws,
0107                                     int minT,      // min number of neighbours to be "seed"
0108                                     float eps,     // max absolute distance to cluster
0109                                     float errmax,  // max error to be "seed"
0110                                     float chi2max  // max normalized distance to cluster,
0111       ) const {
0112         clusterTracksByDensity(acc, data, ws, minT, eps, errmax, chi2max);
0113         alpaka::syncBlockThreads(acc);
0114         fitVertices(acc, data, ws, maxChi2ForFirstFit);
0115       }
0116     };
0117 
0118     class VertexFinderKernel2 {
0119     public:
0120       ALPAKA_FN_ACC void operator()(Acc1D const& acc, VtxSoAView data, WsSoAView ws) const {
0121         fitVertices(acc, data, ws, maxChi2ForFinalFit);
0122         alpaka::syncBlockThreads(acc);
0123         sortByPt2(data, ws);
0124       }
0125     };
0126 #endif
0127 
0128     template <typename TrackerTraits>
0129     ZVertexSoACollection Producer<TrackerTraits>::makeAsync(Queue& queue,
0130                                                             reco::TrackSoAConstView<TrackerTraits> const& tracks_view,
0131                                                             int maxVertices,
0132                                                             float ptMin,
0133                                                             float ptMax) const {
0134 #ifdef PIXVERTEX_DEBUG_PRODUCE
0135       std::cout << "producing Vertices on GPU" << std::endl;
0136 #endif  // PIXVERTEX_DEBUG_PRODUCE
0137       const auto maxTracks = tracks_view.metadata().size();
0138       ZVertexSoACollection vertices({{maxVertices, maxTracks}}, queue);
0139       auto data = vertices.view();
0140       auto trkdata = vertices.view<reco::ZVertexTracksSoA>();
0141 
0142       PixelVertexWorkSpaceSoADevice workspace(maxTracks, queue);
0143       auto ws = workspace.view();
0144 
0145       // Initialize
0146       const auto initWorkDiv = cms::alpakatools::make_workdiv<Acc1D>(1, 1);
0147       alpaka::exec<Acc1D>(queue, initWorkDiv, Init{}, data, ws);
0148 
0149       // Load Tracks
0150       const uint32_t blockSize = 128;
0151       const uint32_t numberOfBlocks =
0152           cms::alpakatools::divide_up_by(tracks_view.metadata().size() + blockSize - 1, blockSize);
0153       const auto loadTracksWorkDiv = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
0154       alpaka::exec<Acc1D>(
0155           queue, loadTracksWorkDiv, LoadTracks<TrackerTraits>{}, tracks_view, data, trkdata, ws, ptMin, ptMax);
0156 
0157       // Running too many thread lead to problems when printf is enabled.
0158       const auto finderSorterWorkDiv = cms::alpakatools::make_workdiv<Acc1D>(1, 1024 - 128);
0159       const auto splitterFitterWorkDiv = cms::alpakatools::make_workdiv<Acc1D>(1024, 128);
0160 
0161       if (oneKernel_) {
0162         // implemented only for density clustesrs
0163 #ifndef THREE_KERNELS
0164         alpaka::exec<Acc1D>(queue,
0165                             finderSorterWorkDiv,
0166                             VertexFinderOneKernel{},
0167                             data,
0168                             trkdata,
0169                             ws,
0170                             doSplitting_,
0171                             minT,
0172                             eps,
0173                             errmax,
0174                             chi2max);
0175 #else
0176         alpaka::exec<Acc1D>(
0177             queue, finderSorterWorkDiv, VertexFinderOneKernel{}, data, trkdata, ws, minT, eps, errmax, chi2max);
0178 
0179         // one block per vertex...
0180         if (doSplitting_)
0181           alpaka::exec<Acc1D>(queue, splitterFitterWorkDiv, SplitVerticesKernel{}, data, trkdata, ws, maxChi2ForSplit);
0182         alpaka::exec<Acc1D>(queue, finderSorterWorkDiv{}, data, ws);
0183 #endif
0184       } else {  // five kernels
0185         if (useDensity_) {
0186           alpaka::exec<Acc1D>(
0187               queue, finderSorterWorkDiv, ClusterTracksByDensityKernel{}, data, trkdata, ws, minT, eps, errmax, chi2max);
0188 
0189         } else if (useDBSCAN_) {
0190           alpaka::exec<Acc1D>(
0191               queue, finderSorterWorkDiv, ClusterTracksDBSCAN{}, data, trkdata, ws, minT, eps, errmax, chi2max);
0192         } else if (useIterative_) {
0193           alpaka::exec<Acc1D>(
0194               queue, finderSorterWorkDiv, ClusterTracksIterative{}, data, trkdata, ws, minT, eps, errmax, chi2max);
0195         }
0196         alpaka::exec<Acc1D>(queue, finderSorterWorkDiv, FitVerticesKernel{}, data, trkdata, ws, maxChi2ForFirstFit);
0197 
0198         // one block per vertex...
0199         if (doSplitting_) {
0200           alpaka::exec<Acc1D>(queue, splitterFitterWorkDiv, SplitVerticesKernel{}, data, trkdata, ws, maxChi2ForSplit);
0201 
0202           alpaka::exec<Acc1D>(queue, finderSorterWorkDiv, FitVerticesKernel{}, data, trkdata, ws, maxChi2ForFinalFit);
0203         }
0204         alpaka::exec<Acc1D>(queue, finderSorterWorkDiv, SortByPt2Kernel{}, data, trkdata, ws);
0205       }
0206 
0207       return vertices;
0208     }
0209 
0210     template class Producer<pixelTopology::Phase1>;
0211     template class Producer<pixelTopology::Phase2>;
0212     template class Producer<pixelTopology::HIonPhase1>;
0213   }  // namespace vertexFinder
0214 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE