Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-04-29 02:41:34

0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
0015 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0016 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0017 
0018 #include "vertexFinder.h"
0019 
0020 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0021 
0022   // This algo does not really scale as it works in a single block...
0023   // It should be good enough for <10K tracks we have.
0024   //
0025   // Based on Rodrighez&Laio algo.
0026   ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const& acc,
0027                                                              VtxSoAView& data,
0028                                                              TrkSoAView& trkdata,
0029                                                              WsSoAView& ws,
0030                                                              int minT,      // min number of neighbours to be "seed"
0031                                                              float eps,     // max absolute distance to cluster
0032                                                              float errmax,  // max error to be "seed"
0033                                                              float chi2max  // max normalized distance to cluster
0034   ) {
0035     // workaround for #47808
0036     debug::do_not_optimise(ws);
0037 
0038     constexpr bool verbose = false;
0039 
0040     if constexpr (verbose) {
0041       if (cms::alpakatools::once_per_block(acc))
0042         printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0043     }
0044 
0045     auto nt = ws.ntrks();
0046     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0047     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0048 
0049     float const* __restrict__ zt = ws.zt();
0050     float const* __restrict__ ezt2 = ws.ezt2();
0051     uint8_t* __restrict__ izt = ws.izt();
0052     int32_t* __restrict__ iv = ws.iv();
0053     int32_t* __restrict__ nn = trkdata.ndof();
0054     ALPAKA_ASSERT_ACC(zt);
0055     ALPAKA_ASSERT_ACC(ezt2);
0056     ALPAKA_ASSERT_ACC(izt);
0057     ALPAKA_ASSERT_ACC(iv);
0058     ALPAKA_ASSERT_ACC(nn);
0059 
0060     using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0061     auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0062     constexpr int warpSize = cms::alpakatools::warpSize;
0063     auto& hws = alpaka::declareSharedVar<Hist::Counter[warpSize], __COUNTER__>(acc);
0064 
0065     for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0066       hist.off[j] = 0;
0067     }
0068     for (auto j : cms::alpakatools::uniform_elements(acc, warpSize)) {
0069       hws[j] = 0;  // used by prefix scan in hist.finalize()
0070     }
0071     alpaka::syncBlockThreads(acc);
0072 
0073     if constexpr (verbose) {
0074       if (cms::alpakatools::once_per_block(acc))
0075         printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0076     }
0077 
0078     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0079     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0080     ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0081 
0082     // fill hist (bin shall be wider than "eps")
0083     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0084       int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0085       // Equivalent of iz = std::clamp(iz, INT8_MIN, INT8_MAX)
0086       // which doesn't compile with gcc14 due to reference to __glibcxx_assert
0087       // See https://github.com/llvm/llvm-project/issues/95183
0088       int tmp_max = std::max<int>(iz, INT8_MIN);
0089       iz = std::min<int>(tmp_max, INT8_MAX);
0090       ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0091       ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0092       izt[i] = iz - INT8_MIN;
0093       hist.count(acc, izt[i]);
0094       iv[i] = i;
0095       nn[i] = 0;
0096     }
0097     alpaka::syncBlockThreads(acc);
0098 
0099     hist.finalize(acc, hws);
0100     alpaka::syncBlockThreads(acc);
0101 
0102     ALPAKA_ASSERT_ACC(hist.size() == nt);
0103     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0104       hist.fill(acc, izt[i], uint16_t(i));
0105     }
0106     alpaka::syncBlockThreads(acc);
0107 
0108     // count neighbours
0109     const auto errmax2 = errmax * errmax;
0110     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0111       if (ezt2[i] > errmax2)
0112         continue;
0113       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0114         if (i == j)
0115           return;
0116         auto dist = std::abs(zt[i] - zt[j]);
0117         if (dist > eps)
0118           return;
0119         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0120           return;
0121         nn[i]++;
0122       });
0123     }
0124     alpaka::syncBlockThreads(acc);
0125 
0126     // find closest above me .... (we ignore the possibility of two j at same distance from i)
0127     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0128       float mdist = eps;
0129       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0130         if (nn[j] < nn[i])
0131           return;
0132         if (nn[j] == nn[i] && zt[j] >= zt[i])
0133           return;  // if equal use natural order...
0134         auto dist = std::abs(zt[i] - zt[j]);
0135         if (dist > mdist)
0136           return;
0137         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0138           return;  // (break natural order???)
0139         mdist = dist;
0140         iv[i] = j;  // assign to cluster (better be unique??)
0141       });
0142     }
0143     alpaka::syncBlockThreads(acc);
0144 
0145 #ifdef GPU_DEBUG
0146     //  mini verification
0147     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0148       if (iv[i] != int(i))
0149         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0150     }
0151     alpaka::syncBlockThreads(acc);
0152 #endif
0153 
0154     // consolidate graph (percolate index of seed)
0155     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0156       auto m = iv[i];
0157       while (m != iv[m])
0158         m = iv[m];
0159       iv[i] = m;
0160     }
0161 
0162 #ifdef GPU_DEBUG
0163     alpaka::syncBlockThreads(acc);
0164     //  mini verification
0165     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0166       if (iv[i] != int(i))
0167         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0168     }
0169 #endif
0170 
0171 #ifdef GPU_DEBUG
0172     // and verify that we did not split any cluster...
0173     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0174       auto minJ = i;
0175       auto mdist = eps;
0176       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0177         if (nn[j] < nn[i])
0178           return;
0179         if (nn[j] == nn[i] && zt[j] >= zt[i])
0180           return;  // if equal use natural order...
0181         auto dist = std::abs(zt[i] - zt[j]);
0182         if (dist > mdist)
0183           return;
0184         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0185           return;
0186         mdist = dist;
0187         minJ = j;
0188       });
0189 
0190       // should belong to the same cluster...
0191       ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0192       ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0193     }
0194     alpaka::syncBlockThreads(acc);
0195 #endif
0196 
0197     auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0198     foundClusters = 0;
0199     alpaka::syncBlockThreads(acc);
0200 
0201     // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
0202     // mark these tracks with a negative id.
0203     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0204       if (iv[i] == int(i)) {
0205         if (nn[i] >= minT) {
0206           auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0207           iv[i] = -(old + 1);
0208         } else {  // noise
0209           iv[i] = -9998;
0210         }
0211       }
0212     }
0213     alpaka::syncBlockThreads(acc);
0214 
0215     ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0216 
0217     // propagate the negative id to all the tracks in the cluster.
0218     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0219       if (iv[i] >= 0) {
0220         // mark each track in a cluster with the same id as the first one
0221         iv[i] = iv[iv[i]];
0222       }
0223     }
0224     alpaka::syncBlockThreads(acc);
0225 
0226     // adjust the cluster id to be a positive value starting from 0
0227     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0228       iv[i] = -iv[i] - 1;
0229     }
0230 
0231     ws.nvIntermediate() = foundClusters;
0232     data.nvFinal() = foundClusters;
0233 
0234     if constexpr (verbose) {
0235       if (cms::alpakatools::once_per_block(acc))
0236         printf("found %d proto vertices\n", foundClusters);
0237     }
0238   }
0239 
0240   class ClusterTracksByDensityKernel {
0241   public:
0242     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0243                                   VtxSoAView data,
0244                                   TrkSoAView trkdata,
0245                                   WsSoAView ws,
0246                                   int minT,      // min number of neighbours to be "seed"
0247                                   float eps,     // max absolute distance to cluster
0248                                   float errmax,  // max error to be "seed"
0249                                   float chi2max  // max normalized distance to cluster
0250     ) const {
0251       clusterTracksByDensity(acc, data, trkdata, ws, minT, eps, errmax, chi2max);
0252     }
0253   };
0254 
0255 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0256 
0257 #endif  // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h