Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:40

0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   using VtxSoAView = ::reco::ZVertexSoAView;
0021   using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0022   // this algo does not really scale as it works in a single block...
0023   // enough for <10K tracks we have
0024   //
0025   // based on Rodrighez&Laio algo
0026   //
0027   template <typename TAcc>
0028   ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
0029   clusterTracksByDensity(const TAcc& acc,
0030                          VtxSoAView& pdata,
0031                          WsSoAView& pws,
0032                          int minT,      // min number of neighbours to be "seed"
0033                          float eps,     // max absolute distance to cluster
0034                          float errmax,  // max error to be "seed"
0035                          float chi2max  // max normalized distance to cluster
0036   ) {
0037     using namespace vertexFinder;
0038     constexpr bool verbose = false;  // in principle the compiler should optmize out if false
0039     const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
0040 
0041     if constexpr (verbose) {
0042       if (cms::alpakatools::once_per_block(acc))
0043         printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0044     }
0045     auto er2mx = errmax * errmax;
0046 
0047     auto& __restrict__ data = pdata;
0048     auto& __restrict__ ws = pws;
0049     auto nt = ws.ntrks();
0050     float const* __restrict__ zt = ws.zt();
0051     float const* __restrict__ ezt2 = ws.ezt2();
0052 
0053     uint32_t& nvFinal = data.nvFinal();
0054     uint32_t& nvIntermediate = ws.nvIntermediate();
0055 
0056     uint8_t* __restrict__ izt = ws.izt();
0057     int32_t* __restrict__ nn = data.ndof();
0058     int32_t* __restrict__ iv = ws.iv();
0059 
0060     ALPAKA_ASSERT_ACC(zt);
0061     ALPAKA_ASSERT_ACC(ezt2);
0062     ALPAKA_ASSERT_ACC(izt);
0063     ALPAKA_ASSERT_ACC(nn);
0064     ALPAKA_ASSERT_ACC(iv);
0065 
0066     using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0067     auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0068     auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0069 
0070     for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0071       hist.off[j] = 0;
0072     }
0073     alpaka::syncBlockThreads(acc);
0074 
0075     if constexpr (verbose) {
0076       if (cms::alpakatools::once_per_block(acc))
0077         printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0078     }
0079     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0080 
0081     // fill hist  (bin shall be wider than "eps")
0082     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0083       ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS);
0084       int iz = int(zt[i] * 10.);  // valid if eps<=0.1
0085       // iz = std::clamp(iz, INT8_MIN, INT8_MAX);  // sorry c++17 only
0086       iz = std::min(std::max(iz, INT8_MIN), INT8_MAX);
0087       izt[i] = iz - INT8_MIN;
0088       ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0089       ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0090       hist.count(acc, izt[i]);
0091       iv[i] = i;
0092       nn[i] = 0;
0093     }
0094     alpaka::syncBlockThreads(acc);
0095     if (threadIdxLocal < 32)
0096       hws[threadIdxLocal] = 0;  // used by prefix scan...
0097     alpaka::syncBlockThreads(acc);
0098     hist.finalize(acc, hws);
0099     alpaka::syncBlockThreads(acc);
0100     ALPAKA_ASSERT_ACC(hist.size() == nt);
0101     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0102       hist.fill(acc, izt[i], uint16_t(i));
0103     }
0104     alpaka::syncBlockThreads(acc);
0105     // count neighbours
0106     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0107       if (ezt2[i] > er2mx)
0108         continue;
0109       auto loop = [&](uint32_t j) {
0110         if (i == j)
0111           return;
0112         auto dist = std::abs(zt[i] - zt[j]);
0113         if (dist > eps)
0114           return;
0115         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0116           return;
0117         nn[i]++;
0118       };
0119 
0120       cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0121     }
0122     alpaka::syncBlockThreads(acc);
0123 
0124     // find closest above me .... (we ignore the possibility of two j at same distance from i)
0125     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0126       float mdist = eps;
0127       auto loop = [&](uint32_t j) {
0128         if (nn[j] < nn[i])
0129           return;
0130         if (nn[j] == nn[i] && zt[j] >= zt[i])
0131           return;  // if equal use natural order...
0132         auto dist = std::abs(zt[i] - zt[j]);
0133         if (dist > mdist)
0134           return;
0135         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0136           return;  // (break natural order???)
0137         mdist = dist;
0138         iv[i] = j;  // assign to cluster (better be unique??)
0139       };
0140       cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0141     }
0142     alpaka::syncBlockThreads(acc);
0143 
0144 #ifdef GPU_DEBUG
0145     //  mini verification
0146     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0147       if (iv[i] != int(i))
0148         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0149     }
0150     alpaka::syncBlockThreads(acc);
0151 #endif
0152 
0153     // consolidate graph (percolate index of seed)
0154     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0155       auto m = iv[i];
0156       while (m != iv[m])
0157         m = iv[m];
0158       iv[i] = m;
0159     }
0160 
0161 #ifdef GPU_DEBUG
0162     alpaka::syncBlockThreads(acc);
0163     //  mini verification
0164     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0165       if (iv[i] != int(i))
0166         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0167     }
0168 #endif
0169 
0170 #ifdef GPU_DEBUG
0171     // and verify that we did not spit any cluster...
0172     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0173       auto minJ = i;
0174       auto mdist = eps;
0175       auto loop = [&](uint32_t j) {
0176         if (nn[j] < nn[i])
0177           return;
0178         if (nn[j] == nn[i] && zt[j] >= zt[i])
0179           return;  // if equal use natural order...
0180         auto dist = std::abs(zt[i] - zt[j]);
0181         if (dist > mdist)
0182           return;
0183         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0184           return;
0185         mdist = dist;
0186         minJ = j;
0187       };
0188       cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0189       // should belong to the same cluster...
0190       ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0191       ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0192     }
0193     alpaka::syncBlockThreads(acc);
0194 #endif
0195 
0196     auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0197     foundClusters = 0;
0198     alpaka::syncBlockThreads(acc);
0199 
0200     // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
0201     // mark these tracks with a negative id.
0202     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0203       if (iv[i] == int(i)) {
0204         if (nn[i] >= minT) {
0205           auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0206           iv[i] = -(old + 1);
0207         } else {  // noise
0208           iv[i] = -9998;
0209         }
0210       }
0211     }
0212     alpaka::syncBlockThreads(acc);
0213 
0214     ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX);
0215 
0216     // propagate the negative id to all the tracks in the cluster.
0217     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0218       if (iv[i] >= 0) {
0219         // mark each track in a cluster with the same id as the first one
0220         iv[i] = iv[iv[i]];
0221       }
0222     }
0223     alpaka::syncBlockThreads(acc);
0224 
0225     // adjust the cluster id to be a positive value starting from 0
0226     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0227       iv[i] = -iv[i] - 1;
0228     }
0229 
0230     nvIntermediate = nvFinal = foundClusters;
0231     if constexpr (verbose) {
0232       if (cms::alpakatools::once_per_block(acc))
0233         printf("found %d proto vertices\n", foundClusters);
0234     }
0235   }
0236   class ClusterTracksByDensityKernel {
0237   public:
0238     template <typename TAcc>
0239     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0240                                   VtxSoAView pdata,
0241                                   WsSoAView pws,
0242                                   int minT,      // min number of neighbours to be "seed"
0243                                   float eps,     // max absolute distance to cluster
0244                                   float errmax,  // max error to be "seed"
0245                                   float chi2max  // max normalized distance to cluster
0246     ) const {
0247       clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
0248     }
0249   };
0250 
0251 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0252 
0253 #endif  // RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h