Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-13 22:52:46

0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   // This algo does not really scale as it works in a single block...
0021   // It should be good enough for <10K tracks we have.
0022   //
0023   // Based on Rodrighez&Laio algo.
0024   ALPAKA_FN_ACC ALPAKA_FN_INLINE void clusterTracksByDensity(Acc1D const& acc,
0025                                                              VtxSoAView& data,
0026                                                              TrkSoAView& trkdata,
0027                                                              WsSoAView& ws,
0028                                                              int minT,      // min number of neighbours to be "seed"
0029                                                              float eps,     // max absolute distance to cluster
0030                                                              float errmax,  // max error to be "seed"
0031                                                              float chi2max  // max normalized distance to cluster
0032   ) {
0033     constexpr bool verbose = false;
0034 
0035     if constexpr (verbose) {
0036       if (cms::alpakatools::once_per_block(acc))
0037         printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038     }
0039 
0040     auto nt = ws.ntrks();
0041     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043 
0044     float const* __restrict__ zt = ws.zt();
0045     float const* __restrict__ ezt2 = ws.ezt2();
0046     uint8_t* __restrict__ izt = ws.izt();
0047     int32_t* __restrict__ iv = ws.iv();
0048     int32_t* __restrict__ nn = trkdata.ndof();
0049     ALPAKA_ASSERT_ACC(zt);
0050     ALPAKA_ASSERT_ACC(ezt2);
0051     ALPAKA_ASSERT_ACC(izt);
0052     ALPAKA_ASSERT_ACC(iv);
0053     ALPAKA_ASSERT_ACC(nn);
0054 
0055     using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056     auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057     auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058 
0059     for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060       hist.off[j] = 0;
0061     }
0062     for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063       hws[j] = 0;  // used by prefix scan in hist.finalize()
0064     }
0065     alpaka::syncBlockThreads(acc);
0066 
0067     if constexpr (verbose) {
0068       if (cms::alpakatools::once_per_block(acc))
0069         printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0070     }
0071 
0072     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073     ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074     ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0075 
0076     // fill hist (bin shall be wider than "eps")
0077     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078       int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0079       iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080       ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0081       ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0082       izt[i] = iz - INT8_MIN;
0083       hist.count(acc, izt[i]);
0084       iv[i] = i;
0085       nn[i] = 0;
0086     }
0087     alpaka::syncBlockThreads(acc);
0088 
0089     hist.finalize(acc, hws);
0090     alpaka::syncBlockThreads(acc);
0091 
0092     ALPAKA_ASSERT_ACC(hist.size() == nt);
0093     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094       hist.fill(acc, izt[i], uint16_t(i));
0095     }
0096     alpaka::syncBlockThreads(acc);
0097 
0098     // count neighbours
0099     const auto errmax2 = errmax * errmax;
0100     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101       if (ezt2[i] > errmax2)
0102         continue;
0103       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104         if (i == j)
0105           return;
0106         auto dist = std::abs(zt[i] - zt[j]);
0107         if (dist > eps)
0108           return;
0109         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0110           return;
0111         nn[i]++;
0112       });
0113     }
0114     alpaka::syncBlockThreads(acc);
0115 
0116     // find closest above me .... (we ignore the possibility of two j at same distance from i)
0117     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0118       float mdist = eps;
0119       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0120         if (nn[j] < nn[i])
0121           return;
0122         if (nn[j] == nn[i] && zt[j] >= zt[i])
0123           return;  // if equal use natural order...
0124         auto dist = std::abs(zt[i] - zt[j]);
0125         if (dist > mdist)
0126           return;
0127         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0128           return;  // (break natural order???)
0129         mdist = dist;
0130         iv[i] = j;  // assign to cluster (better be unique??)
0131       });
0132     }
0133     alpaka::syncBlockThreads(acc);
0134 
0135 #ifdef GPU_DEBUG
0136     //  mini verification
0137     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0138       if (iv[i] != int(i))
0139         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0140     }
0141     alpaka::syncBlockThreads(acc);
0142 #endif
0143 
0144     // consolidate graph (percolate index of seed)
0145     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0146       auto m = iv[i];
0147       while (m != iv[m])
0148         m = iv[m];
0149       iv[i] = m;
0150     }
0151 
0152 #ifdef GPU_DEBUG
0153     alpaka::syncBlockThreads(acc);
0154     //  mini verification
0155     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0156       if (iv[i] != int(i))
0157         ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0158     }
0159 #endif
0160 
0161 #ifdef GPU_DEBUG
0162     // and verify that we did not split any cluster...
0163     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0164       auto minJ = i;
0165       auto mdist = eps;
0166       cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0167         if (nn[j] < nn[i])
0168           return;
0169         if (nn[j] == nn[i] && zt[j] >= zt[i])
0170           return;  // if equal use natural order...
0171         auto dist = std::abs(zt[i] - zt[j]);
0172         if (dist > mdist)
0173           return;
0174         if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0175           return;
0176         mdist = dist;
0177         minJ = j;
0178       });
0179 
0180       // should belong to the same cluster...
0181       ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0182       ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0183     }
0184     alpaka::syncBlockThreads(acc);
0185 #endif
0186 
0187     auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0188     foundClusters = 0;
0189     alpaka::syncBlockThreads(acc);
0190 
0191     // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
0192     // mark these tracks with a negative id.
0193     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0194       if (iv[i] == int(i)) {
0195         if (nn[i] >= minT) {
0196           auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0197           iv[i] = -(old + 1);
0198         } else {  // noise
0199           iv[i] = -9998;
0200         }
0201       }
0202     }
0203     alpaka::syncBlockThreads(acc);
0204 
0205     ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0206 
0207     // propagate the negative id to all the tracks in the cluster.
0208     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0209       if (iv[i] >= 0) {
0210         // mark each track in a cluster with the same id as the first one
0211         iv[i] = iv[iv[i]];
0212       }
0213     }
0214     alpaka::syncBlockThreads(acc);
0215 
0216     // adjust the cluster id to be a positive value starting from 0
0217     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0218       iv[i] = -iv[i] - 1;
0219     }
0220 
0221     ws.nvIntermediate() = foundClusters;
0222     data.nvFinal() = foundClusters;
0223 
0224     if constexpr (verbose) {
0225       if (cms::alpakatools::once_per_block(acc))
0226         printf("found %d proto vertices\n", foundClusters);
0227     }
0228   }
0229 
0230   class ClusterTracksByDensityKernel {
0231   public:
0232     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0233                                   VtxSoAView data,
0234                                   TrkSoAView trkdata,
0235                                   WsSoAView ws,
0236                                   int minT,      // min number of neighbours to be "seed"
0237                                   float eps,     // max absolute distance to cluster
0238                                   float errmax,  // max error to be "seed"
0239                                   float chi2max  // max normalized distance to cluster
0240     ) const {
0241       clusterTracksByDensity(acc, data, trkdata, ws, minT, eps, errmax, chi2max);
0242     }
0243   };
0244 
0245 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0246 
0247 #endif  // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h