Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-13 22:52:46

0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   // This algo does not really scale as it works in a single block...
0021   // It should be good enough for <10K tracks we have.
0022   class ClusterTracksDBSCAN {
0023   public:
0024     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025                                   VtxSoAView data,
0026                                   TrkSoAView trkdata,
0027                                   WsSoAView ws,
0028                                   int minT,      // min number of neighbours to be "core"
0029                                   float eps,     // max absolute distance to cluster
0030                                   float errmax,  // max error to be "seed"
0031                                   float chi2max  // max normalized distance to cluster
0032     ) const {
0033       constexpr bool verbose = false;
0034 
0035       if constexpr (verbose) {
0036         if (cms::alpakatools::once_per_block(acc))
0037           printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038       }
0039 
0040       auto nt = ws.ntrks();
0041       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043 
0044       float const* __restrict__ zt = ws.zt();
0045       float const* __restrict__ ezt2 = ws.ezt2();
0046       uint8_t* __restrict__ izt = ws.izt();
0047       int32_t* __restrict__ iv = ws.iv();
0048       int32_t* __restrict__ nn = trkdata.ndof();
0049       ALPAKA_ASSERT_ACC(zt);
0050       ALPAKA_ASSERT_ACC(ezt2);
0051       ALPAKA_ASSERT_ACC(izt);
0052       ALPAKA_ASSERT_ACC(iv);
0053       ALPAKA_ASSERT_ACC(nn);
0054 
0055       using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056       auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057       auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058 
0059       for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060         hist.off[j] = 0;
0061       }
0062       for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063         hws[j] = 0;  // used by prefix scan in hist.finalize()
0064       }
0065       alpaka::syncBlockThreads(acc);
0066 
0067       if constexpr (verbose) {
0068         if (cms::alpakatools::once_per_block(acc))
0069           printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070       }
0071 
0072       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074       ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0075 
0076       // fill hist (bin shall be wider than "eps")
0077       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078         int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0079         iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080         izt[i] = iz - INT8_MIN;
0081         ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0082         ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0083         hist.count(acc, izt[i]);
0084         iv[i] = i;
0085         nn[i] = 0;
0086       }
0087       alpaka::syncBlockThreads(acc);
0088 
0089       hist.finalize(acc, hws);
0090       alpaka::syncBlockThreads(acc);
0091 
0092       ALPAKA_ASSERT_ACC(hist.size() == nt);
0093       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094         hist.fill(acc, izt[i], uint16_t(i));
0095       }
0096       alpaka::syncBlockThreads(acc);
0097 
0098       // count neighbours
0099       const auto errmax2 = errmax * errmax;
0100       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101         if (ezt2[i] > errmax2)
0102           continue;
0103         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104           if (i == j)
0105             return;
0106           auto dist = std::abs(zt[i] - zt[j]);
0107           if (dist > eps)
0108             return;
0109           nn[i]++;
0110         });
0111       }
0112       alpaka::syncBlockThreads(acc);
0113 
0114       // find NN with smaller z...
0115       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0116         if (nn[i] < minT)
0117           continue;  // DBSCAN core rule
0118         float mz = zt[i];
0119         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0120           if (zt[j] >= mz)
0121             return;
0122           if (nn[j] < minT)
0123             return;  // DBSCAN core rule
0124           auto dist = std::abs(zt[i] - zt[j]);
0125           if (dist > eps)
0126             return;
0127           mz = zt[j];
0128           iv[i] = j;  // assign to cluster (better be unique??)
0129         });
0130       }
0131       alpaka::syncBlockThreads(acc);
0132 
0133 #ifdef GPU_DEBUG
0134       //  mini verification
0135       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0136         if (iv[i] != int(i))
0137           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0138       }
0139       alpaka::syncBlockThreads(acc);
0140 #endif
0141 
0142       // consolidate graph (percolate index of seed)
0143       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0144         auto m = iv[i];
0145         while (m != iv[m])
0146           m = iv[m];
0147         iv[i] = m;
0148       }
0149       alpaka::syncBlockThreads(acc);
0150 
0151 #ifdef GPU_DEBUG
0152       //  mini verification
0153       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0154         if (iv[i] != int(i))
0155           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0156       }
0157       alpaka::syncBlockThreads(acc);
0158 #endif
0159 
0160 #ifdef GPU_DEBUG
0161       // and verify that we did not split any cluster...
0162       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0163         if (nn[i] < minT)
0164           continue;  // DBSCAN core rule
0165         ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0166         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0167           if (nn[j] < minT)
0168             return;  // DBSCAN core rule
0169           auto dist = std::abs(zt[i] - zt[j]);
0170           if (dist > eps)
0171             return;
0172           // they should belong to the same cluster, isn't it?
0173           if (iv[i] != iv[j]) {
0174             printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0175             printf("      %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0176           }
0177           ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0178         });
0179       }
0180       alpaka::syncBlockThreads(acc);
0181 #endif
0182 
0183       // collect edges (assign to closest cluster of closest point??? here to closest point)
0184       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0185         //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
0186         if (nn[i] >= minT)
0187           continue;  // DBSCAN edge rule
0188         float mdist = eps;
0189         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0190           if (nn[j] < minT)
0191             return;  // DBSCAN core rule
0192           auto dist = std::abs(zt[i] - zt[j]);
0193           if (dist > mdist)
0194             return;
0195           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0196             return;  // needed?
0197           mdist = dist;
0198           iv[i] = iv[j];  // assign to cluster (better be unique??)
0199         });
0200       }
0201 
0202       auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0203       foundClusters = 0;
0204       alpaka::syncBlockThreads(acc);
0205 
0206       // find the number of different clusters, identified by a tracks with clus[i] == i;
0207       // mark these tracks with a negative id.
0208       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0209         if (iv[i] == int(i)) {
0210           if (nn[i] >= minT) {
0211             auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0212             iv[i] = -(old + 1);
0213           } else {  // noise
0214             iv[i] = -9998;
0215           }
0216         }
0217       }
0218       alpaka::syncBlockThreads(acc);
0219 
0220       ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0221 
0222       // propagate the negative id to all the tracks in the cluster.
0223       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0224         if (iv[i] >= 0) {
0225           // mark each track in a cluster with the same id as the first one
0226           iv[i] = iv[iv[i]];
0227         }
0228       }
0229       alpaka::syncBlockThreads(acc);
0230 
0231       // adjust the cluster id to be a positive value starting from 0
0232       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0233         iv[i] = -iv[i] - 1;
0234       }
0235 
0236       ws.nvIntermediate() = foundClusters;
0237       data.nvFinal() = foundClusters;
0238 
0239       if constexpr (verbose) {
0240         if (cms::alpakatools::once_per_block(acc))
0241           printf("found %d proto vertices\n", foundClusters);
0242       }
0243     }
0244   };
0245 
0246 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0247 
0248 #endif  // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h