Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-04-29 02:41:34

0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/debug.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0015 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0016 
0017 #include "vertexFinder.h"
0018 
0019 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0020 
0021   // This algo does not really scale as it works in a single block...
0022   // It should be good enough for <10K tracks we have.
0023   class ClusterTracksDBSCAN {
0024   public:
0025     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0026                                   VtxSoAView data,
0027                                   TrkSoAView trkdata,
0028                                   WsSoAView ws,
0029                                   int minT,      // min number of neighbours to be "core"
0030                                   float eps,     // max absolute distance to cluster
0031                                   float errmax,  // max error to be "seed"
0032                                   float chi2max  // max normalized distance to cluster
0033     ) const {
0034       // workaround for #47808
0035       debug::do_not_optimise(ws);
0036 
0037       constexpr bool verbose = false;
0038 
0039       if constexpr (verbose) {
0040         if (cms::alpakatools::once_per_block(acc))
0041           printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0042       }
0043 
0044       auto nt = ws.ntrks();
0045       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0046       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0047 
0048       float const* __restrict__ zt = ws.zt();
0049       float const* __restrict__ ezt2 = ws.ezt2();
0050       uint8_t* __restrict__ izt = ws.izt();
0051       int32_t* __restrict__ iv = ws.iv();
0052       int32_t* __restrict__ nn = trkdata.ndof();
0053       ALPAKA_ASSERT_ACC(zt);
0054       ALPAKA_ASSERT_ACC(ezt2);
0055       ALPAKA_ASSERT_ACC(izt);
0056       ALPAKA_ASSERT_ACC(iv);
0057       ALPAKA_ASSERT_ACC(nn);
0058 
0059       using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0060       auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0061       auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0062 
0063       for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0064         hist.off[j] = 0;
0065       }
0066       for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0067         hws[j] = 0;  // used by prefix scan in hist.finalize()
0068       }
0069       alpaka::syncBlockThreads(acc);
0070 
0071       if constexpr (verbose) {
0072         if (cms::alpakatools::once_per_block(acc))
0073           printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0074       }
0075 
0076       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0077       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0078       ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0079 
0080       // fill hist (bin shall be wider than "eps")
0081       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0082         int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0083         // Equivalent of iz = std::clamp(iz, INT8_MIN, INT8_MAX)
0084         // which doesn't compile with gcc14 due to reference to __glibcxx_assert
0085         // See https://github.com/llvm/llvm-project/issues/95183
0086         int tmp_max = std::max<int>(iz, INT8_MIN);
0087         iz = std::min<int>(tmp_max, INT8_MAX);
0088         izt[i] = iz - INT8_MIN;
0089         ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0090         ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0091         hist.count(acc, izt[i]);
0092         iv[i] = i;
0093         nn[i] = 0;
0094       }
0095       alpaka::syncBlockThreads(acc);
0096 
0097       hist.finalize(acc, hws);
0098       alpaka::syncBlockThreads(acc);
0099 
0100       ALPAKA_ASSERT_ACC(hist.size() == nt);
0101       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0102         hist.fill(acc, izt[i], uint16_t(i));
0103       }
0104       alpaka::syncBlockThreads(acc);
0105 
0106       // count neighbours
0107       const auto errmax2 = errmax * errmax;
0108       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0109         if (ezt2[i] > errmax2)
0110           continue;
0111         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0112           if (i == j)
0113             return;
0114           auto dist = std::abs(zt[i] - zt[j]);
0115           if (dist > eps)
0116             return;
0117           nn[i]++;
0118         });
0119       }
0120       alpaka::syncBlockThreads(acc);
0121 
0122       // find NN with smaller z...
0123       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0124         if (nn[i] < minT)
0125           continue;  // DBSCAN core rule
0126         float mz = zt[i];
0127         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0128           if (zt[j] >= mz)
0129             return;
0130           if (nn[j] < minT)
0131             return;  // DBSCAN core rule
0132           auto dist = std::abs(zt[i] - zt[j]);
0133           if (dist > eps)
0134             return;
0135           mz = zt[j];
0136           iv[i] = j;  // assign to cluster (better be unique??)
0137         });
0138       }
0139       alpaka::syncBlockThreads(acc);
0140 
0141 #ifdef GPU_DEBUG
0142       //  mini verification
0143       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0144         if (iv[i] != int(i))
0145           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0146       }
0147       alpaka::syncBlockThreads(acc);
0148 #endif
0149 
0150       // consolidate graph (percolate index of seed)
0151       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0152         auto m = iv[i];
0153         while (m != iv[m])
0154           m = iv[m];
0155         iv[i] = m;
0156       }
0157       alpaka::syncBlockThreads(acc);
0158 
0159 #ifdef GPU_DEBUG
0160       //  mini verification
0161       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0162         if (iv[i] != int(i))
0163           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0164       }
0165       alpaka::syncBlockThreads(acc);
0166 #endif
0167 
0168 #ifdef GPU_DEBUG
0169       // and verify that we did not split any cluster...
0170       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0171         if (nn[i] < minT)
0172           continue;  // DBSCAN core rule
0173         ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0174         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0175           if (nn[j] < minT)
0176             return;  // DBSCAN core rule
0177           auto dist = std::abs(zt[i] - zt[j]);
0178           if (dist > eps)
0179             return;
0180           // they should belong to the same cluster, isn't it?
0181           if (iv[i] != iv[j]) {
0182             printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0183             printf("      %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0184           }
0185           ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0186         });
0187       }
0188       alpaka::syncBlockThreads(acc);
0189 #endif
0190 
0191       // collect edges (assign to closest cluster of closest point??? here to closest point)
0192       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0193         //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
0194         if (nn[i] >= minT)
0195           continue;  // DBSCAN edge rule
0196         float mdist = eps;
0197         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0198           if (nn[j] < minT)
0199             return;  // DBSCAN core rule
0200           auto dist = std::abs(zt[i] - zt[j]);
0201           if (dist > mdist)
0202             return;
0203           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0204             return;  // needed?
0205           mdist = dist;
0206           iv[i] = iv[j];  // assign to cluster (better be unique??)
0207         });
0208       }
0209 
0210       auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0211       foundClusters = 0;
0212       alpaka::syncBlockThreads(acc);
0213 
0214       // find the number of different clusters, identified by a tracks with clus[i] == i;
0215       // mark these tracks with a negative id.
0216       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0217         if (iv[i] == int(i)) {
0218           if (nn[i] >= minT) {
0219             auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0220             iv[i] = -(old + 1);
0221           } else {  // noise
0222             iv[i] = -9998;
0223           }
0224         }
0225       }
0226       alpaka::syncBlockThreads(acc);
0227 
0228       ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0229 
0230       // propagate the negative id to all the tracks in the cluster.
0231       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0232         if (iv[i] >= 0) {
0233           // mark each track in a cluster with the same id as the first one
0234           iv[i] = iv[iv[i]];
0235         }
0236       }
0237       alpaka::syncBlockThreads(acc);
0238 
0239       // adjust the cluster id to be a positive value starting from 0
0240       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0241         iv[i] = -iv[i] - 1;
0242       }
0243 
0244       ws.nvIntermediate() = foundClusters;
0245       data.nvFinal() = foundClusters;
0246 
0247       if constexpr (verbose) {
0248         if (cms::alpakatools::once_per_block(acc))
0249           printf("found %d proto vertices\n", foundClusters);
0250       }
0251     }
0252   };
0253 
0254 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0255 
0256 #endif  // RecoVertex_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h