Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-02-20 03:45:53

0001 #ifndef RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0002 #define RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   // This algo does not really scale as it works in a single block...
0021   // It should be good enough for <10K tracks we have.
0022   class ClusterTracksIterative {
0023   public:
0024     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025                                   VtxSoAView data,
0026                                   TrkSoAView trkdata,
0027                                   WsSoAView ws,
0028                                   int minT,      // min number of neighbours to be "core"
0029                                   float eps,     // max absolute distance to cluster
0030                                   float errmax,  // max error to be "seed"
0031                                   float chi2max  // max normalized distance to cluster
0032     ) const {
0033       constexpr bool verbose = false;
0034 
0035       if constexpr (verbose) {
0036         if (cms::alpakatools::once_per_block(acc))
0037           printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038       }
0039 
0040       auto nt = ws.ntrks();
0041       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043 
0044       float const* __restrict__ zt = ws.zt();
0045       float const* __restrict__ ezt2 = ws.ezt2();
0046       uint8_t* __restrict__ izt = ws.izt();
0047       int32_t* __restrict__ iv = ws.iv();
0048       int32_t* __restrict__ nn = trkdata.ndof();
0049       ALPAKA_ASSERT_ACC(zt);
0050       ALPAKA_ASSERT_ACC(ezt2);
0051       ALPAKA_ASSERT_ACC(izt);
0052       ALPAKA_ASSERT_ACC(iv);
0053       ALPAKA_ASSERT_ACC(nn);
0054 
0055       using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056       auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057       auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058 
0059       for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060         hist.off[j] = 0;
0061       }
0062       for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063         hws[j] = 0;  // used by prefix scan in hist.finalize()
0064       }
0065       alpaka::syncBlockThreads(acc);
0066 
0067       if constexpr (verbose) {
0068         if (cms::alpakatools::once_per_block(acc))
0069           printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070       }
0071 
0072       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074       ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0075 
0076       // fill hist (bin shall be wider than "eps")
0077       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078         int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0079         // Equivalent of iz = std::clamp(iz, INT8_MIN, INT8_MAX)
0080         // which doesn't compile with gcc14 due to reference to __glibcxx_assert
0081         // See https://github.com/llvm/llvm-project/issues/95183
0082         int tmp_max = std::max<int>(iz, INT8_MIN);
0083         iz = std::min<int>(tmp_max, INT8_MAX);
0084         izt[i] = iz - INT8_MIN;
0085         ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0086         ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0087         hist.count(acc, izt[i]);
0088         iv[i] = i;
0089         nn[i] = 0;
0090       }
0091       alpaka::syncBlockThreads(acc);
0092 
0093       hist.finalize(acc, hws);
0094       alpaka::syncBlockThreads(acc);
0095 
0096       ALPAKA_ASSERT_ACC(hist.size() == nt);
0097       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0098         hist.fill(acc, izt[i], uint16_t(i));
0099       }
0100       alpaka::syncBlockThreads(acc);
0101 
0102       // count neighbours
0103       const auto errmax2 = errmax * errmax;
0104       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0105         if (ezt2[i] > errmax2)
0106           continue;
0107         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0108           if (i == j)
0109             return;
0110           auto dist = std::abs(zt[i] - zt[j]);
0111           if (dist > eps)
0112             return;
0113           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0114             return;
0115           nn[i]++;
0116         });
0117       }
0118 
0119       auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0120       nloops = 0;
0121       alpaka::syncBlockThreads(acc);
0122 
0123       // cluster seeds only
0124       bool more = true;
0125       while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0126         if (1 == nloops % 2) {
0127           for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0128             auto m = iv[i];
0129             while (m != iv[m])
0130               m = iv[m];
0131             iv[i] = m;
0132           }
0133         } else {
0134           more = false;
0135           for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
0136             auto p = hist.begin() + k;
0137             auto i = *p;
0138             ++p;
0139             if (nn[i] < minT)
0140               continue;  // DBSCAN core rule
0141             auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
0142             for (; p < hist.end(be); ++p) {
0143               uint32_t j = *p;
0144               ALPAKA_ASSERT_ACC(i != j);
0145               if (nn[j] < minT)
0146                 continue;  // DBSCAN core rule
0147               auto dist = std::abs(zt[i] - zt[j]);
0148               if (dist > eps)
0149                 continue;
0150               if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0151                 continue;
0152               auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Threads{});
0153               if (old != iv[i]) {
0154                 // end the loop only if no changes were applied
0155                 more = true;
0156               }
0157               alpaka::atomicMin(acc, &iv[i], old, alpaka::hierarchy::Threads{});
0158             }  // inner loop on p (and j)
0159           }  // outer loop on k (and i)
0160         }
0161         if (cms::alpakatools::once_per_block(acc))
0162           ++nloops;
0163       }  // while
0164 
0165       // collect edges (assign to closest cluster of closest point??? here to closest point)
0166       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0167         //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
0168         if (nn[i] >= minT)
0169           continue;  // DBSCAN edge rule
0170         float mdist = eps;
0171         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](int j) {
0172           if (nn[j] < minT)
0173             return;  // DBSCAN core rule
0174           auto dist = std::abs(zt[i] - zt[j]);
0175           if (dist > mdist)
0176             return;
0177           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0178             return;  // needed?
0179           mdist = dist;
0180           iv[i] = iv[j];  // assign to cluster (better be unique??)
0181         });
0182       }
0183 
0184       auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0185       foundClusters = 0;
0186       alpaka::syncBlockThreads(acc);
0187 
0188       // find the number of different clusters, identified by a tracks with clus[i] == i;
0189       // mark these tracks with a negative id.
0190       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0191         if (iv[i] == int(i)) {
0192           if (nn[i] >= minT) {
0193             auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0194             iv[i] = -(old + 1);
0195           } else {  // noise
0196             iv[i] = -9998;
0197           }
0198         }
0199       }
0200       alpaka::syncBlockThreads(acc);
0201 
0202       ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0203 
0204       // propagate the negative id to all the tracks in the cluster.
0205       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0206         if (iv[i] >= 0) {
0207           // mark each track in a cluster with the same id as the first one
0208           iv[i] = iv[iv[i]];
0209         }
0210       }
0211       alpaka::syncBlockThreads(acc);
0212 
0213       // adjust the cluster id to be a positive value starting from 0
0214       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0215         iv[i] = -iv[i] - 1;
0216       }
0217 
0218       ws.nvIntermediate() = foundClusters;
0219       data.nvFinal() = foundClusters;
0220 
0221       if constexpr (verbose) {
0222         if (cms::alpakatools::once_per_block(acc))
0223           printf("found %d proto vertices\n", foundClusters);
0224       }
0225     }
0226   };
0227 
0228 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0229 
0230 #endif  // RecoTracker_PixelVertexFinding_plugins_clusterTracksIterativeAlpaka_h