Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:40

0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   using VtxSoAView = ::reco::ZVertexSoAView;
0021   using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0022   // this algo does not really scale as it works in a single block...
0023   // enough for <10K tracks we have
0024   class ClusterTracksDBSCAN {
0025   public:
0026     template <typename TAcc>
0027     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0028                                   VtxSoAView pdata,
0029                                   WsSoAView pws,
0030                                   int minT,      // min number of neighbours to be "core"
0031                                   float eps,     // max absolute distance to cluster
0032                                   float errmax,  // max error to be "seed"
0033                                   float chi2max  // max normalized distance to cluster
0034     ) const {
0035       constexpr bool verbose = false;  // in principle the compiler should optmize out if false
0036       const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
0037       if constexpr (verbose) {
0038         if (cms::alpakatools::once_per_block(acc))
0039           printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0040       }
0041       auto er2mx = errmax * errmax;
0042 
0043       auto& __restrict__ data = pdata;
0044       auto& __restrict__ ws = pws;
0045       auto nt = ws.ntrks();
0046       float const* __restrict__ zt = ws.zt();
0047       float const* __restrict__ ezt2 = ws.ezt2();
0048 
0049       uint32_t& nvFinal = data.nvFinal();
0050       uint32_t& nvIntermediate = ws.nvIntermediate();
0051 
0052       uint8_t* __restrict__ izt = ws.izt();
0053       int32_t* __restrict__ nn = data.ndof();
0054       int32_t* __restrict__ iv = ws.iv();
0055 
0056       ALPAKA_ASSERT_ACC(zt);
0057       ALPAKA_ASSERT_ACC(iv);
0058       ALPAKA_ASSERT_ACC(nn);
0059       ALPAKA_ASSERT_ACC(ezt2);
0060 
0061       using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0062       auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0063       auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0064 
0065       for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0066         hist.off[j] = 0;
0067       }
0068       alpaka::syncBlockThreads(acc);
0069 
0070       if constexpr (verbose) {
0071         if (cms::alpakatools::once_per_block(acc))
0072           printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0073       }
0074 
0075       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0076 
0077       // fill hist  (bin shall be wider than "eps")
0078       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0079         ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS);
0080         int iz = int(zt[i] * 10.);  // valid if eps<=0.1
0081         iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0082         izt[i] = iz - INT8_MIN;
0083         ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0084         ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0085         hist.count(acc, izt[i]);
0086         iv[i] = i;
0087         nn[i] = 0;
0088       }
0089       alpaka::syncBlockThreads(acc);
0090       if (threadIdxLocal < 32)
0091         hws[threadIdxLocal] = 0;  // used by prefix scan...
0092       alpaka::syncBlockThreads(acc);
0093       hist.finalize(acc, hws);
0094       alpaka::syncBlockThreads(acc);
0095       ALPAKA_ASSERT_ACC(hist.size() == nt);
0096       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0097         hist.fill(acc, izt[i], uint32_t(i));
0098       }
0099       alpaka::syncBlockThreads(acc);
0100 
0101       // count neighbours
0102       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0103         if (ezt2[i] > er2mx)
0104           continue;
0105         auto loop = [&](uint32_t j) {
0106           if (i == j)
0107             return;
0108           auto dist = std::abs(zt[i] - zt[j]);
0109           if (dist > eps)
0110             return;
0111           //        if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
0112           nn[i]++;
0113         };
0114 
0115         cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0116       }
0117 
0118       alpaka::syncBlockThreads(acc);
0119 
0120       // find NN with smaller z...
0121       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0122         if (nn[i] < minT)
0123           continue;  // DBSCAN core rule
0124         float mz = zt[i];
0125         auto loop = [&](uint32_t j) {
0126           if (zt[j] >= mz)
0127             return;
0128           if (nn[j] < minT)
0129             return;  // DBSCAN core rule
0130           auto dist = std::abs(zt[i] - zt[j]);
0131           if (dist > eps)
0132             return;
0133           //        if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
0134           mz = zt[j];
0135           iv[i] = j;  // assign to cluster (better be unique??)
0136         };
0137         cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0138       }
0139 
0140       alpaka::syncBlockThreads(acc);
0141 
0142 #ifdef GPU_DEBUG
0143       //  mini verification
0144       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0145         if (iv[i] != int(i))
0146           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0147       }
0148       alpaka::syncBlockThreads(acc);
0149 #endif
0150 
0151       // consolidate graph (percolate index of seed)
0152       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0153         auto m = iv[i];
0154         while (m != iv[m])
0155           m = iv[m];
0156         iv[i] = m;
0157       }
0158 
0159       alpaka::syncBlockThreads(acc);
0160 
0161 #ifdef GPU_DEBUG
0162       //  mini verification
0163       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0164         if (iv[i] != int(i))
0165           ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0166       }
0167       alpaka::syncBlockThreads(acc);
0168 #endif
0169 
0170 #ifdef GPU_DEBUG
0171       // and verify that we did not spit any cluster...
0172       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0173         if (nn[i] < minT)
0174           continue;  // DBSCAN core rule
0175         ALPAKA_ASSERT_ACC(zt[iv[i]] <= zt[i]);
0176         auto loop = [&](uint32_t j) {
0177           if (nn[j] < minT)
0178             return;  // DBSCAN core rule
0179           auto dist = std::abs(zt[i] - zt[j]);
0180           if (dist > eps)
0181             return;
0182           //  if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
0183           // they should belong to the same cluster, isn't it?
0184           if (iv[i] != iv[j]) {
0185             printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
0186             printf("      %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
0187             ;
0188           }
0189           ALPAKA_ASSERT_ACC(iv[i] == iv[j]);
0190         };
0191         cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0192       }
0193       alpaka::syncBlockThreads(acc);
0194 #endif
0195 
0196       // collect edges (assign to closest cluster of closest point??? here to closest point)
0197       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0198         //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
0199         if (nn[i] >= minT)
0200           continue;  // DBSCAN edge rule
0201         float mdist = eps;
0202         auto loop = [&](uint32_t j) {
0203           if (nn[j] < minT)
0204             return;  // DBSCAN core rule
0205           auto dist = std::abs(zt[i] - zt[j]);
0206           if (dist > mdist)
0207             return;
0208           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0209             return;  // needed?
0210           mdist = dist;
0211           iv[i] = iv[j];  // assign to cluster (better be unique??)
0212         };
0213         cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0214       }
0215 
0216       auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0217       foundClusters = 0;
0218       alpaka::syncBlockThreads(acc);
0219 
0220       // find the number of different clusters, identified by a tracks with clus[i] == i;
0221       // mark these tracks with a negative id.
0222       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0223         if (iv[i] == int(i)) {
0224           if (nn[i] >= minT) {
0225             auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0226             iv[i] = -(old + 1);
0227           } else {  // noise
0228             iv[i] = -9998;
0229           }
0230         }
0231       }
0232       alpaka::syncBlockThreads(acc);
0233 
0234       ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX);
0235 
0236       // propagate the negative id to all the tracks in the cluster.
0237       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0238         if (iv[i] >= 0) {
0239           // mark each track in a cluster with the same id as the first one
0240           iv[i] = iv[iv[i]];
0241         }
0242       }
0243       alpaka::syncBlockThreads(acc);
0244 
0245       // adjust the cluster id to be a positive value starting from 0
0246       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0247         iv[i] = -iv[i] - 1;
0248       }
0249 
0250       nvIntermediate = nvFinal = foundClusters;
0251 
0252       if constexpr (verbose) {
0253         if (cms::alpakatools::once_per_block(acc))
0254           printf("found %d proto vertices\n", foundClusters);
0255       }
0256     }
0257   };
0258 
0259 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0260 
0261 #endif  // RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksDBSCAN_h