Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-13 22:52:46

0001 #ifndef RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0002 #define RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015 
0016 #include "vertexFinder.h"
0017 
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019 
0020   // This algo does not really scale as it works in a single block...
0021   // It should be good enough for <10K tracks we have.
0022   class ClusterTracksIterative {
0023   public:
0024     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025                                   VtxSoAView data,
0026                                   TrkSoAView trkdata,
0027                                   WsSoAView ws,
0028                                   int minT,      // min number of neighbours to be "core"
0029                                   float eps,     // max absolute distance to cluster
0030                                   float errmax,  // max error to be "seed"
0031                                   float chi2max  // max normalized distance to cluster
0032     ) const {
0033       constexpr bool verbose = false;
0034 
0035       if constexpr (verbose) {
0036         if (cms::alpakatools::once_per_block(acc))
0037           printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038       }
0039 
0040       auto nt = ws.ntrks();
0041       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043 
0044       float const* __restrict__ zt = ws.zt();
0045       float const* __restrict__ ezt2 = ws.ezt2();
0046       uint8_t* __restrict__ izt = ws.izt();
0047       int32_t* __restrict__ iv = ws.iv();
0048       int32_t* __restrict__ nn = trkdata.ndof();
0049       ALPAKA_ASSERT_ACC(zt);
0050       ALPAKA_ASSERT_ACC(ezt2);
0051       ALPAKA_ASSERT_ACC(izt);
0052       ALPAKA_ASSERT_ACC(iv);
0053       ALPAKA_ASSERT_ACC(nn);
0054 
0055       using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056       auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057       auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058 
0059       for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060         hist.off[j] = 0;
0061       }
0062       for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063         hws[j] = 0;  // used by prefix scan in hist.finalize()
0064       }
0065       alpaka::syncBlockThreads(acc);
0066 
0067       if constexpr (verbose) {
0068         if (cms::alpakatools::once_per_block(acc))
0069           printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070       }
0071 
0072       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073       ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074       ALPAKA_ASSERT_ACC(eps <= 0.1f);  // see below
0075 
0076       // fill hist (bin shall be wider than "eps")
0077       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078         int iz = static_cast<int>(zt[i] * 10.f);  // valid if eps <= 0.1
0079         iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080         izt[i] = iz - INT8_MIN;
0081         ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0082         ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0083         hist.count(acc, izt[i]);
0084         iv[i] = i;
0085         nn[i] = 0;
0086       }
0087       alpaka::syncBlockThreads(acc);
0088 
0089       hist.finalize(acc, hws);
0090       alpaka::syncBlockThreads(acc);
0091 
0092       ALPAKA_ASSERT_ACC(hist.size() == nt);
0093       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094         hist.fill(acc, izt[i], uint16_t(i));
0095       }
0096       alpaka::syncBlockThreads(acc);
0097 
0098       // count neighbours
0099       const auto errmax2 = errmax * errmax;
0100       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101         if (ezt2[i] > errmax2)
0102           continue;
0103         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104           if (i == j)
0105             return;
0106           auto dist = std::abs(zt[i] - zt[j]);
0107           if (dist > eps)
0108             return;
0109           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0110             return;
0111           nn[i]++;
0112         });
0113       }
0114 
0115       auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0116       nloops = 0;
0117       alpaka::syncBlockThreads(acc);
0118 
0119       // cluster seeds only
0120       bool more = true;
0121       while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0122         if (1 == nloops % 2) {
0123           for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0124             auto m = iv[i];
0125             while (m != iv[m])
0126               m = iv[m];
0127             iv[i] = m;
0128           }
0129         } else {
0130           more = false;
0131           for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
0132             auto p = hist.begin() + k;
0133             auto i = *p;
0134             ++p;
0135             if (nn[i] < minT)
0136               continue;  // DBSCAN core rule
0137             auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
0138             for (; p < hist.end(be); ++p) {
0139               uint32_t j = *p;
0140               ALPAKA_ASSERT_ACC(i != j);
0141               if (nn[j] < minT)
0142                 continue;  // DBSCAN core rule
0143               auto dist = std::abs(zt[i] - zt[j]);
0144               if (dist > eps)
0145                 continue;
0146               if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0147                 continue;
0148               auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Threads{});
0149               if (old != iv[i]) {
0150                 // end the loop only if no changes were applied
0151                 more = true;
0152               }
0153               alpaka::atomicMin(acc, &iv[i], old, alpaka::hierarchy::Threads{});
0154             }  // inner loop on p (and j)
0155           }  // outer loop on k (and i)
0156         }
0157         if (cms::alpakatools::once_per_block(acc))
0158           ++nloops;
0159       }  // while
0160 
0161       // collect edges (assign to closest cluster of closest point??? here to closest point)
0162       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0163         //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
0164         if (nn[i] >= minT)
0165           continue;  // DBSCAN edge rule
0166         float mdist = eps;
0167         cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](int j) {
0168           if (nn[j] < minT)
0169             return;  // DBSCAN core rule
0170           auto dist = std::abs(zt[i] - zt[j]);
0171           if (dist > mdist)
0172             return;
0173           if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0174             return;  // needed?
0175           mdist = dist;
0176           iv[i] = iv[j];  // assign to cluster (better be unique??)
0177         });
0178       }
0179 
0180       auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0181       foundClusters = 0;
0182       alpaka::syncBlockThreads(acc);
0183 
0184       // find the number of different clusters, identified by a tracks with clus[i] == i;
0185       // mark these tracks with a negative id.
0186       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0187         if (iv[i] == int(i)) {
0188           if (nn[i] >= minT) {
0189             auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0190             iv[i] = -(old + 1);
0191           } else {  // noise
0192             iv[i] = -9998;
0193           }
0194         }
0195       }
0196       alpaka::syncBlockThreads(acc);
0197 
0198       ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0199 
0200       // propagate the negative id to all the tracks in the cluster.
0201       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0202         if (iv[i] >= 0) {
0203           // mark each track in a cluster with the same id as the first one
0204           iv[i] = iv[iv[i]];
0205         }
0206       }
0207       alpaka::syncBlockThreads(acc);
0208 
0209       // adjust the cluster id to be a positive value starting from 0
0210       for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0211         iv[i] = -iv[i] - 1;
0212       }
0213 
0214       ws.nvIntermediate() = foundClusters;
0215       data.nvFinal() = foundClusters;
0216 
0217       if constexpr (verbose) {
0218         if (cms::alpakatools::once_per_block(acc))
0219           printf("found %d proto vertices\n", foundClusters);
0220       }
0221     }
0222   };
0223 
0224 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0225 
0226 #endif  // RecoTracker_PixelVertexFinding_plugins_clusterTracksIterativeAlpaka_h