File indexing completed on 2024-04-06 12:28:40
0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_clusterTracksByDensity_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020 using VtxSoAView = ::reco::ZVertexSoAView;
0021 using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0022
0023
0024
0025
0026
0027 template <typename TAcc>
0028 ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline))
0029 clusterTracksByDensity(const TAcc& acc,
0030 VtxSoAView& pdata,
0031 WsSoAView& pws,
0032 int minT,
0033 float eps,
0034 float errmax,
0035 float chi2max
0036 ) {
0037 using namespace vertexFinder;
0038 constexpr bool verbose = false;
0039 const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
0040
0041 if constexpr (verbose) {
0042 if (cms::alpakatools::once_per_block(acc))
0043 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0044 }
0045 auto er2mx = errmax * errmax;
0046
0047 auto& __restrict__ data = pdata;
0048 auto& __restrict__ ws = pws;
0049 auto nt = ws.ntrks();
0050 float const* __restrict__ zt = ws.zt();
0051 float const* __restrict__ ezt2 = ws.ezt2();
0052
0053 uint32_t& nvFinal = data.nvFinal();
0054 uint32_t& nvIntermediate = ws.nvIntermediate();
0055
0056 uint8_t* __restrict__ izt = ws.izt();
0057 int32_t* __restrict__ nn = data.ndof();
0058 int32_t* __restrict__ iv = ws.iv();
0059
0060 ALPAKA_ASSERT_ACC(zt);
0061 ALPAKA_ASSERT_ACC(ezt2);
0062 ALPAKA_ASSERT_ACC(izt);
0063 ALPAKA_ASSERT_ACC(nn);
0064 ALPAKA_ASSERT_ACC(iv);
0065
0066 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0067 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0068 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0069
0070 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0071 hist.off[j] = 0;
0072 }
0073 alpaka::syncBlockThreads(acc);
0074
0075 if constexpr (verbose) {
0076 if (cms::alpakatools::once_per_block(acc))
0077 printf("booked hist with %d bins, size %d for %d tracks\n", hist.totbins(), hist.capacity(), nt);
0078 }
0079 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0080
0081
0082 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0083 ALPAKA_ASSERT_ACC(i < ::zVertex::MAXTRACKS);
0084 int iz = int(zt[i] * 10.);
0085
0086 iz = std::min(std::max(iz, INT8_MIN), INT8_MAX);
0087 izt[i] = iz - INT8_MIN;
0088 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0089 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0090 hist.count(acc, izt[i]);
0091 iv[i] = i;
0092 nn[i] = 0;
0093 }
0094 alpaka::syncBlockThreads(acc);
0095 if (threadIdxLocal < 32)
0096 hws[threadIdxLocal] = 0;
0097 alpaka::syncBlockThreads(acc);
0098 hist.finalize(acc, hws);
0099 alpaka::syncBlockThreads(acc);
0100 ALPAKA_ASSERT_ACC(hist.size() == nt);
0101 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0102 hist.fill(acc, izt[i], uint16_t(i));
0103 }
0104 alpaka::syncBlockThreads(acc);
0105
0106 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0107 if (ezt2[i] > er2mx)
0108 continue;
0109 auto loop = [&](uint32_t j) {
0110 if (i == j)
0111 return;
0112 auto dist = std::abs(zt[i] - zt[j]);
0113 if (dist > eps)
0114 return;
0115 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0116 return;
0117 nn[i]++;
0118 };
0119
0120 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0121 }
0122 alpaka::syncBlockThreads(acc);
0123
0124
0125 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0126 float mdist = eps;
0127 auto loop = [&](uint32_t j) {
0128 if (nn[j] < nn[i])
0129 return;
0130 if (nn[j] == nn[i] && zt[j] >= zt[i])
0131 return;
0132 auto dist = std::abs(zt[i] - zt[j]);
0133 if (dist > mdist)
0134 return;
0135 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0136 return;
0137 mdist = dist;
0138 iv[i] = j;
0139 };
0140 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0141 }
0142 alpaka::syncBlockThreads(acc);
0143
0144 #ifdef GPU_DEBUG
0145
0146 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0147 if (iv[i] != int(i))
0148 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0149 }
0150 alpaka::syncBlockThreads(acc);
0151 #endif
0152
0153
0154 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0155 auto m = iv[i];
0156 while (m != iv[m])
0157 m = iv[m];
0158 iv[i] = m;
0159 }
0160
0161 #ifdef GPU_DEBUG
0162 alpaka::syncBlockThreads(acc);
0163
0164 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0165 if (iv[i] != int(i))
0166 ALPAKA_ASSERT_ACC(iv[iv[i]] != int(i));
0167 }
0168 #endif
0169
0170 #ifdef GPU_DEBUG
0171
0172 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0173 auto minJ = i;
0174 auto mdist = eps;
0175 auto loop = [&](uint32_t j) {
0176 if (nn[j] < nn[i])
0177 return;
0178 if (nn[j] == nn[i] && zt[j] >= zt[i])
0179 return;
0180 auto dist = std::abs(zt[i] - zt[j]);
0181 if (dist > mdist)
0182 return;
0183 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0184 return;
0185 mdist = dist;
0186 minJ = j;
0187 };
0188 cms::alpakatools::forEachInBins(hist, izt[i], 1, loop);
0189
0190 ALPAKA_ASSERT_ACC(iv[i] == iv[minJ]);
0191 ALPAKA_ASSERT_ACC(nn[i] <= nn[iv[i]]);
0192 }
0193 alpaka::syncBlockThreads(acc);
0194 #endif
0195
0196 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0197 foundClusters = 0;
0198 alpaka::syncBlockThreads(acc);
0199
0200
0201
0202 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0203 if (iv[i] == int(i)) {
0204 if (nn[i] >= minT) {
0205 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0206 iv[i] = -(old + 1);
0207 } else {
0208 iv[i] = -9998;
0209 }
0210 }
0211 }
0212 alpaka::syncBlockThreads(acc);
0213
0214 ALPAKA_ASSERT_ACC(foundClusters < ::zVertex::MAXVTX);
0215
0216
0217 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0218 if (iv[i] >= 0) {
0219
0220 iv[i] = iv[iv[i]];
0221 }
0222 }
0223 alpaka::syncBlockThreads(acc);
0224
0225
0226 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0227 iv[i] = -iv[i] - 1;
0228 }
0229
0230 nvIntermediate = nvFinal = foundClusters;
0231 if constexpr (verbose) {
0232 if (cms::alpakatools::once_per_block(acc))
0233 printf("found %d proto vertices\n", foundClusters);
0234 }
0235 }
0236 class ClusterTracksByDensityKernel {
0237 public:
0238 template <typename TAcc>
0239 ALPAKA_FN_ACC void operator()(const TAcc& acc,
0240 VtxSoAView pdata,
0241 WsSoAView pws,
0242 int minT,
0243 float eps,
0244 float errmax,
0245 float chi2max
0246 ) const {
0247 clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
0248 }
0249 };
0250
0251 }
0252
0253 #endif