File indexing completed on 2024-09-13 22:52:46
0001 #ifndef RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0002 #define RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020
0021
0022 class ClusterTracksIterative {
0023 public:
0024 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025 VtxSoAView data,
0026 TrkSoAView trkdata,
0027 WsSoAView ws,
0028 int minT,
0029 float eps,
0030 float errmax,
0031 float chi2max
0032 ) const {
0033 constexpr bool verbose = false;
0034
0035 if constexpr (verbose) {
0036 if (cms::alpakatools::once_per_block(acc))
0037 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038 }
0039
0040 auto nt = ws.ntrks();
0041 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043
0044 float const* __restrict__ zt = ws.zt();
0045 float const* __restrict__ ezt2 = ws.ezt2();
0046 uint8_t* __restrict__ izt = ws.izt();
0047 int32_t* __restrict__ iv = ws.iv();
0048 int32_t* __restrict__ nn = trkdata.ndof();
0049 ALPAKA_ASSERT_ACC(zt);
0050 ALPAKA_ASSERT_ACC(ezt2);
0051 ALPAKA_ASSERT_ACC(izt);
0052 ALPAKA_ASSERT_ACC(iv);
0053 ALPAKA_ASSERT_ACC(nn);
0054
0055 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058
0059 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060 hist.off[j] = 0;
0061 }
0062 for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063 hws[j] = 0;
0064 }
0065 alpaka::syncBlockThreads(acc);
0066
0067 if constexpr (verbose) {
0068 if (cms::alpakatools::once_per_block(acc))
0069 printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070 }
0071
0072 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0075
0076
0077 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078 int iz = static_cast<int>(zt[i] * 10.f);
0079 iz = std::clamp(iz, INT8_MIN, INT8_MAX);
0080 izt[i] = iz - INT8_MIN;
0081 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0082 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0083 hist.count(acc, izt[i]);
0084 iv[i] = i;
0085 nn[i] = 0;
0086 }
0087 alpaka::syncBlockThreads(acc);
0088
0089 hist.finalize(acc, hws);
0090 alpaka::syncBlockThreads(acc);
0091
0092 ALPAKA_ASSERT_ACC(hist.size() == nt);
0093 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0094 hist.fill(acc, izt[i], uint16_t(i));
0095 }
0096 alpaka::syncBlockThreads(acc);
0097
0098
0099 const auto errmax2 = errmax * errmax;
0100 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0101 if (ezt2[i] > errmax2)
0102 continue;
0103 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0104 if (i == j)
0105 return;
0106 auto dist = std::abs(zt[i] - zt[j]);
0107 if (dist > eps)
0108 return;
0109 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0110 return;
0111 nn[i]++;
0112 });
0113 }
0114
0115 auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0116 nloops = 0;
0117 alpaka::syncBlockThreads(acc);
0118
0119
0120 bool more = true;
0121 while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0122 if (1 == nloops % 2) {
0123 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0124 auto m = iv[i];
0125 while (m != iv[m])
0126 m = iv[m];
0127 iv[i] = m;
0128 }
0129 } else {
0130 more = false;
0131 for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
0132 auto p = hist.begin() + k;
0133 auto i = *p;
0134 ++p;
0135 if (nn[i] < minT)
0136 continue;
0137 auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
0138 for (; p < hist.end(be); ++p) {
0139 uint32_t j = *p;
0140 ALPAKA_ASSERT_ACC(i != j);
0141 if (nn[j] < minT)
0142 continue;
0143 auto dist = std::abs(zt[i] - zt[j]);
0144 if (dist > eps)
0145 continue;
0146 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0147 continue;
0148 auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Threads{});
0149 if (old != iv[i]) {
0150
0151 more = true;
0152 }
0153 alpaka::atomicMin(acc, &iv[i], old, alpaka::hierarchy::Threads{});
0154 }
0155 }
0156 }
0157 if (cms::alpakatools::once_per_block(acc))
0158 ++nloops;
0159 }
0160
0161
0162 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0163
0164 if (nn[i] >= minT)
0165 continue;
0166 float mdist = eps;
0167 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](int j) {
0168 if (nn[j] < minT)
0169 return;
0170 auto dist = std::abs(zt[i] - zt[j]);
0171 if (dist > mdist)
0172 return;
0173 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0174 return;
0175 mdist = dist;
0176 iv[i] = iv[j];
0177 });
0178 }
0179
0180 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0181 foundClusters = 0;
0182 alpaka::syncBlockThreads(acc);
0183
0184
0185
0186 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0187 if (iv[i] == int(i)) {
0188 if (nn[i] >= minT) {
0189 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0190 iv[i] = -(old + 1);
0191 } else {
0192 iv[i] = -9998;
0193 }
0194 }
0195 }
0196 alpaka::syncBlockThreads(acc);
0197
0198 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0199
0200
0201 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0202 if (iv[i] >= 0) {
0203
0204 iv[i] = iv[iv[i]];
0205 }
0206 }
0207 alpaka::syncBlockThreads(acc);
0208
0209
0210 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0211 iv[i] = -iv[i] - 1;
0212 }
0213
0214 ws.nvIntermediate() = foundClusters;
0215 data.nvFinal() = foundClusters;
0216
0217 if constexpr (verbose) {
0218 if (cms::alpakatools::once_per_block(acc))
0219 printf("found %d proto vertices\n", foundClusters);
0220 }
0221 }
0222 };
0223
0224 }
0225
0226 #endif