File indexing completed on 2025-02-20 03:45:53
0001 #ifndef RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0002 #define RecoVertex_PixelVertexFinding_clusterTracksIterativeAlpaka_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0015
0016 #include "vertexFinder.h"
0017
0018 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0019
0020
0021
0022 class ClusterTracksIterative {
0023 public:
0024 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0025 VtxSoAView data,
0026 TrkSoAView trkdata,
0027 WsSoAView ws,
0028 int minT,
0029 float eps,
0030 float errmax,
0031 float chi2max
0032 ) const {
0033 constexpr bool verbose = false;
0034
0035 if constexpr (verbose) {
0036 if (cms::alpakatools::once_per_block(acc))
0037 printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
0038 }
0039
0040 auto nt = ws.ntrks();
0041 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= ws.metadata().size());
0042 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= trkdata.metadata().size());
0043
0044 float const* __restrict__ zt = ws.zt();
0045 float const* __restrict__ ezt2 = ws.ezt2();
0046 uint8_t* __restrict__ izt = ws.izt();
0047 int32_t* __restrict__ iv = ws.iv();
0048 int32_t* __restrict__ nn = trkdata.ndof();
0049 ALPAKA_ASSERT_ACC(zt);
0050 ALPAKA_ASSERT_ACC(ezt2);
0051 ALPAKA_ASSERT_ACC(izt);
0052 ALPAKA_ASSERT_ACC(iv);
0053 ALPAKA_ASSERT_ACC(nn);
0054
0055 using Hist = cms::alpakatools::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
0056 auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0057 auto& hws = alpaka::declareSharedVar<Hist::Counter[32], __COUNTER__>(acc);
0058
0059 for (auto j : cms::alpakatools::uniform_elements(acc, Hist::totbins())) {
0060 hist.off[j] = 0;
0061 }
0062 for (auto j : cms::alpakatools::uniform_elements(acc, 32)) {
0063 hws[j] = 0;
0064 }
0065 alpaka::syncBlockThreads(acc);
0066
0067 if constexpr (verbose) {
0068 if (cms::alpakatools::once_per_block(acc))
0069 printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
0070 }
0071
0072 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= std::numeric_limits<Hist::index_type>::max());
0073 ALPAKA_ASSERT_ACC(static_cast<int>(nt) <= hist.capacity());
0074 ALPAKA_ASSERT_ACC(eps <= 0.1f);
0075
0076
0077 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0078 int iz = static_cast<int>(zt[i] * 10.f);
0079
0080
0081
0082 int tmp_max = std::max<int>(iz, INT8_MIN);
0083 iz = std::min<int>(tmp_max, INT8_MAX);
0084 izt[i] = iz - INT8_MIN;
0085 ALPAKA_ASSERT_ACC(iz - INT8_MIN >= 0);
0086 ALPAKA_ASSERT_ACC(iz - INT8_MIN < 256);
0087 hist.count(acc, izt[i]);
0088 iv[i] = i;
0089 nn[i] = 0;
0090 }
0091 alpaka::syncBlockThreads(acc);
0092
0093 hist.finalize(acc, hws);
0094 alpaka::syncBlockThreads(acc);
0095
0096 ALPAKA_ASSERT_ACC(hist.size() == nt);
0097 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0098 hist.fill(acc, izt[i], uint16_t(i));
0099 }
0100 alpaka::syncBlockThreads(acc);
0101
0102
0103 const auto errmax2 = errmax * errmax;
0104 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0105 if (ezt2[i] > errmax2)
0106 continue;
0107 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](uint32_t j) {
0108 if (i == j)
0109 return;
0110 auto dist = std::abs(zt[i] - zt[j]);
0111 if (dist > eps)
0112 return;
0113 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0114 return;
0115 nn[i]++;
0116 });
0117 }
0118
0119 auto& nloops = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0120 nloops = 0;
0121 alpaka::syncBlockThreads(acc);
0122
0123
0124 bool more = true;
0125 while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0126 if (1 == nloops % 2) {
0127 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0128 auto m = iv[i];
0129 while (m != iv[m])
0130 m = iv[m];
0131 iv[i] = m;
0132 }
0133 } else {
0134 more = false;
0135 for (auto k : cms::alpakatools::uniform_elements(acc, hist.size())) {
0136 auto p = hist.begin() + k;
0137 auto i = *p;
0138 ++p;
0139 if (nn[i] < minT)
0140 continue;
0141 auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
0142 for (; p < hist.end(be); ++p) {
0143 uint32_t j = *p;
0144 ALPAKA_ASSERT_ACC(i != j);
0145 if (nn[j] < minT)
0146 continue;
0147 auto dist = std::abs(zt[i] - zt[j]);
0148 if (dist > eps)
0149 continue;
0150 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0151 continue;
0152 auto old = alpaka::atomicMin(acc, &iv[j], iv[i], alpaka::hierarchy::Threads{});
0153 if (old != iv[i]) {
0154
0155 more = true;
0156 }
0157 alpaka::atomicMin(acc, &iv[i], old, alpaka::hierarchy::Threads{});
0158 }
0159 }
0160 }
0161 if (cms::alpakatools::once_per_block(acc))
0162 ++nloops;
0163 }
0164
0165
0166 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0167
0168 if (nn[i] >= minT)
0169 continue;
0170 float mdist = eps;
0171 cms::alpakatools::forEachInBins(hist, izt[i], 1, [&](int j) {
0172 if (nn[j] < minT)
0173 return;
0174 auto dist = std::abs(zt[i] - zt[j]);
0175 if (dist > mdist)
0176 return;
0177 if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
0178 return;
0179 mdist = dist;
0180 iv[i] = iv[j];
0181 });
0182 }
0183
0184 auto& foundClusters = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0185 foundClusters = 0;
0186 alpaka::syncBlockThreads(acc);
0187
0188
0189
0190 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0191 if (iv[i] == int(i)) {
0192 if (nn[i] >= minT) {
0193 auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{});
0194 iv[i] = -(old + 1);
0195 } else {
0196 iv[i] = -9998;
0197 }
0198 }
0199 }
0200 alpaka::syncBlockThreads(acc);
0201
0202 ALPAKA_ASSERT_ACC(static_cast<int>(foundClusters) < data.metadata().size());
0203
0204
0205 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0206 if (iv[i] >= 0) {
0207
0208 iv[i] = iv[iv[i]];
0209 }
0210 }
0211 alpaka::syncBlockThreads(acc);
0212
0213
0214 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0215 iv[i] = -iv[i] - 1;
0216 }
0217
0218 ws.nvIntermediate() = foundClusters;
0219 data.nvFinal() = foundClusters;
0220
0221 if constexpr (verbose) {
0222 if (cms::alpakatools::once_per_block(acc))
0223 printf("found %d proto vertices\n", foundClusters);
0224 }
0225 }
0226 };
0227
0228 }
0229
0230 #endif