File indexing completed on 2024-09-13 22:52:46
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013
0014 #include "vertexFinder.h"
0015
0016 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0017
0018 using VtxSoAView = ::reco::ZVertexSoAView;
0019 using TrkSoAView = ::reco::ZVertexTracksSoAView;
0020 using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0021
0022 ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(
0023 Acc1D const& acc, VtxSoAView& data, TrkSoAView& trkdata, WsSoAView& ws, float maxChi2) {
0024 constexpr bool verbose = false;
0025 constexpr uint32_t MAXTK = 512;
0026
0027 auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc);
0028 auto& zz = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);
0029 auto& newV = alpaka::declareSharedVar<uint8_t[MAXTK], __COUNTER__>(acc);
0030 auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);
0031 auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
0032
0033
0034 for (auto kv : cms::alpakatools::independent_groups(acc, data.nvFinal())) {
0035 int32_t ndof = trkdata[kv].ndof();
0036 if (ndof < 4)
0037 continue;
0038 if (data[kv].chi2() < maxChi2 * float(ndof))
0039 continue;
0040
0041 ALPAKA_ASSERT_ACC(ndof < int32_t(MAXTK));
0042
0043 if ((uint32_t)ndof >= MAXTK)
0044 continue;
0045
0046 if (cms::alpakatools::once_per_block(acc)) {
0047
0048 nq = 0u;
0049 }
0050 alpaka::syncBlockThreads(acc);
0051
0052
0053 for (auto k : cms::alpakatools::independent_group_elements(acc, ws.ntrks())) {
0054 if (ws[k].iv() == int(kv)) {
0055 auto index = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{});
0056 it[index] = k;
0057 zz[index] = ws[k].zt() - data[kv].zv();
0058 newV[index] = zz[index] < 0 ? 0 : 1;
0059 ww[index] = 1.f / ws[k].ezt2();
0060 }
0061 }
0062
0063
0064 auto& znew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
0065 auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
0066 alpaka::syncBlockThreads(acc);
0067
0068 ALPAKA_ASSERT_ACC(int(nq) == ndof + 1);
0069
0070 int maxiter = 20;
0071
0072 bool more = true;
0073 while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0074 more = false;
0075 if (cms::alpakatools::once_per_block(acc)) {
0076 znew[0] = 0;
0077 znew[1] = 0;
0078 wnew[0] = 0;
0079 wnew[1] = 0;
0080 }
0081 alpaka::syncBlockThreads(acc);
0082
0083 for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0084 auto i = newV[k];
0085 alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{});
0086 alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{});
0087 }
0088 alpaka::syncBlockThreads(acc);
0089
0090 if (cms::alpakatools::once_per_block(acc)) {
0091 znew[0] /= wnew[0];
0092 znew[1] /= wnew[1];
0093 }
0094 alpaka::syncBlockThreads(acc);
0095
0096 for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0097 auto d0 = fabs(zz[k] - znew[0]);
0098 auto d1 = fabs(zz[k] - znew[1]);
0099 auto newer = d0 < d1 ? 0 : 1;
0100 more |= newer != newV[k];
0101 newV[k] = newer;
0102 }
0103 --maxiter;
0104 if (maxiter <= 0)
0105 more = false;
0106 }
0107
0108
0109 if (0 == wnew[0] || 0 == wnew[1])
0110 continue;
0111
0112
0113 auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
0114
0115 auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
0116
0117 if constexpr (verbose) {
0118 if (cms::alpakatools::once_per_block(acc))
0119 printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * data[kv].wv());
0120 }
0121
0122 if (chi2Dist < 4)
0123 continue;
0124
0125
0126 auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
0127 if (cms::alpakatools::once_per_block(acc))
0128 igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{});
0129 alpaka::syncBlockThreads(acc);
0130 for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0131 if (1 == newV[k])
0132 ws[it[k]].iv() = igv;
0133 }
0134
0135
0136 alpaka::syncBlockThreads(acc);
0137 }
0138 }
0139
0140 class SplitVerticesKernel {
0141 public:
0142 ALPAKA_FN_ACC void operator()(
0143 Acc1D const& acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, float maxChi2) const {
0144 splitVertices(acc, data, trkdata, ws, maxChi2);
0145 }
0146 };
0147
0148 }
0149
0150 #endif