File indexing completed on 2024-04-06 12:28:40
0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_fitVertices_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_fitVertices_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013
0014 #include "vertexFinder.h"
0015
0016 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0017
0018 template <typename TAcc>
0019 ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void fitVertices(const TAcc& acc,
0020 VtxSoAView& pdata,
0021 WsSoAView& pws,
0022 float chi2Max
0023 ) {
0024 constexpr bool verbose = false;
0025
0026 auto& __restrict__ data = pdata;
0027 auto& __restrict__ ws = pws;
0028 auto nt = ws.ntrks();
0029 float const* __restrict__ zt = ws.zt();
0030 float const* __restrict__ ezt2 = ws.ezt2();
0031 float* __restrict__ zv = data.zv();
0032 float* __restrict__ wv = data.wv();
0033 float* __restrict__ chi2 = data.chi2();
0034 uint32_t& nvFinal = data.nvFinal();
0035 uint32_t& nvIntermediate = ws.nvIntermediate();
0036
0037 int32_t* __restrict__ nn = data.ndof();
0038 int32_t* __restrict__ iv = ws.iv();
0039
0040 ALPAKA_ASSERT_ACC(nvFinal <= nvIntermediate);
0041 nvFinal = nvIntermediate;
0042 auto foundClusters = nvFinal;
0043
0044
0045 for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0046 zv[i] = 0;
0047 wv[i] = 0;
0048 chi2[i] = 0;
0049 }
0050
0051
0052 auto& noise = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0053
0054 if constexpr (verbose) {
0055 if (cms::alpakatools::once_per_block(acc))
0056 noise = 0;
0057 }
0058 alpaka::syncBlockThreads(acc);
0059
0060
0061 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0062 if (iv[i] > 9990) {
0063 if constexpr (verbose)
0064 alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{});
0065 continue;
0066 }
0067 ALPAKA_ASSERT_ACC(iv[i] >= 0);
0068 ALPAKA_ASSERT_ACC(iv[i] < int(foundClusters));
0069 auto w = 1.f / ezt2[i];
0070 alpaka::atomicAdd(acc, &zv[iv[i]], zt[i] * w, alpaka::hierarchy::Threads{});
0071 alpaka::atomicAdd(acc, &wv[iv[i]], w, alpaka::hierarchy::Threads{});
0072 }
0073
0074 alpaka::syncBlockThreads(acc);
0075
0076 for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0077 ALPAKA_ASSERT_ACC(wv[i] > 0.f);
0078 zv[i] /= wv[i];
0079 nn[i] = -1;
0080 }
0081 alpaka::syncBlockThreads(acc);
0082
0083
0084 for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0085 if (iv[i] > 9990)
0086 continue;
0087
0088 auto c2 = zv[iv[i]] - zt[i];
0089 c2 *= c2 / ezt2[i];
0090 if (c2 > chi2Max) {
0091 iv[i] = 9999;
0092 continue;
0093 }
0094 alpaka::atomicAdd(acc, &chi2[iv[i]], c2, alpaka::hierarchy::Blocks{});
0095 alpaka::atomicAdd(acc, &nn[iv[i]], 1, alpaka::hierarchy::Blocks{});
0096 }
0097 alpaka::syncBlockThreads(acc);
0098
0099 for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0100 if (nn[i] > 0) {
0101 wv[i] *= float(nn[i]) / chi2[i];
0102 }
0103 }
0104 if constexpr (verbose) {
0105 if (cms::alpakatools::once_per_block(acc)) {
0106 printf("found %d proto clusters ", foundClusters);
0107 printf("and %d noise\n", noise);
0108 }
0109 }
0110 }
0111
0112 class FitVerticesKernel {
0113 public:
0114 template <typename TAcc>
0115 ALPAKA_FN_ACC void operator()(const TAcc& acc,
0116 VtxSoAView pdata,
0117 WsSoAView pws,
0118 float chi2Max
0119 ) const {
0120 fitVertices(acc, pdata, pws, chi2Max);
0121 }
0122 };
0123
0124 }
0125
0126 #endif