Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-13 22:52:46

0001 #ifndef RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
0002 #define RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 
0014 #include "vertexFinder.h"
0015 
0016 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0017 
0018   using VtxSoAView = ::reco::ZVertexSoAView;
0019   using TrkSoAView = ::reco::ZVertexTracksSoAView;
0020   using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView;
0021 
0022   ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(
0023       Acc1D const& acc, VtxSoAView& data, TrkSoAView& trkdata, WsSoAView& ws, float maxChi2) {
0024     constexpr bool verbose = false;  // in principle the compiler should optmize out if false
0025     constexpr uint32_t MAXTK = 512;
0026 
0027     auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc);   // track index
0028     auto& zz = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);      // z pos
0029     auto& newV = alpaka::declareSharedVar<uint8_t[MAXTK], __COUNTER__>(acc);  // 0 or 1
0030     auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc);      // z weight
0031     auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);          // number of track for this vertex
0032 
0033     // one vertex per block
0034     for (auto kv : cms::alpakatools::independent_groups(acc, data.nvFinal())) {
0035       int32_t ndof = trkdata[kv].ndof();
0036       if (ndof < 4)
0037         continue;
0038       if (data[kv].chi2() < maxChi2 * float(ndof))
0039         continue;
0040 
0041       ALPAKA_ASSERT_ACC(ndof < int32_t(MAXTK));
0042 
0043       if ((uint32_t)ndof >= MAXTK)
0044         continue;  // too bad FIXME
0045 
0046       if (cms::alpakatools::once_per_block(acc)) {
0047         // reset the number of tracks for the current vertex
0048         nq = 0u;
0049       }
0050       alpaka::syncBlockThreads(acc);
0051 
0052       // cache the data of the tracks associated to the current vertex into shared memory
0053       for (auto k : cms::alpakatools::independent_group_elements(acc, ws.ntrks())) {
0054         if (ws[k].iv() == int(kv)) {
0055           auto index = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{});
0056           it[index] = k;
0057           zz[index] = ws[k].zt() - data[kv].zv();
0058           newV[index] = zz[index] < 0 ? 0 : 1;
0059           ww[index] = 1.f / ws[k].ezt2();
0060         }
0061       }
0062 
0063       // the new vertices
0064       auto& znew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
0065       auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc);
0066       alpaka::syncBlockThreads(acc);
0067 
0068       ALPAKA_ASSERT_ACC(int(nq) == ndof + 1);
0069 
0070       int maxiter = 20;
0071       // kt-min....
0072       bool more = true;
0073       while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) {
0074         more = false;
0075         if (cms::alpakatools::once_per_block(acc)) {
0076           znew[0] = 0;
0077           znew[1] = 0;
0078           wnew[0] = 0;
0079           wnew[1] = 0;
0080         }
0081         alpaka::syncBlockThreads(acc);
0082 
0083         for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0084           auto i = newV[k];
0085           alpaka::atomicAdd(acc, &znew[i], zz[k] * ww[k], alpaka::hierarchy::Threads{});
0086           alpaka::atomicAdd(acc, &wnew[i], ww[k], alpaka::hierarchy::Threads{});
0087         }
0088         alpaka::syncBlockThreads(acc);
0089 
0090         if (cms::alpakatools::once_per_block(acc)) {
0091           znew[0] /= wnew[0];
0092           znew[1] /= wnew[1];
0093         }
0094         alpaka::syncBlockThreads(acc);
0095 
0096         for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0097           auto d0 = fabs(zz[k] - znew[0]);
0098           auto d1 = fabs(zz[k] - znew[1]);
0099           auto newer = d0 < d1 ? 0 : 1;
0100           more |= newer != newV[k];
0101           newV[k] = newer;
0102         }
0103         --maxiter;
0104         if (maxiter <= 0)
0105           more = false;
0106       }
0107 
0108       // avoid empty vertices
0109       if (0 == wnew[0] || 0 == wnew[1])
0110         continue;
0111 
0112       // quality cut
0113       auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
0114 
0115       auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
0116 
0117       if constexpr (verbose) {
0118         if (cms::alpakatools::once_per_block(acc))
0119           printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * data[kv].wv());
0120       }
0121 
0122       if (chi2Dist < 4)
0123         continue;
0124 
0125       // get a new global vertex
0126       auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
0127       if (cms::alpakatools::once_per_block(acc))
0128         igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{});
0129       alpaka::syncBlockThreads(acc);
0130       for (auto k : cms::alpakatools::uniform_elements(acc, nq)) {
0131         if (1 == newV[k])
0132           ws[it[k]].iv() = igv;
0133       }
0134 
0135       // synchronise the threads before starting the next iteration of the loop over the vertices and resetting the shared memory
0136       alpaka::syncBlockThreads(acc);
0137     }  // loop on vertices
0138   }
0139 
0140   class SplitVerticesKernel {
0141   public:
0142     ALPAKA_FN_ACC void operator()(
0143         Acc1D const& acc, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, float maxChi2) const {
0144       splitVertices(acc, data, trkdata, ws, maxChi2);
0145     }
0146   };
0147 
0148 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0149 
0150 #endif  // RecoVertex_PixelVertexFinding_plugins_alpaka_splitVertices_h