Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:40

0001 #ifndef RecoTracker_PixelVertexFinding_plugins_alpaka_fitVertices_h
0002 #define RecoTracker_PixelVertexFinding_plugins_alpaka_fitVertices_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include <alpaka/alpaka.hpp>
0009 
0010 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0013 
0014 #include "vertexFinder.h"
0015 
0016 namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder {
0017 
0018   template <typename TAcc>
0019   ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void fitVertices(const TAcc& acc,
0020                                                                                  VtxSoAView& pdata,
0021                                                                                  WsSoAView& pws,
0022                                                                                  float chi2Max  // for outlier rejection
0023   ) {
0024     constexpr bool verbose = false;  // in principle the compiler should optmize out if false
0025 
0026     auto& __restrict__ data = pdata;
0027     auto& __restrict__ ws = pws;
0028     auto nt = ws.ntrks();
0029     float const* __restrict__ zt = ws.zt();
0030     float const* __restrict__ ezt2 = ws.ezt2();
0031     float* __restrict__ zv = data.zv();
0032     float* __restrict__ wv = data.wv();
0033     float* __restrict__ chi2 = data.chi2();
0034     uint32_t& nvFinal = data.nvFinal();
0035     uint32_t& nvIntermediate = ws.nvIntermediate();
0036 
0037     int32_t* __restrict__ nn = data.ndof();
0038     int32_t* __restrict__ iv = ws.iv();
0039 
0040     ALPAKA_ASSERT_ACC(nvFinal <= nvIntermediate);
0041     nvFinal = nvIntermediate;
0042     auto foundClusters = nvFinal;
0043 
0044     // zero
0045     for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0046       zv[i] = 0;
0047       wv[i] = 0;
0048       chi2[i] = 0;
0049     }
0050 
0051     // only for test
0052     auto& noise = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0053 
0054     if constexpr (verbose) {
0055       if (cms::alpakatools::once_per_block(acc))
0056         noise = 0;
0057     }
0058     alpaka::syncBlockThreads(acc);
0059 
0060     // compute cluster location
0061     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0062       if (iv[i] > 9990) {
0063         if constexpr (verbose)
0064           alpaka::atomicAdd(acc, &noise, 1, alpaka::hierarchy::Threads{});
0065         continue;
0066       }
0067       ALPAKA_ASSERT_ACC(iv[i] >= 0);
0068       ALPAKA_ASSERT_ACC(iv[i] < int(foundClusters));
0069       auto w = 1.f / ezt2[i];
0070       alpaka::atomicAdd(acc, &zv[iv[i]], zt[i] * w, alpaka::hierarchy::Threads{});
0071       alpaka::atomicAdd(acc, &wv[iv[i]], w, alpaka::hierarchy::Threads{});
0072     }
0073 
0074     alpaka::syncBlockThreads(acc);
0075     // reuse nn
0076     for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0077       ALPAKA_ASSERT_ACC(wv[i] > 0.f);
0078       zv[i] /= wv[i];
0079       nn[i] = -1;  // ndof
0080     }
0081     alpaka::syncBlockThreads(acc);
0082 
0083     // compute chi2
0084     for (auto i : cms::alpakatools::uniform_elements(acc, nt)) {
0085       if (iv[i] > 9990)
0086         continue;
0087 
0088       auto c2 = zv[iv[i]] - zt[i];
0089       c2 *= c2 / ezt2[i];
0090       if (c2 > chi2Max) {
0091         iv[i] = 9999;
0092         continue;
0093       }
0094       alpaka::atomicAdd(acc, &chi2[iv[i]], c2, alpaka::hierarchy::Blocks{});
0095       alpaka::atomicAdd(acc, &nn[iv[i]], 1, alpaka::hierarchy::Blocks{});
0096     }
0097     alpaka::syncBlockThreads(acc);
0098 
0099     for (auto i : cms::alpakatools::uniform_elements(acc, foundClusters)) {
0100       if (nn[i] > 0) {
0101         wv[i] *= float(nn[i]) / chi2[i];
0102       }
0103     }
0104     if constexpr (verbose) {
0105       if (cms::alpakatools::once_per_block(acc)) {
0106         printf("found %d proto clusters ", foundClusters);
0107         printf("and %d noise\n", noise);
0108       }
0109     }
0110   }
0111 
0112   class FitVerticesKernel {
0113   public:
0114     template <typename TAcc>
0115     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0116                                   VtxSoAView pdata,
0117                                   WsSoAView pws,
0118                                   float chi2Max  // for outlier rejection
0119     ) const {
0120       fitVertices(acc, pdata, pws, chi2Max);
0121     }
0122   };
0123 
0124 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder
0125 
0126 #endif  // RecoTracker_PixelVertexFinding_plugins_alpaka_fitVertices_h