File indexing completed on 2024-09-07 04:38:05
0001 #ifndef RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h
0002 #define RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h
0003
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007
0008 #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0010 #ifdef __CUDA_ARCH__
0011 #include "HeterogeneousCore/CUDAUtilities/interface/radixSort.h"
0012 #endif
0013
0014 #include "gpuVertexFinder.h"
0015
0016 namespace gpuVertexFinder {
0017
0018 __device__ __forceinline__ void sortByPt2(VtxSoAView& pdata, WsSoAView& pws) {
0019 auto& __restrict__ data = pdata;
0020 auto& __restrict__ ws = pws;
0021 auto nt = ws.ntrks();
0022 float const* __restrict__ ptt2 = ws.ptt2();
0023 uint32_t const& nvFinal = data.nvFinal();
0024
0025 int32_t const* __restrict__ iv = ws.iv();
0026 float* __restrict__ ptv2 = data.ptv2();
0027 uint16_t* __restrict__ sortInd = data.sortInd();
0028
0029 assert(ptv2);
0030 assert(sortInd);
0031
0032 if (nvFinal < 1)
0033 return;
0034
0035
0036 for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
0037 data[ws[i].itrk()].idv() = iv[i];
0038 }
0039
0040
0041 for (auto i = threadIdx.x; i < nvFinal; i += blockDim.x) {
0042 ptv2[i] = 0;
0043 }
0044 __syncthreads();
0045
0046 for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
0047 if (iv[i] > 9990)
0048 continue;
0049 atomicAdd_block(&ptv2[iv[i]], ptt2[i]);
0050 }
0051 __syncthreads();
0052
0053 if (1 == nvFinal) {
0054 if (threadIdx.x == 0)
0055 sortInd[0] = 0;
0056 return;
0057 }
0058 #ifdef __CUDA_ARCH__
0059 __shared__ uint16_t sws[1024];
0060
0061 radixSort<float, 2>(ptv2, sortInd, sws, nvFinal);
0062 #else
0063 for (uint16_t i = 0; i < nvFinal; ++i)
0064 sortInd[i] = i;
0065 std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; });
0066 #endif
0067 }
0068
0069 __global__ void sortByPt2Kernel(VtxSoAView pdata, WsSoAView pws) { sortByPt2(pdata, pws); }
0070
0071 }
0072
0073 #endif