Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-07-01 02:23:31

0001 #ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuSortByPt2_h
0002 #define RecoPixelVertexing_PixelVertexFinding_plugins_gpuSortByPt2_h
0003 
0004 #include <algorithm>
0005 #include <cmath>
0006 #include <cstdint>
0007 
0008 #include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0010 #ifdef __CUDA_ARCH__
0011 #include "HeterogeneousCore/CUDAUtilities/interface/radixSort.h"
0012 #endif
0013 
0014 #include "gpuVertexFinder.h"
0015 
0016 namespace gpuVertexFinder {
0017 
0018   __device__ __forceinline__ void sortByPt2(ZVertices* pdata, WorkSpace* pws) {
0019     auto& __restrict__ data = *pdata;
0020     auto& __restrict__ ws = *pws;
0021     auto nt = ws.ntrks;
0022     float const* __restrict__ ptt2 = ws.ptt2;
0023     uint32_t const& nvFinal = data.nvFinal;
0024 
0025     int32_t const* __restrict__ iv = ws.iv;
0026     float* __restrict__ ptv2 = data.ptv2;
0027     uint16_t* __restrict__ sortInd = data.sortInd;
0028 
0029     // if (threadIdx.x == 0)
0030     //    printf("sorting %d vertices\n",nvFinal);
0031 
0032     if (nvFinal < 1)
0033       return;
0034 
0035     // fill indexing
0036     for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
0037       data.idv[ws.itrk[i]] = iv[i];
0038     }
0039 
0040     // can be done asynchronoisly at the end of previous event
0041     for (auto i = threadIdx.x; i < nvFinal; i += blockDim.x) {
0042       ptv2[i] = 0;
0043     }
0044     __syncthreads();
0045 
0046     for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
0047       if (iv[i] > 9990)
0048         continue;
0049       atomicAdd_block(&ptv2[iv[i]], ptt2[i]);
0050     }
0051     __syncthreads();
0052 
0053     if (1 == nvFinal) {
0054       if (threadIdx.x == 0)
0055         sortInd[0] = 0;
0056       return;
0057     }
0058 #ifdef __CUDA_ARCH__
0059     __shared__ uint16_t sws[1024];
0060     // sort using only 16 bits
0061     radixSort<float, 2>(ptv2, sortInd, sws, nvFinal);
0062 #else
0063     for (uint16_t i = 0; i < nvFinal; ++i)
0064       sortInd[i] = i;
0065     std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; });
0066 #endif
0067   }
0068 
0069   __global__ void sortByPt2Kernel(ZVertices* pdata, WorkSpace* pws) { sortByPt2(pdata, pws); }
0070 
0071 }  // namespace gpuVertexFinder
0072 
0073 #endif  // RecoPixelVertexing_PixelVertexFinding_plugins_gpuSortByPt2_h