File indexing completed on 2023-03-17 11:19:33
0001 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
0002 #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
0003
0004 #include <cstdint>
0005 #include <cstdio>
0006
0007 #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
0008 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"
0011
0012
0013 #include "SiPixelClusterThresholds.h"
0014
0015 namespace gpuClustering {
0016
0017 template <typename TrackerTraits>
0018 __global__ void clusterChargeCut(
0019 SiPixelClusterThresholds
0020 clusterThresholds,
0021 uint16_t* __restrict__ id,
0022 uint16_t const* __restrict__ adc,
0023 uint32_t const* __restrict__ moduleStart,
0024 uint32_t* __restrict__ nClustersInModule,
0025 uint32_t const* __restrict__ moduleId,
0026 int32_t* __restrict__ clusterId,
0027 uint32_t numElements) {
0028 __shared__ int32_t charge[maxNumClustersPerModules];
0029 __shared__ uint8_t ok[maxNumClustersPerModules];
0030 __shared__ uint16_t newclusId[maxNumClustersPerModules];
0031
0032 constexpr int startBPIX2 = TrackerTraits::layerStart[1];
0033 [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules;
0034
0035 assert(nMaxModules < maxNumModules);
0036 assert(startBPIX2 < nMaxModules);
0037
0038 auto firstModule = blockIdx.x;
0039 auto endModule = moduleStart[0];
0040 for (auto module = firstModule; module < endModule; module += gridDim.x) {
0041 auto firstPixel = moduleStart[1 + module];
0042 auto thisModuleId = id[firstPixel];
0043 while (thisModuleId == invalidModuleId and firstPixel < numElements) {
0044
0045 ++firstPixel;
0046 thisModuleId = id[firstPixel];
0047 }
0048 if (firstPixel >= numElements) {
0049
0050 break;
0051 }
0052 if (thisModuleId != moduleId[module]) {
0053
0054 continue;
0055 }
0056 assert(thisModuleId < nMaxModules);
0057
0058 auto nclus = nClustersInModule[thisModuleId];
0059 if (nclus == 0)
0060 continue;
0061
0062 if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
0063 printf("Warning too many clusters in module %d in block %d: %d > %d\n",
0064 thisModuleId,
0065 blockIdx.x,
0066 nclus,
0067 maxNumClustersPerModules);
0068
0069 auto first = firstPixel + threadIdx.x;
0070
0071 if (nclus > maxNumClustersPerModules) {
0072
0073 for (auto i = first; i < numElements; i += blockDim.x) {
0074 if (id[i] == invalidModuleId)
0075 continue;
0076 if (id[i] != thisModuleId)
0077 break;
0078 if (clusterId[i] >= maxNumClustersPerModules) {
0079 id[i] = invalidModuleId;
0080 clusterId[i] = invalidModuleId;
0081 }
0082 }
0083 nclus = maxNumClustersPerModules;
0084 }
0085
0086 #ifdef GPU_DEBUG
0087 if (thisModuleId % 100 == 1)
0088 if (threadIdx.x == 0)
0089 printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
0090 #endif
0091
0092 assert(nclus <= maxNumClustersPerModules);
0093 for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
0094 charge[i] = 0;
0095 }
0096 __syncthreads();
0097
0098 for (auto i = first; i < numElements; i += blockDim.x) {
0099 if (id[i] == invalidModuleId)
0100 continue;
0101 if (id[i] != thisModuleId)
0102 break;
0103 atomicAdd(&charge[clusterId[i]], adc[i]);
0104 }
0105 __syncthreads();
0106
0107 auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
0108
0109 bool good = true;
0110 for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
0111 newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
0112 if (0 == ok[i])
0113 good = false;
0114 }
0115
0116
0117 if (__syncthreads_and(good))
0118 continue;
0119
0120
0121 __shared__ uint16_t ws[32];
0122 cms::cuda::blockPrefixScan(newclusId, nclus, ws);
0123
0124 assert(nclus > newclusId[nclus - 1]);
0125
0126 nClustersInModule[thisModuleId] = newclusId[nclus - 1];
0127
0128
0129 for (auto i = first; i < numElements; i += blockDim.x) {
0130 if (id[i] == invalidModuleId)
0131 continue;
0132 if (id[i] != thisModuleId)
0133 break;
0134 if (0 == ok[clusterId[i]])
0135 clusterId[i] = id[i] = invalidModuleId;
0136 else
0137 clusterId[i] = newclusId[clusterId[i]] - 1;
0138 }
0139
0140
0141 __syncthreads();
0142 }
0143 }
0144
0145 }
0146
0147 #endif