// File indexing completed on 2024-04-06 12:26:19
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h

#include <cstdint>
#include <cstdio>

#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"
#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"

namespace gpuClustering {

  // Kernel: apply a per-layer charge cut to the pixel clusters of every
  // module, invalidate the pixels of clusters below threshold, and renumber
  // the surviving clusters of each module compactly (0..nSurviving-1).
  //
  // Launch layout: 1-D grid; each block walks modules in a grid-stride loop
  // (module = blockIdx.x, blockIdx.x + gridDim.x, ...), with all threads of
  // the block cooperating on one module at a time.
  // Shared memory: three static arrays of maxNumClustersPerModules entries
  // (charge accumulator, pass/fail flag, new cluster id) plus a 32-entry
  // scratch buffer for the block prefix scan.
  //
  // Parameters:
  //   clusterThresholds  - charge thresholds; the one used is selected via
  //                        getThresholdForLayerOnCondition(thisModuleId < startBPIX2)
  //   id                 - [in/out] module id of each pixel; set to
  //                        invalidModuleId for pixels of rejected clusters
  //   adc                - charge (ADC) of each pixel
  //   moduleStart        - moduleStart[0] = number of modules to process;
  //                        moduleStart[1 + m] = index of first pixel of module m
  //   nClustersInModule  - [in/out] clusters per module; overwritten with the
  //                        number of clusters surviving the cut
  //   moduleId           - module id expected for each module slot
  //   clusterId          - [in/out] cluster id of each pixel; remapped to the
  //                        compact post-cut numbering
  //   numElements        - total number of pixels (digis)
  template <typename TrackerTraits>
  __global__ void clusterChargeCut(
      SiPixelClusterThresholds clusterThresholds,
      uint16_t* __restrict__ id,
      uint16_t const* __restrict__ adc,
      uint32_t const* __restrict__ moduleStart,
      uint32_t* __restrict__ nClustersInModule,
      uint32_t const* __restrict__ moduleId,
      int32_t* __restrict__ clusterId,
      uint32_t numElements) {
    constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules;

    // Per-module scratch, reused for each module this block processes.
    __shared__ int32_t charge[maxNumClustersPerModules];    // summed ADC per cluster
    __shared__ uint8_t ok[maxNumClustersPerModules];        // 1 = cluster passes the cut
    __shared__ uint16_t newclusId[maxNumClustersPerModules];  // compact renumbering (via prefix scan)

    // First module of the second barrel layer: modules with id below this
    // get the dedicated first-layer threshold (see chargeCut selection below).
    constexpr int startBPIX2 = TrackerTraits::layerStart[1];

    assert(TrackerTraits::numberOfModules < maxNumModules);
    assert(startBPIX2 < TrackerTraits::numberOfModules);

    auto firstModule = blockIdx.x;
    auto endModule = moduleStart[0];
    // Grid-stride loop over modules.
    for (auto module = firstModule; module < endModule; module += gridDim.x) {
      auto firstPixel = moduleStart[1 + module];
      auto thisModuleId = id[firstPixel];
      while (thisModuleId == invalidModuleId and firstPixel < numElements) {
        // skip invalid pixels at the head of this module's range
        ++firstPixel;
        thisModuleId = id[firstPixel];
      }
      if (firstPixel >= numElements) {
        // ran off the end of the pixel array: nothing left for this block
        break;
      }
      if (thisModuleId != moduleId[module]) {
        // module slot does not match its pixels (e.g. all invalid): skip it
        continue;
      }
      assert(thisModuleId < TrackerTraits::numberOfModules);

      auto nclus = nClustersInModule[thisModuleId];
      if (nclus == 0)
        continue;

      if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
        printf("Warning too many clusters in module %d in block %d: %d > %d\n",
               thisModuleId,
               blockIdx.x,
               nclus,
               maxNumClustersPerModules);

      auto first = firstPixel + threadIdx.x;

      if (nclus > maxNumClustersPerModules) {
        // Overflow: invalidate the pixels of the clusters that do not fit,
        // then clamp the cluster count to the shared-array capacity.
        for (auto i = first; i < numElements; i += blockDim.x) {
          if (id[i] == invalidModuleId)
            continue;  // not a valid pixel
          if (id[i] != thisModuleId)
            break;  // end of this module's pixels
          if (clusterId[i] >= maxNumClustersPerModules) {
            id[i] = invalidModuleId;
            clusterId[i] = invalidModuleId;
          }
        }
        nclus = maxNumClustersPerModules;
      }

#ifdef GPU_DEBUG
      if (thisModuleId % 100 == 1)
        if (threadIdx.x == 0)
          printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif

      assert(nclus <= maxNumClustersPerModules);
      // zero the per-cluster charge accumulators
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        charge[i] = 0;
      }
      __syncthreads();

      // accumulate the total charge of each cluster of this module
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not a valid pixel
        if (id[i] != thisModuleId)
          break;  // end of this module's pixels
        atomicAdd(&charge[clusterId[i]], adc[i]);
      }
      __syncthreads();

      // threshold for this module: first-layer value vs the default one
      auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);

      // flag each cluster; newclusId doubles as the scan input (0/1)
      bool good = true;
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
        if (0 == ok[i])
          good = false;
      }

      // If every cluster passes, no renumbering is needed.
      // __syncthreads_and is a block-wide collective, so every thread of the
      // block reaches it and they all take the same branch afterwards.
      if (__syncthreads_and(good))
        continue;

      // Renumber survivors with a block prefix scan over the 0/1 flags.
      __shared__ uint16_t ws[32];
      constexpr auto maxThreads = 1024;
      auto minClust = nclus > maxThreads ? maxThreads : nclus;

      cms::cuda::blockPrefixScan(newclusId, newclusId, minClust, ws);
      // Only compiled in when a single scan cannot cover all clusters.
      if constexpr (maxNumClustersPerModules > maxThreads)
      {
        for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
          cms::cuda::blockPrefixScan(newclusId + offset, newclusId + offset, nclus - offset, ws);
          // propagate the running total of the previous chunk into this chunk
          for (uint32_t i = threadIdx.x + offset; i < nclus; i += blockDim.x) {
            uint32_t prevBlockEnd = ((i / maxThreads) * maxThreads) - 1;
            newclusId[i] += newclusId[prevBlockEnd];
          }
          __syncthreads();
        }
      }
      // at least one cluster failed, so the surviving count is strictly smaller
      assert(nclus > newclusId[nclus - 1]);

      nClustersInModule[thisModuleId] = newclusId[nclus - 1];

      // Invalidate pixels of rejected clusters; remap the rest to the compact
      // numbering (the scanned value is a 1-based count, hence the -1).
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not a valid pixel
        if (id[i] != thisModuleId)
          break;  // end of this module's pixels
        if (0 == ok[clusterId[i]])
          clusterId[i] = id[i] = invalidModuleId;
        else
          clusterId[i] = newclusId[clusterId[i]] - 1;
      }

      // make sure all threads are done with the shared arrays before this
      // block moves on to its next module and overwrites them
      __syncthreads();
    }
  }

}  // namespace gpuClustering

#endif  // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h