Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:19:33

0001 #ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
0002 #define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
0003 
0004 #include <cstdint>
0005 #include <cstdio>
0006 
0007 #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
0008 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h"
0011 
0012 // local include(s)
0013 #include "SiPixelClusterThresholds.h"
0014 
namespace gpuClustering {

  // Apply a per-cluster charge threshold and compact the surviving clusters
  // of each pixel module.
  //
  // Expected launch configuration: a 1D grid; each block processes a
  // grid-stride subset of the modules, with all threads of the block
  // cooperating on one module at a time.  All shared memory is statically
  // allocated below; no dynamic shared memory is required.
  //
  // clusterThresholds  charge cut on clusters, in electrons (one value for
  //                    BPIX layer 1, one for all other layers)
  // id                 module id of each pixel; modified: pixels of rejected
  //                    clusters are marked invalidModuleId
  // adc                charge of each pixel
  // moduleStart        moduleStart[0] holds the number of modules;
  //                    moduleStart[1 + m] is the index of the first pixel of module m
  // nClustersInModule  modified: number of clusters surviving the cut, per module
  // moduleId           module id of each module
  // clusterId          modified: cluster id of each pixel, renumbered compactly
  //                    over the surviving clusters
  // numElements        total number of pixels (size of id/adc/clusterId)
  template <typename TrackerTraits>
  __global__ void clusterChargeCut(
      SiPixelClusterThresholds
          clusterThresholds,             // charge cut on cluster in electrons (for layer 1 and for other layers)
      uint16_t* __restrict__ id,         // module id of each pixel (modified if bad cluster)
      uint16_t const* __restrict__ adc,  //  charge of each pixel
      uint32_t const* __restrict__ moduleStart,  // index of the first pixel of each module
      uint32_t* __restrict__ nClustersInModule,  // modified: number of clusters found in each module
      uint32_t const* __restrict__ moduleId,     // module id of each module
      int32_t* __restrict__ clusterId,           // modified: cluster id of each pixel
      uint32_t numElements) {
    // per-block scratch space, sized for the maximum number of clusters a module may hold
    __shared__ int32_t charge[maxNumClustersPerModules];      // accumulated charge per cluster
    __shared__ uint8_t ok[maxNumClustersPerModules];          // 1 if the cluster passes the charge cut
    __shared__ uint16_t newclusId[maxNumClustersPerModules];  // prefix-scanned new cluster ids

    constexpr int startBPIX2 = TrackerTraits::layerStart[1];
    [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules;

    assert(nMaxModules < maxNumModules);
    assert(startBPIX2 < nMaxModules);

    // grid-stride loop over the modules: each block owns one module per iteration
    auto firstModule = blockIdx.x;
    auto endModule = moduleStart[0];
    for (auto module = firstModule; module < endModule; module += gridDim.x) {
      auto firstPixel = moduleStart[1 + module];
      // skip invalid or duplicate pixels at the beginning of the module,
      // checking the bound before each read so we never access id[numElements]
      while (firstPixel < numElements and id[firstPixel] == invalidModuleId) {
        ++firstPixel;
      }
      if (firstPixel >= numElements) {
        // reached the end of the input while skipping the invalid pixels, nothing left to do
        break;
      }
      auto thisModuleId = id[firstPixel];
      if (thisModuleId != moduleId[module]) {
        // reached the end of the module while skipping the invalid pixels, skip this module
        continue;
      }
      assert(thisModuleId < nMaxModules);

      // NOTE: the conditions guarding the break/continue statements in this loop
      // are uniform across the block (they depend only on per-module values), so
      // every __syncthreads() below is reached by all threads of the block.

      auto nclus = nClustersInModule[thisModuleId];
      if (nclus == 0)
        continue;

      if (threadIdx.x == 0 && nclus > maxNumClustersPerModules)
        printf("Warning too many clusters in module %d in block %d: %d > %d\n",
               thisModuleId,
               blockIdx.x,
               nclus,
               maxNumClustersPerModules);

      auto first = firstPixel + threadIdx.x;

      if (nclus > maxNumClustersPerModules) {
        // remove excess  FIXME find a way to cut charge first....
        for (auto i = first; i < numElements; i += blockDim.x) {
          if (id[i] == invalidModuleId)
            continue;  // not valid
          if (id[i] != thisModuleId)
            break;  // end of module
          if (clusterId[i] >= maxNumClustersPerModules) {
            id[i] = invalidModuleId;
            clusterId[i] = invalidModuleId;
          }
        }
        nclus = maxNumClustersPerModules;
      }

#ifdef GPU_DEBUG
      if (thisModuleId % 100 == 1)
        if (threadIdx.x == 0)
          printf("start cluster charge cut for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif

      assert(nclus <= maxNumClustersPerModules);
      // zero the per-cluster charge accumulators
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        charge[i] = 0;
      }
      __syncthreads();

      // accumulate the total charge of each cluster in this module
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        atomicAdd(&charge[clusterId[i]], adc[i]);
      }
      __syncthreads();

      // modules below startBPIX2 (BPIX layer 1) use a dedicated threshold
      auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);

      // flag the clusters that pass the charge cut
      bool good = true;
      for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
        newclusId[i] = ok[i] = charge[i] >= chargeCut ? 1 : 0;
        if (0 == ok[i])
          good = false;
      }

      // if all clusters above threshold do nothing
      if (__syncthreads_and(good))
        continue;

      // renumber the surviving clusters with a block-wide inclusive prefix scan
      __shared__ uint16_t ws[32];
      cms::cuda::blockPrefixScan(newclusId, nclus, ws);

      // at least one cluster failed the cut, so fewer than nclus survive
      assert(nclus > newclusId[nclus - 1]);

      nClustersInModule[thisModuleId] = newclusId[nclus - 1];

      // mark the pixels of rejected clusters as invalid, renumber the others
      for (auto i = first; i < numElements; i += blockDim.x) {
        if (id[i] == invalidModuleId)
          continue;  // not valid
        if (id[i] != thisModuleId)
          break;  // end of module
        if (0 == ok[clusterId[i]])
          clusterId[i] = id[i] = invalidModuleId;
        else
          clusterId[i] = newclusId[clusterId[i]] - 1;  // scan is inclusive, hence the -1
      }

      //done
      __syncthreads();
    }  // loop on modules
  }

}  // namespace gpuClustering
0146 
0147 #endif  // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h