File indexing completed on 2023-10-25 10:00:38
0001 #ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h
0002 #define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h
0003
0004 #include <cstdint>
0005 #include <cstdio>
0006 #include <limits>
0007
0008 #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
0009 #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
0010 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
0011 #include "DataFormats/Math/interface/approx_atan2.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
0013 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0014 #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
0015
0016
0017 namespace gpuPixelRecHits {
0018
0019 template <typename TrackerTraits>
0020 __global__ void getHits(pixelCPEforGPU::ParamsOnGPUT<TrackerTraits> const* __restrict__ cpeParams,
0021 BeamSpotPOD const* __restrict__ bs,
0022 SiPixelDigisCUDASOAConstView digis,
0023 int numElements,
0024 SiPixelClustersCUDASOAConstView clusters,
0025 TrackingRecHitSoAView<TrackerTraits> hits) {
0026
0027
0028
0029
0030
0031 assert(cpeParams);
0032
0033
0034 if (0 == blockIdx.x) {
0035 auto& agc = hits.averageGeometry();
0036 auto const& ag = cpeParams->averageGeometry();
0037 auto nLadders = TrackerTraits::numberOfLaddersInBarrel;
0038
0039 for (int il = threadIdx.x, nl = nLadders; il < nl; il += blockDim.x) {
0040 agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
0041 agc.ladderX[il] = ag.ladderX[il] - bs->x;
0042 agc.ladderY[il] = ag.ladderY[il] - bs->y;
0043 agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
0044 agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
0045 agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
0046 }
0047
0048 if (0 == threadIdx.x) {
0049 agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
0050 agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
0051 }
0052 }
0053
0054
0055 using gpuClustering::invalidModuleId;
0056 constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter;
0057
0058 using ClusParams = pixelCPEforGPU::ClusParams;
0059
0060
0061 __shared__ ClusParams clusParams;
0062
0063 auto me = clusters[blockIdx.x].moduleId();
0064 int nclus = clusters[me].clusInModule();
0065
0066 if (0 == nclus)
0067 return;
0068 #ifdef GPU_DEBUG
0069 if (threadIdx.x == 0) {
0070 auto k = clusters[1 + blockIdx.x].moduleStart();
0071 while (digis[k].moduleId() == invalidModuleId)
0072 ++k;
0073 assert(digis[k].moduleId() == me);
0074 }
0075
0076 if (me % 100 == 1)
0077 if (threadIdx.x == 0)
0078 printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters[me].clusModuleStart());
0079 #endif
0080
0081 for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) {
0082 int nClusInIter = std::min(MaxHitsInIter, endClus - startClus);
0083 int lastClus = startClus + nClusInIter;
0084 assert(nClusInIter <= nclus);
0085 assert(nClusInIter > 0);
0086 assert(lastClus <= nclus);
0087
0088 assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));
0089
0090
0091 for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) {
0092 clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
0093 clusParams.maxRow[ic] = 0;
0094 clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
0095 clusParams.maxCol[ic] = 0;
0096 clusParams.charge[ic] = 0;
0097 clusParams.q_f_X[ic] = 0;
0098 clusParams.q_l_X[ic] = 0;
0099 clusParams.q_f_Y[ic] = 0;
0100 clusParams.q_l_Y[ic] = 0;
0101 }
0102
0103 __syncthreads();
0104
0105
0106 auto first = clusters[1 + blockIdx.x].moduleStart() + threadIdx.x;
0107 for (int i = first; i < numElements; i += blockDim.x) {
0108 auto id = digis[i].moduleId();
0109 if (id == invalidModuleId)
0110 continue;
0111 if (id != me)
0112 break;
0113 auto cl = digis[i].clus();
0114 if (cl < startClus || cl >= lastClus)
0115 continue;
0116 cl -= startClus;
0117 assert(cl >= 0);
0118 assert(cl < MaxHitsInIter);
0119 auto x = digis[i].xx();
0120 auto y = digis[i].yy();
0121 atomicMin(&clusParams.minRow[cl], x);
0122 atomicMax(&clusParams.maxRow[cl], x);
0123 atomicMin(&clusParams.minCol[cl], y);
0124 atomicMax(&clusParams.maxCol[cl], y);
0125 }
0126
0127 __syncthreads();
0128
0129 auto pixmx = cpeParams->detParams(me).pixmx;
0130 for (int i = first; i < numElements; i += blockDim.x) {
0131 auto id = digis[i].moduleId();
0132 if (id == invalidModuleId)
0133 continue;
0134 if (id != me)
0135 break;
0136 auto cl = digis[i].clus();
0137 if (cl < startClus || cl >= lastClus)
0138 continue;
0139 cl -= startClus;
0140 assert(cl >= 0);
0141 assert(cl < MaxHitsInIter);
0142 auto x = digis[i].xx();
0143 auto y = digis[i].yy();
0144 auto ch = digis[i].adc();
0145 atomicAdd(&clusParams.charge[cl], ch);
0146 ch = std::min(ch, pixmx);
0147 if (clusParams.minRow[cl] == x)
0148 atomicAdd(&clusParams.q_f_X[cl], ch);
0149 if (clusParams.maxRow[cl] == x)
0150 atomicAdd(&clusParams.q_l_X[cl], ch);
0151 if (clusParams.minCol[cl] == y)
0152 atomicAdd(&clusParams.q_f_Y[cl], ch);
0153 if (clusParams.maxCol[cl] == y)
0154 atomicAdd(&clusParams.q_l_Y[cl], ch);
0155 }
0156
0157 __syncthreads();
0158
0159
0160
0161 first = clusters[me].clusModuleStart() + startClus;
0162 for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) {
0163 auto h = first + ic;
0164
0165 assert(h < hits.nHits());
0166 assert(h < clusters[me + 1].clusModuleStart());
0167
0168 pixelCPEforGPU::position<TrackerTraits>(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0169
0170 pixelCPEforGPU::errorFromDB<TrackerTraits>(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0171
0172
0173 hits[h].chargeAndStatus().charge = clusParams.charge[ic];
0174 hits[h].chargeAndStatus().status = clusParams.status[ic];
0175 hits[h].detectorIndex() = me;
0176
0177 float xl, yl;
0178 hits[h].xLocal() = xl = clusParams.xpos[ic];
0179 hits[h].yLocal() = yl = clusParams.ypos[ic];
0180
0181 hits[h].clusterSizeX() = clusParams.xsize[ic];
0182 hits[h].clusterSizeY() = clusParams.ysize[ic];
0183
0184 hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
0185 hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;
0186
0187
0188 float xg, yg, zg;
0189
0190 cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
0191
0192 xg -= bs->x;
0193 yg -= bs->y;
0194 zg -= bs->z;
0195
0196 hits[h].xGlobal() = xg;
0197 hits[h].yGlobal() = yg;
0198 hits[h].zGlobal() = zg;
0199
0200 hits[h].rGlobal() = std::sqrt(xg * xg + yg * yg);
0201 hits[h].iphi() = unsafe_atan2s<7>(yg, xg);
0202 }
0203 __syncthreads();
0204 }
0205 }
0206
0207 }
0208
0209 #endif