#ifndef RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h
#define RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h

#include <cstdint>
#include <cstdio>
#include <limits>

#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h"

namespace gpuPixelRecHits {

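  // Build pixel rec hits from clusters.
  // One block per module: the digis of the module are scanned to accumulate the
  // cluster properties in shared memory, in batches of at most MaxHitsInIter
  // clusters; the CPE is then applied and the results written to the output SoA,
  // with global coordinates given relative to the beamspot.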
  __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams,
                          BeamSpotPOD const* __restrict__ bs,
                          SiPixelDigisCUDASOAView const digis,
                          int numElements,
                          SiPixelClustersCUDA::SiPixelClustersCUDASOAView const* __restrict__ pclusters,
                          TrackingRecHit2DSOAView* phits) {
    assert(phits);
    assert(cpeParams);
    auto& hits = *phits;

    auto const& clusters = *pclusters;
    auto isPhase2 = cpeParams->commonParams().isPhase2;

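    // the first block also copies the average geometry, translated to the beamspot frame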
    if (0 == blockIdx.x) {
      auto& agc = hits.averageGeometry();
      auto const& ag = cpeParams->averageGeometry();
      auto nLadders =
          isPhase2 ? phase2PixelTopology::numberOfLaddersInBarrel : phase1PixelTopology::numberOfLaddersInBarrel;

      for (int il = threadIdx.x, nl = nLadders; il < nl; il += blockDim.x) {
        agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
        agc.ladderX[il] = ag.ladderX[il] - bs->x;
        agc.ladderY[il] = ag.ladderY[il] - bs->y;
        agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
        agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
        agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
      }

      if (0 == threadIdx.x) {
        agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
        agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
      }
    }

    using gpuClustering::invalidModuleId;
    constexpr int32_t MaxHitsInIter = pixelCPEforGPU::MaxHitsInIter;

    using ClusParams = pixelCPEforGPU::ClusParams;

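    // one block per module: cluster properties are accumulated in shared memory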
    __shared__ ClusParams clusParams;

    auto me = clusters.moduleId(blockIdx.x);
    int nclus = clusters.clusInModule(me);

    if (0 == nclus)
      return;

#ifdef GPU_DEBUG
    if (threadIdx.x == 0) {
      auto k = clusters.moduleStart(1 + blockIdx.x);
      while (digis.moduleInd(k) == invalidModuleId)
        ++k;
      assert(digis.moduleInd(k) == me);
    }
#endif

#ifdef GPU_DEBUG
    if (me % 100 == 1)
      if (threadIdx.x == 0)
        printf("hitbuilder: %d clusters in module %d. will write at %d\n", nclus, me, clusters.clusModuleStart(me));
#endif
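    // process the clusters in batches of MaxHitsInIter, limited by the size of the shared-memory buffers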
    for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) {
      int nClusInIter = std::min(MaxHitsInIter, endClus - startClus);
      int lastClus = startClus + nClusInIter;
      assert(nClusInIter <= nclus);
      assert(nClusInIter > 0);
      assert(lastClus <= nclus);

      assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));

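      // reset the shared-memory accumulators for this batch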
      for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) {
        clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
        clusParams.maxRow[ic] = 0;
        clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
        clusParams.maxCol[ic] = 0;
        clusParams.charge[ic] = 0;
        clusParams.q_f_X[ic] = 0;
        clusParams.q_l_X[ic] = 0;
        clusParams.q_f_Y[ic] = 0;
        clusParams.q_l_Y[ic] = 0;
      }

      __syncthreads();

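      // first pass over the digis of this module: find the row/column extent of each cluster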
      auto first = clusters.moduleStart(1 + blockIdx.x) + threadIdx.x;
      for (int i = first; i < numElements; i += blockDim.x) {
        auto id = digis.moduleInd(i);
        if (id == invalidModuleId)
          continue;  // skip invalid digis
        if (id != me)
          break;  // end of this module
        auto cl = digis.clus(i);
        if (cl < startClus || cl >= lastClus)
          continue;  // cluster not in this batch
        cl -= startClus;
        assert(cl >= 0);
        assert(cl < MaxHitsInIter);
        auto x = digis.xx(i);
        auto y = digis.yy(i);
        atomicMin(&clusParams.minRow[cl], x);
        atomicMax(&clusParams.maxRow[cl], x);
        atomicMin(&clusParams.minCol[cl], y);
        atomicMax(&clusParams.maxCol[cl], y);
      }

      __syncthreads();

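      // second pass: accumulate the total cluster charge and the charge collected in the
      // first/last row and column, truncating each digi's contribution at pixmx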
      auto pixmx = cpeParams->detParams(me).pixmx;
      for (int i = first; i < numElements; i += blockDim.x) {
        auto id = digis.moduleInd(i);
        if (id == invalidModuleId)
          continue;  // skip invalid digis
        if (id != me)
          break;  // end of this module
        auto cl = digis.clus(i);
        if (cl < startClus || cl >= lastClus)
          continue;  // cluster not in this batch
        cl -= startClus;
        assert(cl >= 0);
        assert(cl < MaxHitsInIter);
        auto x = digis.xx(i);
        auto y = digis.yy(i);
        auto ch = digis.adc(i);
        atomicAdd(&clusParams.charge[cl], ch);
        ch = std::min(ch, pixmx);
        if (clusParams.minRow[cl] == x)
          atomicAdd(&clusParams.q_f_X[cl], ch);
        if (clusParams.maxRow[cl] == x)
          atomicAdd(&clusParams.q_l_X[cl], ch);
        if (clusParams.minCol[cl] == y)
          atomicAdd(&clusParams.q_f_Y[cl], ch);
        if (clusParams.maxCol[cl] == y)
          atomicAdd(&clusParams.q_l_Y[cl], ch);
      }

      __syncthreads();

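      // finally, one thread per cluster in the batch: run the CPE and fill the output SoA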
      first = clusters.clusModuleStart(me) + startClus;
      for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) {
        auto h = first + ic;  // index of this hit in the output SoA

        assert(h < hits.nHits());
        assert(h < clusters.clusModuleStart(me + 1));

        pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
        if (!isPhase2)
          pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
        else
          pixelCPEforGPU::errorFromSize(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);

        hits.setChargeAndStatus(h, clusParams.charge[ic], clusParams.status[ic]);
        hits.detectorIndex(h) = me;

        float xl, yl;
        hits.xLocal(h) = xl = clusParams.xpos[ic];
        hits.yLocal(h) = yl = clusParams.ypos[ic];

        hits.clusterSizeX(h) = clusParams.xsize[ic];
        hits.clusterSizeY(h) = clusParams.ysize[ic];

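        // store the squared local errors, adding the APE (alignment position error) contribution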
        hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
        hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;

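        // transform to the global frame and subtract the beamspot position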
        float xg, yg, zg;
        cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
        xg -= bs->x;
        yg -= bs->y;
        zg -= bs->z;

        hits.xGlobal(h) = xg;
        hits.yGlobal(h) = yg;
        hits.zGlobal(h) = zg;

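        // transverse distance from the beamline, and phi from a fast integer atan2 approximation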
        hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg);
        hits.iphi(h) = unsafe_atan2s<7>(yg, xg);
      }
      __syncthreads();
    }
  }

}  // namespace gpuPixelRecHits

#endif  // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h