Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-22 07:34:24

0001 #ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0002 #define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0003 
0004 // C++ headers
0005 #include <cassert>
0006 #include <cstdint>
0007 #include <limits>
0008 #include <type_traits>
0009 
0010 // Alpaka headers
0011 #include <alpaka/alpaka.hpp>
0012 
0013 // CMSSW headers
0014 #include "DataFormats/BeamSpot/interface/BeamSpotPOD.h"
0015 #include "DataFormats/Math/interface/approx_atan2.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0018 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0019 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0020 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0021 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0022 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0023 
0024 //#define GPU_DEBUG
0025 
0026 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0027   namespace pixelRecHits {
0028 
0029     template <typename TrackerTraits>
0030     class GetHits {
0031     public:
0032       ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0033                                     pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const* __restrict__ cpeParams,
0034                                     BeamSpotPOD const* __restrict__ bs,
0035                                     SiPixelDigisSoAConstView digis,
0036                                     uint32_t numElements,
0037                                     uint32_t nonEmptyModules,
0038                                     SiPixelClustersSoAConstView clusters,
0039                                     TrackingRecHitSoAView<TrackerTraits> hits) const {
0040         ALPAKA_ASSERT_ACC(cpeParams);
0041 
0042         // outer loop: one block per module
0043         for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) {
0044           // This is necessary only once - consider moving it somewhere else.
0045           // Copy the average geometry corrected by the beamspot.
0046           if (0 == module) {
0047             auto& agc = hits.averageGeometry();
0048             auto const& ag = cpeParams->averageGeometry();
0049             auto nLadders = TrackerTraits::numberOfLaddersInBarrel;
0050 
0051             for (uint32_t il : cms::alpakatools::independent_group_elements(acc, nLadders)) {
0052               agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
0053               agc.ladderX[il] = ag.ladderX[il] - bs->x;
0054               agc.ladderY[il] = ag.ladderY[il] - bs->y;
0055               agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
0056               agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
0057               agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
0058             }
0059 
0060             if (cms::alpakatools::once_per_block(acc)) {
0061               agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
0062               agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
0063             }
0064           }
0065 
0066           // to be moved in common namespace...
0067           using pixelClustering::invalidModuleId;
0068           constexpr int32_t maxHitsInIter = pixelCPEforDevice::MaxHitsInIter;
0069 
0070           auto me = clusters[module].moduleId();
0071           int nclus = clusters[me].clusInModule();
0072 
0073           // skip empty modules
0074           if (0 == nclus)
0075             continue;
0076 
0077 #ifdef GPU_DEBUG
0078           if (cms::alpakatools::once_per_block(acc)) {
0079             auto k = clusters[1 + module].moduleStart();
0080             while (digis[k].moduleId() == invalidModuleId)
0081               ++k;
0082             ALPAKA_ASSERT_ACC(digis[k].moduleId() == me);
0083           }
0084 
0085           if (me % 100 == 1)
0086             if (cms::alpakatools::once_per_block(acc))
0087               printf("hitbuilder: %d clusters in module %d. will write at %d\n",
0088                      nclus,
0089                      me,
0090                      clusters[me].clusModuleStart());
0091 #endif
0092 
0093           auto& clusParams = alpaka::declareSharedVar<pixelCPEforDevice::ClusParams, __COUNTER__>(acc);
0094           for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += maxHitsInIter) {
0095             auto first = clusters[1 + module].moduleStart();
0096 
0097             int nClusInIter = alpaka::math::min(acc, maxHitsInIter, endClus - startClus);
0098             int lastClus = startClus + nClusInIter;
0099             ALPAKA_ASSERT_ACC(nClusInIter <= nclus);
0100             ALPAKA_ASSERT_ACC(nClusInIter > 0);
0101             ALPAKA_ASSERT_ACC(lastClus <= nclus);
0102             ALPAKA_ASSERT_ACC(nclus > maxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));
0103 
0104             // init
0105             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0106               clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
0107               clusParams.maxRow[ic] = 0;
0108               clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
0109               clusParams.maxCol[ic] = 0;
0110               clusParams.charge[ic] = 0;
0111               clusParams.q_f_X[ic] = 0;
0112               clusParams.q_l_X[ic] = 0;
0113               clusParams.q_f_Y[ic] = 0;
0114               clusParams.q_l_Y[ic] = 0;
0115             }
0116 
0117             alpaka::syncBlockThreads(acc);
0118 
0119             // one thread or element per "digi"
0120             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0121               auto id = digis[i].moduleId();
0122               if (id == invalidModuleId)
0123                 continue;  // not valid
0124               if (id != me)
0125                 break;  // end of module
0126               auto cl = digis[i].clus();
0127               if (cl < startClus || cl >= lastClus)
0128                 continue;
0129               cl -= startClus;
0130               ALPAKA_ASSERT_ACC(cl >= 0);
0131               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0132               auto x = digis[i].xx();
0133               auto y = digis[i].yy();
0134               alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0135               alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0136               alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0137               alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0138             }
0139 
0140             alpaka::syncBlockThreads(acc);
0141 
0142             auto pixmx = cpeParams->detParams(me).pixmx;
0143             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0144               auto id = digis[i].moduleId();
0145               if (id == invalidModuleId)
0146                 continue;  // not valid
0147               if (id != me)
0148                 break;  // end of module
0149               auto cl = digis[i].clus();
0150               if (cl < startClus || cl >= lastClus)
0151                 continue;
0152               cl -= startClus;
0153               ALPAKA_ASSERT_ACC(cl >= 0);
0154               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0155               auto x = digis[i].xx();
0156               auto y = digis[i].yy();
0157               auto ch = digis[i].adc();
0158               alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0159               ch = alpaka::math::min(acc, ch, pixmx);
0160               if (clusParams.minRow[cl] == x)
0161                 alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0162               if (clusParams.maxRow[cl] == x)
0163                 alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0164               if (clusParams.minCol[cl] == y)
0165                 alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0166               if (clusParams.maxCol[cl] == y)
0167                 alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0168             }
0169 
0170             alpaka::syncBlockThreads(acc);
0171 
0172             // next one cluster per thread...
0173             first = clusters[me].clusModuleStart() + startClus;
0174             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0175               auto h = first + ic;  // output index in global memory
0176 
0177               assert(h < (uint32_t)hits.metadata().size());
0178               assert(h < clusters[me + 1].clusModuleStart());
0179 
0180               pixelCPEforDevice::position<TrackerTraits>(
0181                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0182 
0183               pixelCPEforDevice::errorFromDB<TrackerTraits>(
0184                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0185 
0186               // store it
0187               hits[h].chargeAndStatus().charge = clusParams.charge[ic];
0188               hits[h].chargeAndStatus().status = clusParams.status[ic];
0189               hits[h].detectorIndex() = me;
0190 
0191               // local coordinates for computations
0192               float xl, yl;
0193               hits[h].xLocal() = xl = clusParams.xpos[ic];
0194               hits[h].yLocal() = yl = clusParams.ypos[ic];
0195 
0196               hits[h].clusterSizeX() = clusParams.xsize[ic];
0197               hits[h].clusterSizeY() = clusParams.ysize[ic];
0198 
0199               hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
0200               hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;
0201 
0202               // global coordinates and phi computation
0203               float xg, yg, zg;
0204               cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
0205               // correct for the beamspot position
0206               xg -= bs->x;
0207               yg -= bs->y;
0208               zg -= bs->z;
0209 
0210               hits[h].xGlobal() = xg;
0211               hits[h].yGlobal() = yg;
0212               hits[h].zGlobal() = zg;
0213               hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg);
0214               hits[h].iphi() = unsafe_atan2s<7>(yg, xg);
0215             }
0216             alpaka::syncBlockThreads(acc);
0217           }  // end loop on batches
0218         }
0219       }
0220     };
0221 
0222   }  // namespace pixelRecHits
0223 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
0224 
0225 #endif  // RecoLocalTracker_SiPixelRecHits_plugins_alpaka_PixelRecHits_h