Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:26:23

0001 #ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0002 #define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0003 
0004 // C++ headers
0005 #include <cassert>
0006 #include <cstdint>
0007 #include <limits>
0008 #include <type_traits>
0009 
0010 // Alpaka headers
0011 #include <alpaka/alpaka.hpp>
0012 
0013 // CMSSW headers
0014 #include "DataFormats/BeamSpot/interface/BeamSpotPOD.h"
0015 #include "DataFormats/Math/interface/approx_atan2.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0018 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0019 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0020 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0021 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0022 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0023 
0024 //#define GPU_DEBUG
0025 
0026 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0027   namespace pixelRecHits {
0028 
0029     template <typename TrackerTraits>
0030     class GetHits {
0031     public:
0032       template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0033       ALPAKA_FN_ACC void operator()(const TAcc& acc,
0034                                     pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const* __restrict__ cpeParams,
0035                                     BeamSpotPOD const* __restrict__ bs,
0036                                     SiPixelDigisSoAConstView digis,
0037                                     uint32_t numElements,
0038                                     uint32_t nonEmptyModules,
0039                                     SiPixelClustersSoAConstView clusters,
0040                                     TrackingRecHitSoAView<TrackerTraits> hits) const {
0041         ALPAKA_ASSERT_ACC(cpeParams);
0042 
0043         // outer loop: one block per module
0044         for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) {
0045           // This is necessary only once - consider moving it somewhere else.
0046           // Copy the average geometry corrected by the beamspot.
0047           if (0 == module) {
0048             auto& agc = hits.averageGeometry();
0049             auto const& ag = cpeParams->averageGeometry();
0050             auto nLadders = TrackerTraits::numberOfLaddersInBarrel;
0051 
0052             for (uint32_t il : cms::alpakatools::independent_group_elements(acc, nLadders)) {
0053               agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
0054               agc.ladderX[il] = ag.ladderX[il] - bs->x;
0055               agc.ladderY[il] = ag.ladderY[il] - bs->y;
0056               agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
0057               agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
0058               agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
0059             }
0060 
0061             if (cms::alpakatools::once_per_block(acc)) {
0062               agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
0063               agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
0064             }
0065           }
0066 
0067           // to be moved in common namespace...
0068           using pixelClustering::invalidModuleId;
0069           constexpr int32_t maxHitsInIter = pixelCPEforDevice::MaxHitsInIter;
0070 
0071           auto me = clusters[module].moduleId();
0072           int nclus = clusters[me].clusInModule();
0073 
0074           // skip empty modules
0075           if (0 == nclus)
0076             continue;
0077 
0078 #ifdef GPU_DEBUG
0079           if (cms::alpakatools::once_per_block(acc)) {
0080             auto k = clusters[1 + module].moduleStart();
0081             while (digis[k].moduleId() == invalidModuleId)
0082               ++k;
0083             ALPAKA_ASSERT_ACC(digis[k].moduleId() == me);
0084           }
0085 
0086           if (me % 100 == 1)
0087             if (cms::alpakatools::once_per_block(acc))
0088               printf("hitbuilder: %d clusters in module %d. will write at %d\n",
0089                      nclus,
0090                      me,
0091                      clusters[me].clusModuleStart());
0092 #endif
0093 
0094           auto& clusParams = alpaka::declareSharedVar<pixelCPEforDevice::ClusParams, __COUNTER__>(acc);
0095           for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += maxHitsInIter) {
0096             auto first = clusters[1 + module].moduleStart();
0097 
0098             int nClusInIter = alpaka::math::min(acc, maxHitsInIter, endClus - startClus);
0099             int lastClus = startClus + nClusInIter;
0100             assert(nClusInIter <= nclus);
0101             assert(nClusInIter > 0);
0102             assert(lastClus <= nclus);
0103 
0104             assert(nclus > maxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));
0105 
0106             // init
0107             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0108               clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
0109               clusParams.maxRow[ic] = 0;
0110               clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
0111               clusParams.maxCol[ic] = 0;
0112               clusParams.charge[ic] = 0;
0113               clusParams.q_f_X[ic] = 0;
0114               clusParams.q_l_X[ic] = 0;
0115               clusParams.q_f_Y[ic] = 0;
0116               clusParams.q_l_Y[ic] = 0;
0117             }
0118 
0119             alpaka::syncBlockThreads(acc);
0120 
0121             // one thread or element per "digi"
0122             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0123               auto id = digis[i].moduleId();
0124               if (id == invalidModuleId)
0125                 continue;  // not valid
0126               if (id != me)
0127                 break;  // end of module
0128               auto cl = digis[i].clus();
0129               if (cl < startClus || cl >= lastClus)
0130                 continue;
0131               cl -= startClus;
0132               ALPAKA_ASSERT_ACC(cl >= 0);
0133               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0134               auto x = digis[i].xx();
0135               auto y = digis[i].yy();
0136               alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0137               alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0138               alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0139               alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0140             }
0141 
0142             alpaka::syncBlockThreads(acc);
0143 
0144             auto pixmx = cpeParams->detParams(me).pixmx;
0145             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0146               auto id = digis[i].moduleId();
0147               if (id == invalidModuleId)
0148                 continue;  // not valid
0149               if (id != me)
0150                 break;  // end of module
0151               auto cl = digis[i].clus();
0152               if (cl < startClus || cl >= lastClus)
0153                 continue;
0154               cl -= startClus;
0155               ALPAKA_ASSERT_ACC(cl >= 0);
0156               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0157               auto x = digis[i].xx();
0158               auto y = digis[i].yy();
0159               auto ch = digis[i].adc();
0160               alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0161               ch = alpaka::math::min(acc, ch, pixmx);
0162               if (clusParams.minRow[cl] == x)
0163                 alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0164               if (clusParams.maxRow[cl] == x)
0165                 alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0166               if (clusParams.minCol[cl] == y)
0167                 alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0168               if (clusParams.maxCol[cl] == y)
0169                 alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0170             }
0171 
0172             alpaka::syncBlockThreads(acc);
0173 
0174             // next one cluster per thread...
0175             first = clusters[me].clusModuleStart() + startClus;
0176             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0177               auto h = first + ic;  // output index in global memory
0178 
0179               assert(h < (uint32_t)hits.metadata().size());
0180               assert(h < clusters[me + 1].clusModuleStart());
0181 
0182               pixelCPEforDevice::position<TrackerTraits>(
0183                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0184 
0185               pixelCPEforDevice::errorFromDB<TrackerTraits>(
0186                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0187 
0188               // store it
0189               hits[h].chargeAndStatus().charge = clusParams.charge[ic];
0190               hits[h].chargeAndStatus().status = clusParams.status[ic];
0191               hits[h].detectorIndex() = me;
0192 
0193               // local coordinates for computations
0194               float xl, yl;
0195               hits[h].xLocal() = xl = clusParams.xpos[ic];
0196               hits[h].yLocal() = yl = clusParams.ypos[ic];
0197 
0198               hits[h].clusterSizeX() = clusParams.xsize[ic];
0199               hits[h].clusterSizeY() = clusParams.ysize[ic];
0200 
0201               hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
0202               hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;
0203 
0204               // global coordinates and phi computation
0205               float xg, yg, zg;
0206               cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
0207               // correct for the beamspot position
0208               xg -= bs->x;
0209               yg -= bs->y;
0210               zg -= bs->z;
0211 
0212               hits[h].xGlobal() = xg;
0213               hits[h].yGlobal() = yg;
0214               hits[h].zGlobal() = zg;
0215               hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg);
0216               hits[h].iphi() = unsafe_atan2s<7>(yg, xg);
0217             }
0218             alpaka::syncBlockThreads(acc);
0219           }  // end loop on batches
0220         }
0221       }
0222     };
0223 
0224   }  // namespace pixelRecHits
0225 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
0226 
0227 #endif  // RecoLocalTracker_SiPixelRecHits_plugins_alpaka_PixelRecHits_h