Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-23 22:56:27

0001 #ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0002 #define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h
0003 
0004 // C++ headers
0005 #include <cassert>
0006 #include <cstdint>
0007 #include <limits>
0008 #include <type_traits>
0009 
0010 // Alpaka headers
0011 #include <alpaka/alpaka.hpp>
0012 
0013 // CMSSW headers
0014 #include "DataFormats/BeamSpot/interface/BeamSpotPOD.h"
0015 #include "DataFormats/Math/interface/approx_atan2.h"
0016 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
0017 #include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h"
0018 #include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h"
0019 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
0020 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0021 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0022 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
0023 
0024 //#define GPU_DEBUG
0025 
0026 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0027   namespace pixelRecHits {
0028 
0029     template <typename TrackerTraits>
0030     class GetHits {
0031     public:
0032       template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0033       ALPAKA_FN_ACC void operator()(const TAcc& acc,
0034                                     pixelCPEforDevice::ParamsOnDeviceT<TrackerTraits> const* __restrict__ cpeParams,
0035                                     BeamSpotPOD const* __restrict__ bs,
0036                                     SiPixelDigisSoAConstView digis,
0037                                     uint32_t numElements,
0038                                     uint32_t nonEmptyModules,
0039                                     SiPixelClustersSoAConstView clusters,
0040                                     TrackingRecHitSoAView<TrackerTraits> hits) const {
0041         ALPAKA_ASSERT_ACC(cpeParams);
0042 
0043         // outer loop: one block per module
0044         for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) {
0045           // This is necessary only once - consider moving it somewhere else.
0046           // Copy the average geometry corrected by the beamspot.
0047           if (0 == module) {
0048             auto& agc = hits.averageGeometry();
0049             auto const& ag = cpeParams->averageGeometry();
0050             auto nLadders = TrackerTraits::numberOfLaddersInBarrel;
0051 
0052             for (uint32_t il : cms::alpakatools::independent_group_elements(acc, nLadders)) {
0053               agc.ladderZ[il] = ag.ladderZ[il] - bs->z;
0054               agc.ladderX[il] = ag.ladderX[il] - bs->x;
0055               agc.ladderY[il] = ag.ladderY[il] - bs->y;
0056               agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]);
0057               agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z;
0058               agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z;
0059             }
0060 
0061             if (cms::alpakatools::once_per_block(acc)) {
0062               agc.endCapZ[0] = ag.endCapZ[0] - bs->z;
0063               agc.endCapZ[1] = ag.endCapZ[1] - bs->z;
0064             }
0065           }
0066 
0067           // to be moved in common namespace...
0068           using pixelClustering::invalidModuleId;
0069           constexpr int32_t maxHitsInIter = pixelCPEforDevice::MaxHitsInIter;
0070 
0071           auto me = clusters[module].moduleId();
0072           int nclus = clusters[me].clusInModule();
0073 
0074           // skip empty modules
0075           if (0 == nclus)
0076             continue;
0077 
0078 #ifdef GPU_DEBUG
0079           if (cms::alpakatools::once_per_block(acc)) {
0080             auto k = clusters[1 + module].moduleStart();
0081             while (digis[k].moduleId() == invalidModuleId)
0082               ++k;
0083             ALPAKA_ASSERT_ACC(digis[k].moduleId() == me);
0084           }
0085 
0086           if (me % 100 == 1)
0087             if (cms::alpakatools::once_per_block(acc))
0088               printf("hitbuilder: %d clusters in module %d. will write at %d\n",
0089                      nclus,
0090                      me,
0091                      clusters[me].clusModuleStart());
0092 #endif
0093 
0094           auto& clusParams = alpaka::declareSharedVar<pixelCPEforDevice::ClusParams, __COUNTER__>(acc);
0095           for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += maxHitsInIter) {
0096             auto first = clusters[1 + module].moduleStart();
0097 
0098             int nClusInIter = alpaka::math::min(acc, maxHitsInIter, endClus - startClus);
0099             int lastClus = startClus + nClusInIter;
0100             ALPAKA_ASSERT_ACC(nClusInIter <= nclus);
0101             ALPAKA_ASSERT_ACC(nClusInIter > 0);
0102             ALPAKA_ASSERT_ACC(lastClus <= nclus);
0103             ALPAKA_ASSERT_ACC(nclus > maxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus));
0104 
0105             // init
0106             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0107               clusParams.minRow[ic] = std::numeric_limits<uint32_t>::max();
0108               clusParams.maxRow[ic] = 0;
0109               clusParams.minCol[ic] = std::numeric_limits<uint32_t>::max();
0110               clusParams.maxCol[ic] = 0;
0111               clusParams.charge[ic] = 0;
0112               clusParams.q_f_X[ic] = 0;
0113               clusParams.q_l_X[ic] = 0;
0114               clusParams.q_f_Y[ic] = 0;
0115               clusParams.q_l_Y[ic] = 0;
0116             }
0117 
0118             alpaka::syncBlockThreads(acc);
0119 
0120             // one thread or element per "digi"
0121             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0122               auto id = digis[i].moduleId();
0123               if (id == invalidModuleId)
0124                 continue;  // not valid
0125               if (id != me)
0126                 break;  // end of module
0127               auto cl = digis[i].clus();
0128               if (cl < startClus || cl >= lastClus)
0129                 continue;
0130               cl -= startClus;
0131               ALPAKA_ASSERT_ACC(cl >= 0);
0132               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0133               auto x = digis[i].xx();
0134               auto y = digis[i].yy();
0135               alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0136               alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{});
0137               alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0138               alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{});
0139             }
0140 
0141             alpaka::syncBlockThreads(acc);
0142 
0143             auto pixmx = cpeParams->detParams(me).pixmx;
0144             for (uint32_t i : cms::alpakatools::independent_group_elements(acc, first, numElements)) {
0145               auto id = digis[i].moduleId();
0146               if (id == invalidModuleId)
0147                 continue;  // not valid
0148               if (id != me)
0149                 break;  // end of module
0150               auto cl = digis[i].clus();
0151               if (cl < startClus || cl >= lastClus)
0152                 continue;
0153               cl -= startClus;
0154               ALPAKA_ASSERT_ACC(cl >= 0);
0155               ALPAKA_ASSERT_ACC(cl < maxHitsInIter);
0156               auto x = digis[i].xx();
0157               auto y = digis[i].yy();
0158               auto ch = digis[i].adc();
0159               alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0160               ch = alpaka::math::min(acc, ch, pixmx);
0161               if (clusParams.minRow[cl] == x)
0162                 alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0163               if (clusParams.maxRow[cl] == x)
0164                 alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0165               if (clusParams.minCol[cl] == y)
0166                 alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0167               if (clusParams.maxCol[cl] == y)
0168                 alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{});
0169             }
0170 
0171             alpaka::syncBlockThreads(acc);
0172 
0173             // next one cluster per thread...
0174             first = clusters[me].clusModuleStart() + startClus;
0175             for (uint32_t ic : cms::alpakatools::independent_group_elements(acc, nClusInIter)) {
0176               auto h = first + ic;  // output index in global memory
0177 
0178               assert(h < (uint32_t)hits.metadata().size());
0179               assert(h < clusters[me + 1].clusModuleStart());
0180 
0181               pixelCPEforDevice::position<TrackerTraits>(
0182                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0183 
0184               pixelCPEforDevice::errorFromDB<TrackerTraits>(
0185                   cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
0186 
0187               // store it
0188               hits[h].chargeAndStatus().charge = clusParams.charge[ic];
0189               hits[h].chargeAndStatus().status = clusParams.status[ic];
0190               hits[h].detectorIndex() = me;
0191 
0192               // local coordinates for computations
0193               float xl, yl;
0194               hits[h].xLocal() = xl = clusParams.xpos[ic];
0195               hits[h].yLocal() = yl = clusParams.ypos[ic];
0196 
0197               hits[h].clusterSizeX() = clusParams.xsize[ic];
0198               hits[h].clusterSizeY() = clusParams.ysize[ic];
0199 
0200               hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX;
0201               hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY;
0202 
0203               // global coordinates and phi computation
0204               float xg, yg, zg;
0205               cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg);
0206               // correct for the beamspot position
0207               xg -= bs->x;
0208               yg -= bs->y;
0209               zg -= bs->z;
0210 
0211               hits[h].xGlobal() = xg;
0212               hits[h].yGlobal() = yg;
0213               hits[h].zGlobal() = zg;
0214               hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg);
0215               hits[h].iphi() = unsafe_atan2s<7>(yg, xg);
0216             }
0217             alpaka::syncBlockThreads(acc);
0218           }  // end loop on batches
0219         }
0220       }
0221     };
0222 
0223   }  // namespace pixelRecHits
0224 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
0225 
0226 #endif  // RecoLocalTracker_SiPixelRecHits_plugins_alpaka_PixelRecHits_h