Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-05-04 02:52:44

0001 #ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
0002 #define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
0003 
0004 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
0005 #include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
0006 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
0007 
0008 template <typename Traits>
0009 class TrackingRecHit2DHeterogeneous {
0010 public:
0011   enum class Storage32 {
0012     kXLocal = 0,
0013     kYLocal = 1,
0014     kXerror = 2,
0015     kYerror = 3,
0016     kCharge = 4,
0017     kXGlobal = 5,
0018     kYGlobal = 6,
0019     kZGlobal = 7,
0020     kRGlobal = 8,
0021     kPhiStorage = 9,
0022     kLayers = 10
0023   };
0024 
0025   enum class Storage16 {
0026     kDetId = 0,
0027     kPhi = 1,
0028     kXSize = 2,
0029     kYSize = 3,
0030   };
0031 
0032   template <typename T>
0033   using unique_ptr = typename Traits::template unique_ptr<T>;
0034 
0035   using PhiBinner = TrackingRecHit2DSOAView::PhiBinner;
0036 
0037   TrackingRecHit2DHeterogeneous() = default;
0038 
0039   explicit TrackingRecHit2DHeterogeneous(
0040       uint32_t nHits,
0041       bool isPhase2,
0042       int32_t offsetBPIX2,
0043       pixelCPEforGPU::ParamsOnGPU const* cpeParams,
0044       uint32_t const* hitsModuleStart,
0045       cudaStream_t stream,
0046       TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input = nullptr);
0047 
0048   explicit TrackingRecHit2DHeterogeneous(
0049       float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream = nullptr);
0050   ~TrackingRecHit2DHeterogeneous() = default;
0051 
0052   TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete;
0053   TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete;
0054   TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default;
0055   TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default;
0056 
0057   TrackingRecHit2DSOAView* view() { return m_view.get(); }
0058   TrackingRecHit2DSOAView const* view() const { return m_view.get(); }
0059 
0060   auto nHits() const { return m_nHits; }
0061   auto nMaxModules() const { return m_nMaxModules; }
0062   auto offsetBPIX2() const { return m_offsetBPIX2; }
0063 
0064   auto hitsModuleStart() const { return m_hitsModuleStart; }
0065   auto hitsLayerStart() { return m_hitsLayerStart; }
0066   auto phiBinner() { return m_phiBinner; }
0067   auto phiBinnerStorage() { return m_phiBinnerStorage; }
0068   auto iphi() { return m_iphi; }
0069 
0070   cms::cuda::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;
0071 
0072   cms::cuda::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;
0073 
0074   cms::cuda::host::unique_ptr<uint16_t[]> store16ToHostAsync(cudaStream_t stream) const;
0075   cms::cuda::host::unique_ptr<float[]> store32ToHostAsync(cudaStream_t stream) const;
0076 
0077   // needs specialization for Host
0078   void copyFromGPU(TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input, cudaStream_t stream);
0079 
0080 private:
0081   static constexpr uint32_t n16 = 4;                 // number of elements in m_store16
0082   static constexpr uint32_t n32 = 10;                // number of elements in m_store32
0083   static_assert(sizeof(uint32_t) == sizeof(float));  // just stating the obvious
0084   static_assert(n32 == static_cast<uint32_t>(Storage32::kLayers));
0085   unique_ptr<uint16_t[]> m_store16;  //!
0086   unique_ptr<float[]> m_store32;     //!
0087 
0088   unique_ptr<TrackingRecHit2DSOAView::PhiBinner> m_PhiBinnerStore;              //!
0089   unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore;  //!
0090 
0091   unique_ptr<TrackingRecHit2DSOAView> m_view;  //!
0092 
0093   uint32_t m_nHits;
0094   int32_t m_offsetBPIX2;
0095 
0096   uint32_t const* m_hitsModuleStart;  // needed for legacy, this is on GPU!
0097 
0098   uint32_t m_nMaxModules;
0099   // needed as kernel params...
0100   PhiBinner* m_phiBinner;
0101   PhiBinner::index_type* m_phiBinnerStorage;
0102   uint32_t* m_hitsLayerStart;
0103   int16_t* m_iphi;
0104 };
0105 
0106 using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>;  // also the type of the `input` argument taken by the allocating constructor and by copyFromGPU
0107 using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>;  // CPUTraits instantiation; presumably the target of the raw-store constructor, which this file comments as "intended to be used only for CPU SoA"
0108 using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>;  // HostTraits instantiation; its allocating constructor calls copyFromGPU(input, stream) instead of allocating the stores itself
0109 
0110 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0111 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0112 
0113 template <typename Traits>
0114 TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
0115     uint32_t nHits,
0116     bool isPhase2,
0117     int32_t offsetBPIX2,
0118     pixelCPEforGPU::ParamsOnGPU const* cpeParams,
0119     uint32_t const* hitsModuleStart,
0120     cudaStream_t stream,
0121     TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const* input)
0122     : m_nHits(nHits), m_offsetBPIX2(offsetBPIX2), m_hitsModuleStart(hitsModuleStart) {
0123   auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);
0124 
0125   m_nMaxModules = isPhase2 ? phase2PixelTopology::numberOfModules : phase1PixelTopology::numberOfModules;
0126 
0127   view->m_nHits = nHits;
0128   view->m_nMaxModules = m_nMaxModules;
0129   m_view = Traits::template make_unique<TrackingRecHit2DSOAView>(stream);  // leave it on host and pass it by value?
0130   m_AverageGeometryStore = Traits::template make_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);
0131   view->m_averageGeometry = m_AverageGeometryStore.get();
0132   view->m_cpeParams = cpeParams;
0133   view->m_hitsModuleStart = hitsModuleStart;
0134 
0135   // if empy do not bother
0136   if (0 == nHits) {
0137     if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
0138       cms::cuda::copyAsync(m_view, view, stream);
0139     } else {
0140       m_view.reset(view.release());  // NOLINT: std::move() breaks CUDA version
0141     }
0142     return;
0143   }
0144 
0145   // the single arrays are not 128 bit alligned...
0146   // the hits are actually accessed in order only in building
0147   // if ordering is relevant they may have to be stored phi-ordered by layer or so
0148   // this will break 1to1 correspondence with cluster and module locality
0149   // so unless proven VERY inefficient we keep it ordered as generated
0150 
0151   // host copy is "reduced"  (to be reviewed at some point)
0152   if constexpr (std::is_same_v<Traits, cms::cudacompat::HostTraits>) {
0153     // it has to compile for ALL cases
0154     copyFromGPU(input, stream);
0155   } else {
0156     assert(input == nullptr);
0157 
0158     auto nL = isPhase2 ? phase2PixelTopology::numberOfLayers : phase1PixelTopology::numberOfLayers;
0159 
0160     m_store16 = Traits::template make_unique<uint16_t[]>(nHits * n16, stream);
0161     m_store32 = Traits::template make_unique<float[]>(nHits * n32 + nL + 1, stream);
0162     m_PhiBinnerStore = Traits::template make_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
0163   }
0164 
0165   static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(float));
0166   static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(TrackingRecHit2DSOAView::PhiBinner::index_type));
0167 
0168   auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast<int>(i) * nHits; };
0169 
0170   // copy all the pointers
0171   m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
0172   m_phiBinnerStorage = view->m_phiBinnerStorage =
0173       reinterpret_cast<TrackingRecHit2DSOAView::PhiBinner::index_type*>(get32(Storage32::kPhiStorage));
0174 
0175   view->m_xl = get32(Storage32::kXLocal);
0176   view->m_yl = get32(Storage32::kYLocal);
0177   view->m_xerr = get32(Storage32::kXerror);
0178   view->m_yerr = get32(Storage32::kYerror);
0179   view->m_chargeAndStatus = reinterpret_cast<uint32_t*>(get32(Storage32::kCharge));
0180 
0181   if constexpr (!std::is_same_v<Traits, cms::cudacompat::HostTraits>) {
0182     assert(input == nullptr);
0183     view->m_xg = get32(Storage32::kXGlobal);
0184     view->m_yg = get32(Storage32::kYGlobal);
0185     view->m_zg = get32(Storage32::kZGlobal);
0186     view->m_rg = get32(Storage32::kRGlobal);
0187 
0188     auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast<int>(i) * nHits; };
0189     m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(Storage16::kPhi));
0190 
0191     view->m_xsize = reinterpret_cast<int16_t*>(get16(Storage16::kXSize));
0192     view->m_ysize = reinterpret_cast<int16_t*>(get16(Storage16::kYSize));
0193     view->m_detInd = get16(Storage16::kDetId);
0194 
0195     m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
0196     m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast<uint32_t*>(get32(Storage32::kLayers));
0197   }
0198 
0199   // transfer view
0200   if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
0201     cms::cuda::copyAsync(m_view, view, stream);
0202   } else {
0203     m_view.reset(view.release());  // NOLINT: std::move() breaks CUDA version
0204   }
0205 }
0206 
0207 //this is intended to be used only for CPU SoA but doesn't hurt to have it for all cases
0208 template <typename Traits>
0209 TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
0210     float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream)
0211     : m_nHits(nHits), m_hitsModuleStart(modules) {
0212   auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);
0213 
0214   m_view = Traits::template make_unique<TrackingRecHit2DSOAView>(stream);
0215 
0216   view->m_nHits = nHits;
0217 
0218   if (0 == nHits) {
0219     if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
0220       cms::cuda::copyAsync(m_view, view, stream);
0221     } else {
0222       m_view = std::move(view);
0223     }
0224     return;
0225   }
0226 
0227   m_store16 = Traits::template make_unique<uint16_t[]>(nHits * n16, stream);
0228   m_store32 = Traits::template make_unique<float[]>(nHits * n32, stream);
0229   m_PhiBinnerStore = Traits::template make_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
0230   m_AverageGeometryStore = Traits::template make_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);
0231 
0232   view->m_averageGeometry = m_AverageGeometryStore.get();
0233   view->m_hitsModuleStart = m_hitsModuleStart;
0234 
0235   //store transfer
0236   if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
0237     cms::cuda::copyAsync(m_store16, store16, stream);
0238     cms::cuda::copyAsync(m_store32, store32, stream);
0239   } else {
0240     std::copy(store32, store32 + nHits * n32, m_store32.get());  // want to copy it
0241     std::copy(store16, store16 + nHits * n16, m_store16.get());
0242   }
0243 
0244   //getters
0245   auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast<int>(i) * nHits; };
0246   auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast<int>(i) * nHits; };
0247 
0248   //Store 32
0249   view->m_xl = get32(Storage32::kXLocal);
0250   view->m_yl = get32(Storage32::kYLocal);
0251   view->m_xerr = get32(Storage32::kXerror);
0252   view->m_yerr = get32(Storage32::kYerror);
0253   view->m_chargeAndStatus = reinterpret_cast<uint32_t*>(get32(Storage32::kCharge));
0254   view->m_xg = get32(Storage32::kXGlobal);
0255   view->m_yg = get32(Storage32::kYGlobal);
0256   view->m_zg = get32(Storage32::kZGlobal);
0257   view->m_rg = get32(Storage32::kRGlobal);
0258 
0259   m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
0260   m_phiBinnerStorage = view->m_phiBinnerStorage =
0261       reinterpret_cast<TrackingRecHit2DSOAView::PhiBinner::index_type*>(get32(Storage32::kPhiStorage));
0262 
0263   //Store 16
0264   view->m_detInd = get16(Storage16::kDetId);
0265   m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(Storage16::kPhi));
0266   view->m_xsize = reinterpret_cast<int16_t*>(get16(Storage16::kXSize));
0267   view->m_ysize = reinterpret_cast<int16_t*>(get16(Storage16::kYSize));
0268 
0269   // transfer view
0270   if constexpr (std::is_same_v<Traits, cms::cudacompat::GPUTraits>) {
0271     cms::cuda::copyAsync(m_view, view, stream);
0272   } else {
0273     m_view = std::move(view);
0274   }
0275 }
0276 
0277 #endif  // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h