Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:10:42

0001 #include <iostream>
0002 
0003 #include "CUDADataFormats/HcalDigi/interface/DigiCollection.h"
0004 #include "DataFormats/HcalDigi/interface/HcalDigiCollections.h"
0005 #include "FWCore/Framework/interface/Event.h"
0006 #include "FWCore/Framework/interface/EventSetup.h"
0007 #include "FWCore/Framework/interface/MakerMacros.h"
0008 #include "FWCore/Framework/interface/stream/EDProducer.h"
0009 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0010 #include "FWCore/ServiceRegistry/interface/Service.h"
0011 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
0012 #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0014 
0015 class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
0016 public:
0017   explicit HcalDigisProducerGPU(edm::ParameterSet const& ps);
0018   ~HcalDigisProducerGPU() override = default;
0019   static void fillDescriptions(edm::ConfigurationDescriptions&);
0020 
0021 private:
0022   void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override;
0023   void produce(edm::Event&, edm::EventSetup const&) override;
0024 
0025 private:
0026   // input product tokens
0027   edm::EDGetTokenT<HBHEDigiCollection> hbheDigiToken_;
0028   edm::EDGetTokenT<QIE11DigiCollection> qie11DigiToken_;
0029 
0030   // type aliases
0031   using HostCollectionf01 =
0032       hcal::DigiCollection<hcal::Flavor1, calo::common::VecStoragePolicy<calo::common::CUDAHostAllocatorAlias>>;
0033   using DeviceCollectionf01 = hcal::DigiCollection<hcal::Flavor1, calo::common::DevStoragePolicy>;
0034   using HostCollectionf5 =
0035       hcal::DigiCollection<hcal::Flavor5, calo::common::VecStoragePolicy<calo::common::CUDAHostAllocatorAlias>>;
0036   using DeviceCollectionf5 = hcal::DigiCollection<hcal::Flavor5, calo::common::DevStoragePolicy>;
0037   using HostCollectionf3 =
0038       hcal::DigiCollection<hcal::Flavor3, calo::common::VecStoragePolicy<calo::common::CUDAHostAllocatorAlias>>;
0039   using DeviceCollectionf3 = hcal::DigiCollection<hcal::Flavor3, calo::common::DevStoragePolicy>;
0040 
0041   // output product tokens
0042   using ProductTypef01 = cms::cuda::Product<DeviceCollectionf01>;
0043   edm::EDPutTokenT<ProductTypef01> digisF01HEToken_;
0044   using ProductTypef5 = cms::cuda::Product<DeviceCollectionf5>;
0045   edm::EDPutTokenT<ProductTypef5> digisF5HBToken_;
0046   using ProductTypef3 = cms::cuda::Product<DeviceCollectionf3>;
0047   edm::EDPutTokenT<ProductTypef3> digisF3HBToken_;
0048 
0049   cms::cuda::ContextState cudaState_;
0050 
0051   struct ConfigParameters {
0052     uint32_t maxChannelsF01HE, maxChannelsF5HB, maxChannelsF3HB;
0053   };
0054   ConfigParameters config_;
0055 
0056   // per event host buffers
0057   HostCollectionf01 hf01_;
0058   HostCollectionf5 hf5_;
0059   HostCollectionf3 hf3_;
0060 
0061   // device products: product owns memory (i.e. not the module)
0062   DeviceCollectionf01 df01_;
0063   DeviceCollectionf5 df5_;
0064   DeviceCollectionf3 df3_;
0065 };
0066 
0067 void HcalDigisProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) {
0068   edm::ParameterSetDescription desc;
0069 
0070   // FIXME
0071   desc.add<edm::InputTag>("hbheDigisLabel", edm::InputTag("hcalDigis"));
0072   desc.add<edm::InputTag>("qie11DigiLabel", edm::InputTag("hcalDigis"));
0073   desc.add<std::string>("digisLabelF01HE", std::string{"f01HEDigisGPU"});
0074   desc.add<std::string>("digisLabelF5HB", std::string{"f5HBDigisGPU"});
0075   desc.add<std::string>("digisLabelF3HB", std::string{"f3HBDigisGPU"});
0076   desc.add<uint32_t>("maxChannelsF01HE", 10000u);
0077   desc.add<uint32_t>("maxChannelsF5HB", 10000u);
0078   desc.add<uint32_t>("maxChannelsF3HB", 10000u);
0079 
0080   confDesc.addWithDefaultLabel(desc);
0081 }
0082 
0083 HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
0084     : hbheDigiToken_{consumes<HBHEDigiCollection>(ps.getParameter<edm::InputTag>("hbheDigisLabel"))},
0085       qie11DigiToken_{consumes<QIE11DigiCollection>(ps.getParameter<edm::InputTag>("qie11DigiLabel"))},
0086       digisF01HEToken_{produces<ProductTypef01>(ps.getParameter<std::string>("digisLabelF01HE"))},
0087       digisF5HBToken_{produces<ProductTypef5>(ps.getParameter<std::string>("digisLabelF5HB"))},
0088       digisF3HBToken_{produces<ProductTypef3>(ps.getParameter<std::string>("digisLabelF3HB"))} {
0089   config_.maxChannelsF01HE = ps.getParameter<uint32_t>("maxChannelsF01HE");
0090   config_.maxChannelsF5HB = ps.getParameter<uint32_t>("maxChannelsF5HB");
0091   config_.maxChannelsF3HB = ps.getParameter<uint32_t>("maxChannelsF3HB");
0092 
0093   // this is a preallocation for the max statically known number of time samples
0094   // actual stride/nsamples will be inferred from data
0095   hf01_.stride = hcal::compute_stride<hcal::Flavor1>(QIE11DigiCollection::MAXSAMPLES);
0096   hf5_.stride = hcal::compute_stride<hcal::Flavor5>(HBHEDataFrame::MAXSAMPLES);
0097   hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);
0098 
0099   // preallocate pinned host memory only if CUDA is available
0100   edm::Service<CUDAInterface> cuda;
0101   if (cuda and cuda->enabled()) {
0102     hf01_.reserve(config_.maxChannelsF01HE);
0103     hf5_.reserve(config_.maxChannelsF5HB);
0104     hf3_.reserve(config_.maxChannelsF3HB);
0105   }
0106 }
0107 
0108 void HcalDigisProducerGPU::acquire(edm::Event const& event,
0109                                    edm::EventSetup const& setup,
0110                                    edm::WaitingTaskWithArenaHolder holder) {
0111   // raii
0112   cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_};
0113 
0114   // clear host buffers
0115   hf01_.clear();
0116   hf5_.clear();
0117   hf3_.clear();
0118 
0119   // event data
0120   edm::Handle<HBHEDigiCollection> hbheDigis;
0121   edm::Handle<QIE11DigiCollection> qie11Digis;
0122   event.getByToken(hbheDigiToken_, hbheDigis);
0123   event.getByToken(qie11DigiToken_, qie11Digis);
0124 
0125   // init f5 collection
0126   if (not hbheDigis->empty()) {
0127     auto const nsamples = (*hbheDigis)[0].size();
0128     auto const stride = hcal::compute_stride<hcal::Flavor5>(nsamples);
0129     hf5_.stride = stride;
0130 
0131     // flavor5 get device blobs
0132     df5_.stride = stride;
0133     df5_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF5HB * stride, ctx.stream());
0134     df5_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF5HB, ctx.stream());
0135     df5_.npresamples = cms::cuda::make_device_unique<uint8_t[]>(config_.maxChannelsF5HB, ctx.stream());
0136   }
0137 
0138   if (not qie11Digis->empty()) {
0139     auto const nsamples = qie11Digis->samples();
0140     auto const stride01 = hcal::compute_stride<hcal::Flavor1>(nsamples);
0141     auto const stride3 = hcal::compute_stride<hcal::Flavor3>(nsamples);
0142 
0143     hf01_.stride = stride01;
0144     hf3_.stride = stride3;
0145 
0146     // flavor 0/1 get devie blobs
0147     df01_.stride = stride01;
0148     df01_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF01HE * stride01, ctx.stream());
0149     df01_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF01HE, ctx.stream());
0150 
0151     // flavor3 get device blobs
0152     df3_.stride = stride3;
0153     df3_.data = cms::cuda::make_device_unique<uint16_t[]>(config_.maxChannelsF3HB * stride3, ctx.stream());
0154     df3_.ids = cms::cuda::make_device_unique<uint32_t[]>(config_.maxChannelsF3HB, ctx.stream());
0155   }
0156 
0157   for (auto const& hbhe : *hbheDigis) {
0158     auto const id = hbhe.id().rawId();
0159     auto const presamples = hbhe.presamples();
0160     hf5_.ids.push_back(id);
0161     hf5_.npresamples.push_back(presamples);
0162     auto const stride = hcal::compute_stride<hcal::Flavor5>(hbhe.size());
0163     assert(stride == hf5_.stride && "strides must be the same for every single digi of the collection");
0164     // simple for now...
0165     static_assert(hcal::Flavor5::HEADER_WORDS == 1);
0166     uint16_t header_word = (1 << 15) | (0x5 << 12) | (0 << 10) | ((hbhe.sample(0).capid() & 0x3) << 8);
0167     hf5_.data.push_back(header_word);
0168     for (unsigned int i = 0; i < stride - hcal::Flavor5::HEADER_WORDS; i++) {
0169       uint16_t s0 = (0 << 7) | (static_cast<uint8_t>(hbhe.sample(2 * i).adc()) & 0x7f);
0170       uint16_t s1 = (0 << 7) | (static_cast<uint8_t>(hbhe.sample(2 * i + 1).adc()) & 0x7f);
0171       uint16_t sample = (s1 << 8) | s0;
0172       hf5_.data.push_back(sample);
0173     }
0174   }
0175 
0176   for (unsigned int i = 0; i < qie11Digis->size(); i++) {
0177     auto const& digi = QIE11DataFrame{(*qie11Digis)[i]};
0178     assert(digi.samples() == qie11Digis->samples() && "collection nsamples must equal per digi samples");
0179     if (digi.flavor() == 0 or digi.flavor() == 1) {
0180       if (digi.detid().subdetId() != HcalEndcap)
0181         continue;
0182       auto const id = digi.detid().rawId();
0183       hf01_.ids.push_back(id);
0184       for (int hw = 0; hw < hcal::Flavor1::HEADER_WORDS; hw++)
0185         hf01_.data.push_back((*qie11Digis)[i][hw]);
0186       for (int sample = 0; sample < digi.samples(); sample++) {
0187         hf01_.data.push_back((*qie11Digis)[i][hcal::Flavor1::HEADER_WORDS + sample]);
0188       }
0189     } else if (digi.flavor() == 3) {
0190       if (digi.detid().subdetId() != HcalBarrel)
0191         continue;
0192       auto const id = digi.detid().rawId();
0193       hf3_.ids.push_back(id);
0194       for (int hw = 0; hw < hcal::Flavor3::HEADER_WORDS; hw++)
0195         hf3_.data.push_back((*qie11Digis)[i][hw]);
0196       for (int sample = 0; sample < digi.samples(); sample++) {
0197         hf3_.data.push_back((*qie11Digis)[i][hcal::Flavor3::HEADER_WORDS + sample]);
0198       }
0199     }
0200   }
0201 
0202   auto lambdaToTransfer = [&ctx](auto* dest, auto const& src) {
0203     if (src.empty())
0204       return;
0205     using vector_type = typename std::remove_reference<decltype(src)>::type;
0206     using type = typename vector_type::value_type;
0207     using dest_data_type = typename std::remove_pointer<decltype(dest)>::type;
0208     static_assert(std::is_same<dest_data_type, type>::value && "Dest and Src data typesdo not match");
0209     cudaCheck(cudaMemcpyAsync(dest, src.data(), src.size() * sizeof(type), cudaMemcpyHostToDevice, ctx.stream()));
0210   };
0211 
0212   lambdaToTransfer(df01_.data.get(), hf01_.data);
0213   lambdaToTransfer(df01_.ids.get(), hf01_.ids);
0214 
0215   lambdaToTransfer(df5_.data.get(), hf5_.data);
0216   lambdaToTransfer(df5_.ids.get(), hf5_.ids);
0217   lambdaToTransfer(df5_.npresamples.get(), hf5_.npresamples);
0218 
0219   lambdaToTransfer(df3_.data.get(), hf3_.data);
0220   lambdaToTransfer(df3_.ids.get(), hf3_.ids);
0221 
0222   df01_.size = hf01_.ids.size();
0223   df5_.size = hf5_.ids.size();
0224   df3_.size = hf3_.ids.size();
0225 }
0226 
0227 void HcalDigisProducerGPU::produce(edm::Event& event, edm::EventSetup const& setup) {
0228   cms::cuda::ScopedContextProduce ctx{cudaState_};
0229 
0230   ctx.emplace(event, digisF01HEToken_, std::move(df01_));
0231   ctx.emplace(event, digisF5HBToken_, std::move(df5_));
0232   ctx.emplace(event, digisF3HBToken_, std::move(df3_));
0233 }
0234 
0235 DEFINE_FWK_MODULE(HcalDigisProducerGPU);