Project CMSSW displayed by LXR

#include <cuda_runtime.h>

#include <algorithm>
#include <cassert>
#include <utility>

#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "Geometry/CommonDetUnit/interface/GeomDetType.h"
#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

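// Cache the pixel gain calibration in a SiPixelGainForHLTonGPU structure.
// The structure is built once in pinned (page-locked) host memory; v_pedestals_ initially
// points at the payload owned by the SiPixelGainCalibrationForHLT condition object, so the
// same structure can also be used on the CPU.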
SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains,
                                                                 const TrackerGeometry& geom)
    : gains_(&gains) {
  // bizarre logic (looking for the first strip det unit): the pixel det units come first in
  // detUnits(), so the loop below truncates n_detectors to the number of pixel modules
  auto const& dus = geom.detUnits();
  unsigned int n_detectors = dus.size();
  for (unsigned int i = 1; i < 7; ++i) {
    const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]);
    if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) {
      if (n_detectors > offset)
        n_detectors = offset;
    }
  }

  LogDebug("SiPixelGainCalibrationForHLTGPU")
      << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n'
      << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure);

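  // allocate the host-side structure in pinned (page-locked) memory, so that the later
  // cudaMemcpyAsync to the device can run asynchronously with respect to the host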
  cudaCheck(cudaMallocHost((void**)&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
  gainForHLTonHost_->v_pedestals_ =
      (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data();  // so it can be used on CPU as well...

  // do not read back from the (possibly write-combined) memory buffer
  auto minPed = gains.getPedLow();
  auto maxPed = gains.getPedHigh();
  auto minGain = gains.getGainLow();
  auto maxGain = gains.getGainHigh();
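  // the payload encodes pedestal and gain as 8-bit values: 253 bins are used for the encoding,
  // leaving 254 and 255 free for the noisy and dead flags assigned below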
  auto nBinsToUseForEncoding = 253;

  // we will simplify later (not everything is needed....)
  gainForHLTonHost_->minPed_ = minPed;
  gainForHLTonHost_->maxPed_ = maxPed;
  gainForHLTonHost_->minGain_ = minGain;
  gainForHLTonHost_->maxGain_ = maxGain;

  gainForHLTonHost_->numberOfRowsAveragedOver_ = 80;
  gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
  gainForHLTonHost_->deadFlag_ = 255;
  gainForHLTonHost_->noisyFlag_ = 254;

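  // quantization step used to decode the 8-bit encoded values back into float pedestal and gain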
  gainForHLTonHost_->pedPrecision_ = static_cast<float>(maxPed - minPed) / nBinsToUseForEncoding;
  gainForHLTonHost_->gainPrecision_ = static_cast<float>(maxGain - minGain) / nBinsToUseForEncoding;

  LogDebug("SiPixelGainCalibrationForHLTGPU")
      << "precisions g " << gainForHLTonHost_->pedPrecision_ << ' ' << gainForHLTonHost_->gainPrecision_;

  // fill the index map
  auto const& ind = gains.getIndexes();
  LogDebug("SiPixelGainCalibrationForHLTGPU") << ind.size() << " " << n_detectors;

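  // for each pixel det unit, locate its block in the calibration payload with a binary search
  // on the raw detector id, and store its [ibegin, iend) range and number of columns so that
  // the payload can be indexed directly on the GPU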
  for (auto i = 0U; i < n_detectors; ++i) {
    auto p = std::lower_bound(
        ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering());
    assert(p != ind.end() && p->detid == dus[i]->geographicalId());
    assert(p->iend <= gains.data().size());
    assert(p->iend >= p->ibegin);
    assert(0 == p->ibegin % 2);
    assert(0 == p->iend % 2);
    assert(p->ibegin != p->iend);
    assert(p->ncols > 0);
    gainForHLTonHost_->rangeAndCols_[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols);
    if (ind[i].detid != dus[i]->geographicalId())
      LogDebug("SiPixelGainCalibrationForHLTGPU") << ind[i].detid << "!=" << dus[i]->geographicalId();
  }
}

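// free the pinned host-side structure; the per-device copies are released by ~GPUData below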
SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { cudaCheck(cudaFreeHost(gainForHLTonHost_)); }

SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
  cudaCheck(cudaFree(gainForHLTonGPU));
  cudaCheck(cudaFree(gainDataOnGPU));
}

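// Return the device-side copy of the calibration for the current device, allocating it and
// issuing the host-to-device transfers asynchronously on the given stream the first time it
// is requested.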
const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const {
  const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) {
    cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
    cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size()));
    // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
    cudaCheck(cudaMemcpyAsync(
        data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream));

    cudaCheck(cudaMemcpyAsync(
        data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream));
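    // fix up the device-side structure: overwrite the host pointer in v_pedestals_ (copied
    // just above) with the address of the gain data buffer on the device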
    cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals_),
                              &(data.gainDataOnGPU),
                              sizeof(SiPixelGainForHLTonGPU_DecodingStructure*),
                              cudaMemcpyDefault,
                              stream));
  });
  return data.gainForHLTonGPU;
}