SiStripClusterizer/plugins/SiStripRawToClusterGPUKernel.cc

0001 #include "EventFilter/SiStripRawToDigi/interface/SiStripFEDBuffer.h"
0002 #include "DataFormats/Common/interface/DetSetVectorNew.h"
0003 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0004 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0005 #include "RecoLocalTracker/SiStripClusterizer/interface/ClusterChargeCut.h"
0006
0007 #include "SiStripRawToClusterGPUKernel.h"
0008
0009 #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
0010 #include "ChannelLocsGPU.h"
0011 #include "StripDataView.h"
0012
0013 namespace stripgpu {
0014   StripDataGPU::StripDataGPU(size_t size, cudaStream_t stream) {
0015     alldataGPU_ = cms::cuda::make_device_unique<uint8_t[]>(size, stream);
0016     channelGPU_ = cms::cuda::make_device_unique<uint16_t[]>(size, stream);
0017     stripIdGPU_ = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(size, stream);
0018   }
0019
0020   SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel(const edm::ParameterSet& conf)
0021       : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),
0022         channelThreshold_(conf.getParameter<double>("ChannelThreshold")),
0023         seedThreshold_(conf.getParameter<double>("SeedThreshold")),
0024         clusterThresholdSquared_(std::pow(conf.getParameter<double>("ClusterThreshold"), 2.0f)),
0025         maxSequentialHoles_(conf.getParameter<unsigned>("MaxSequentialHoles")),
0026         maxSequentialBad_(conf.getParameter<unsigned>("MaxSequentialBad")),
0027         maxAdjacentBad_(conf.getParameter<unsigned>("MaxAdjacentBad")),
0028         maxClusterSize_(conf.getParameter<unsigned>("MaxClusterSize")),
0029         minGoodCharge_(clusterChargeCut(conf)) {
0030     fedRawDataOffsets_.reserve(sistrip::NUMBER_OF_FEDS);
0031   }
0032
0033   void SiStripRawToClusterGPUKernel::makeAsync(const std::vector<const FEDRawData*>& rawdata,
0034                                                const std::vector<std::unique_ptr<sistrip::FEDBuffer>>& buffers,
0035                                                const SiStripClusterizerConditionsGPU& conditions,
0036                                                cudaStream_t stream) {
0037     size_t totalSize{0};
0038     for (const auto& buff : buffers) {
0039       if (buff != nullptr) {
0040         totalSize += buff->bufferSize();
0041       }
0042     }
0043
0044     auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize, stream);
0045     auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize, stream);
0046
0047     size_t off = 0;
0048     fedRawDataOffsets_.clear();
0049     fedIndex_.clear();
0050     fedIndex_.resize(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed);
0051
0052     sistrip::FEDReadoutMode mode = sistrip::READOUT_MODE_INVALID;
0053
0054     for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
0055       auto& buff = buffers[fedi];
0056       if (buff != nullptr) {
0057         const auto raw = rawdata[fedi];
0058         memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
0059         fedIndex_[stripgpu::fedIndex(fedi)] = fedRawDataOffsets_.size();
0060         fedRawDataOffsets_.push_back(off);
0061         off += raw->size();
0062         if (fedRawDataOffsets_.size() == 1) {
0063           mode = buff->readoutMode();
0064         } else {
0065           if (buff->readoutMode() != mode) {
0066             throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
0067                 << buff->readoutMode() << " != " << mode;
0068           }
0069         }
0070       }
0071     }
0072     // send rawdata to GPU
0073     cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);
0074
0075     const auto& detmap = conditions.detToFeds();
0076     if ((mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED) && (mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10)) {
0077       throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
0078     }
0079     const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
0080     size_t offset = 0;
0081     auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(), stream);
0082     auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(), stream);
0083
0084     // iterate over the detector in DetID/APVPair order
0085     // mapping out where the data are
0086     for (size_t i = 0; i < detmap.size(); ++i) {
0087       const auto& detp = detmap[i];
0088       const auto fedId = detp.fedID();
0089       const auto fedCh = detp.fedCh();
0090       const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];
0091
0092       if (fedi != invalidFed) {
0093         const auto buffer = buffers[fedId].get();
0094         const auto& channel = buffer->channel(detp.fedCh());
0095
0096         auto len = channel.length();
0097         auto off = channel.offset();
0098
0099         assert(len >= headerlen || len == 0);
0100
0101         if (len >= headerlen) {
0102           len -= headerlen;
0103           off += headerlen;
0104         }
0105
0106         chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
0107         inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
0108         offset += len;
0109
0110       } else {
0111         chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
0112         inputGPU[i] = nullptr;
0113       }
0114     }
0115
0116     const auto n_strips = offset;
0117
0118     sst_data_d_ = cms::cuda::make_host_unique<StripDataView>(stream);
0119     sst_data_d_->nStrips = n_strips;
0120
0121     chanlocsGPU_ = std::make_unique<ChannelLocsGPU>(detmap.size(), stream);
0122     chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);
0123
0124     stripdata_ = std::make_unique<StripDataGPU>(n_strips, stream);
0125
0126     const auto& condGPU = conditions.getGPUProductAsync(stream);
0127
0128     unpackChannelsGPU(condGPU.deviceView(), stream);
0129 #ifdef GPU_CHECK
0130     cudaCheck(cudaStreamSynchronize(stream));
0131 #endif
0132
0133 #ifdef EDM_ML_DEBUG
0134     auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips, stream);
0135     cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
0136     cudaCheck(cudaStreamSynchronize(stream));
0137
0138     constexpr int xor3bits = 7;
0139     for (size_t i = 0; i < chanlocs->size(); ++i) {
0140       const auto data = chanlocs->input(i);
0141       const auto len = chanlocs->length(i);
0142
0143       if (data != nullptr && len > 0) {
0144         auto aoff = chanlocs->offset(i);
0145         auto choff = chanlocs->inoff(i);
0146         const auto end = choff + len;
0147
0148         while (choff < end) {
0149           const auto stripIndex = data[choff++ ^ xor3bits];
0150           const auto groupLength = data[choff++ ^ xor3bits];
0151           aoff += 2;
0152           for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
0153             if (data[choff ^ xor3bits] != outdata[aoff]) {
0154               LogDebug("SiStripRawToClusterGPUKernel")
0155                   << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
0156                   << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
0157             }
0158           }
0159         }
0160       }
0161     }
0162     outdata.reset(nullptr);
0163 #endif
0164
0165     fedRawDataGPU.reset();
0166     allocateSSTDataGPU(n_strips, stream);
0167     setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
0168
0169     clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
0170     findClusterGPU(condGPU.deviceView(), stream);
0171
0172     stripdata_.reset();
0173   }
0174
0175   SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
0176     reset();
0177
0178     return std::move(clusters_d_);
0179   }
0180
0181   void SiStripRawToClusterGPUKernel::reset() {
0182     chanlocsGPU_.reset();
0183     sst_data_d_.reset();
0184   }
0185 }  // namespace stripgpu