File indexing completed on 2024-04-06 12:26:27
0001 #include "EventFilter/SiStripRawToDigi/interface/SiStripFEDBuffer.h"
0002 #include "DataFormats/Common/interface/DetSetVectorNew.h"
0003 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0004 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0005 #include "RecoLocalTracker/SiStripClusterizer/interface/ClusterChargeCut.h"
0006
0007 #include "SiStripRawToClusterGPUKernel.h"
0008
0009 #include "CalibFormats/SiStripObjects/interface/SiStripClusterizerConditionsGPU.h"
0010 #include "ChannelLocsGPU.h"
0011 #include "StripDataView.h"
0012
0013 namespace stripgpu {
0014 StripDataGPU::StripDataGPU(size_t size, cudaStream_t stream) {
0015 alldataGPU_ = cms::cuda::make_device_unique<uint8_t[]>(size, stream);
0016 channelGPU_ = cms::cuda::make_device_unique<uint16_t[]>(size, stream);
0017 stripIdGPU_ = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(size, stream);
0018 }
0019
0020 SiStripRawToClusterGPUKernel::SiStripRawToClusterGPUKernel(const edm::ParameterSet& conf)
0021 : fedIndex_(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed),
0022 channelThreshold_(conf.getParameter<double>("ChannelThreshold")),
0023 seedThreshold_(conf.getParameter<double>("SeedThreshold")),
0024 clusterThresholdSquared_(std::pow(conf.getParameter<double>("ClusterThreshold"), 2.0f)),
0025 maxSequentialHoles_(conf.getParameter<unsigned>("MaxSequentialHoles")),
0026 maxSequentialBad_(conf.getParameter<unsigned>("MaxSequentialBad")),
0027 maxAdjacentBad_(conf.getParameter<unsigned>("MaxAdjacentBad")),
0028 maxClusterSize_(conf.getParameter<unsigned>("MaxClusterSize")),
0029 minGoodCharge_(clusterChargeCut(conf)) {
0030 fedRawDataOffsets_.reserve(sistrip::NUMBER_OF_FEDS);
0031 }
0032
0033 void SiStripRawToClusterGPUKernel::makeAsync(const std::vector<const FEDRawData*>& rawdata,
0034 const std::vector<std::unique_ptr<sistrip::FEDBuffer>>& buffers,
0035 const SiStripClusterizerConditionsGPU& conditions,
0036 cudaStream_t stream) {
0037 size_t totalSize{0};
0038 for (const auto& buff : buffers) {
0039 if (buff != nullptr) {
0040 totalSize += buff->bufferSize();
0041 }
0042 }
0043
0044 auto fedRawDataHost = cms::cuda::make_host_unique<uint8_t[]>(totalSize, stream);
0045 auto fedRawDataGPU = cms::cuda::make_device_unique<uint8_t[]>(totalSize, stream);
0046
0047 size_t off = 0;
0048 fedRawDataOffsets_.clear();
0049 fedIndex_.clear();
0050 fedIndex_.resize(sistrip::NUMBER_OF_FEDS, stripgpu::invalidFed);
0051
0052 sistrip::FEDReadoutMode mode = sistrip::READOUT_MODE_INVALID;
0053
0054 for (size_t fedi = 0; fedi < buffers.size(); ++fedi) {
0055 auto& buff = buffers[fedi];
0056 if (buff != nullptr) {
0057 const auto raw = rawdata[fedi];
0058 memcpy(fedRawDataHost.get() + off, raw->data(), raw->size());
0059 fedIndex_[stripgpu::fedIndex(fedi)] = fedRawDataOffsets_.size();
0060 fedRawDataOffsets_.push_back(off);
0061 off += raw->size();
0062 if (fedRawDataOffsets_.size() == 1) {
0063 mode = buff->readoutMode();
0064 } else {
0065 if (buff->readoutMode() != mode) {
0066 throw cms::Exception("[SiStripRawToClusterGPUKernel] inconsistent readout mode ")
0067 << buff->readoutMode() << " != " << mode;
0068 }
0069 }
0070 }
0071 }
0072
0073 cms::cuda::copyAsync(fedRawDataGPU, fedRawDataHost, totalSize, stream);
0074
0075 const auto& detmap = conditions.detToFeds();
0076 if ((mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED) && (mode != sistrip::READOUT_MODE_ZERO_SUPPRESSED_LITE10)) {
0077 throw cms::Exception("[SiStripRawToClusterGPUKernel] unsupported readout mode ") << mode;
0078 }
0079 const uint16_t headerlen = mode == sistrip::READOUT_MODE_ZERO_SUPPRESSED ? 7 : 2;
0080 size_t offset = 0;
0081 auto chanlocs = std::make_unique<ChannelLocs>(detmap.size(), stream);
0082 auto inputGPU = cms::cuda::make_host_unique<const uint8_t*[]>(chanlocs->size(), stream);
0083
0084
0085
0086 for (size_t i = 0; i < detmap.size(); ++i) {
0087 const auto& detp = detmap[i];
0088 const auto fedId = detp.fedID();
0089 const auto fedCh = detp.fedCh();
0090 const auto fedi = fedIndex_[stripgpu::fedIndex(fedId)];
0091
0092 if (fedi != invalidFed) {
0093 const auto buffer = buffers[fedId].get();
0094 const auto& channel = buffer->channel(detp.fedCh());
0095
0096 auto len = channel.length();
0097 auto off = channel.offset();
0098
0099 assert(len >= headerlen || len == 0);
0100
0101 if (len >= headerlen) {
0102 len -= headerlen;
0103 off += headerlen;
0104 }
0105
0106 chanlocs->setChannelLoc(i, channel.data(), off, offset, len, fedId, fedCh, detp.detID());
0107 inputGPU[i] = fedRawDataGPU.get() + fedRawDataOffsets_[fedi] + (channel.data() - rawdata[fedId]->data());
0108 offset += len;
0109
0110 } else {
0111 chanlocs->setChannelLoc(i, nullptr, 0, 0, 0, invalidFed, 0, invalidDet);
0112 inputGPU[i] = nullptr;
0113 }
0114 }
0115
0116 const auto n_strips = offset;
0117
0118 sst_data_d_ = cms::cuda::make_host_unique<StripDataView>(stream);
0119 sst_data_d_->nStrips = n_strips;
0120
0121 chanlocsGPU_ = std::make_unique<ChannelLocsGPU>(detmap.size(), stream);
0122 chanlocsGPU_->setVals(chanlocs.get(), std::move(inputGPU), stream);
0123
0124 stripdata_ = std::make_unique<StripDataGPU>(n_strips, stream);
0125
0126 const auto& condGPU = conditions.getGPUProductAsync(stream);
0127
0128 unpackChannelsGPU(condGPU.deviceView(), stream);
0129 #ifdef GPU_CHECK
0130 cudaCheck(cudaStreamSynchronize(stream));
0131 #endif
0132
0133 #ifdef EDM_ML_DEBUG
0134 auto outdata = cms::cuda::make_host_unique<uint8_t[]>(n_strips, stream);
0135 cms::cuda::copyAsync(outdata, stripdata_->alldataGPU_, n_strips, stream);
0136 cudaCheck(cudaStreamSynchronize(stream));
0137
0138 constexpr int xor3bits = 7;
0139 for (size_t i = 0; i < chanlocs->size(); ++i) {
0140 const auto data = chanlocs->input(i);
0141 const auto len = chanlocs->length(i);
0142
0143 if (data != nullptr && len > 0) {
0144 auto aoff = chanlocs->offset(i);
0145 auto choff = chanlocs->inoff(i);
0146 const auto end = choff + len;
0147
0148 while (choff < end) {
0149 const auto stripIndex = data[choff++ ^ xor3bits];
0150 const auto groupLength = data[choff++ ^ xor3bits];
0151 aoff += 2;
0152 for (auto k = 0; k < groupLength; ++k, ++choff, ++aoff) {
0153 if (data[choff ^ xor3bits] != outdata[aoff]) {
0154 LogDebug("SiStripRawToClusterGPUKernel")
0155 << "Strip mismatch " << stripIndex << " i:k " << i << ":" << k << " "
0156 << (uint32_t)data[choff ^ xor3bits] << " != " << (uint32_t)outdata[aoff] << std::endl;
0157 }
0158 }
0159 }
0160 }
0161 }
0162 outdata.reset(nullptr);
0163 #endif
0164
0165 fedRawDataGPU.reset();
0166 allocateSSTDataGPU(n_strips, stream);
0167 setSeedStripsNCIndexGPU(condGPU.deviceView(), stream);
0168
0169 clusters_d_ = SiStripClustersCUDADevice(kMaxSeedStrips, maxClusterSize_, stream);
0170 findClusterGPU(condGPU.deviceView(), stream);
0171
0172 stripdata_.reset();
0173 }
0174
0175 SiStripClustersCUDADevice SiStripRawToClusterGPUKernel::getResults(cudaStream_t stream) {
0176 reset();
0177
0178 return std::move(clusters_d_);
0179 }
0180
0181 void SiStripRawToClusterGPUKernel::reset() {
0182 chanlocsGPU_.reset();
0183 sst_data_d_.reset();
0184 }
0185 }