File indexing completed on 2025-01-22 07:34:22
0001 #ifndef RecoLocalCalo_EcalRecProducers_plugins_alpaka_AmplitudeComputationCommonKernels_h
0002 #define RecoLocalCalo_EcalRecProducers_plugins_alpaka_AmplitudeComputationCommonKernels_h
0003
0004 #include <cstdlib>
0005 #include <limits>
0006 #include <alpaka/alpaka.hpp>
0007
0008 #include "CondFormats/EcalObjects/interface/alpaka/EcalMultifitConditionsDevice.h"
0009 #include "DataFormats/EcalDigi/interface/alpaka/EcalDigiDeviceCollection.h"
0010 #include "DataFormats/EcalRecHit/interface/alpaka/EcalUncalibratedRecHitDeviceCollection.h"
0011 #include "CondFormats/EcalObjects/interface/EcalPulseShapes.h"
0012 #include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
0013 #include "DataFormats/EcalDigi/interface/EcalMGPASample.h"
0014 #include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
0015 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0016 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0017 #include "RecoLocalCalo/EcalRecProducers/interface/EigenMatrixTypes_gpu.h"
0018
0019 #include "DeclsForKernels.h"
0020 #include "KernelHelpers.h"
0021
0022 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0023
0024
0025
0026
0027
0028
0029 class Kernel_prep_1d_and_initialize {
0030 public:
0031 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0032 EcalDigiDeviceCollection::ConstView digisDevEB,
0033 EcalDigiDeviceCollection::ConstView digisDevEE,
0034 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEB,
0035 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEE,
0036 EcalMultifitConditionsDevice::ConstView conditionsDev,
0037 ::ecal::multifit::SampleVector* amplitudes,
0038 ::ecal::multifit::SampleGainVector* gainsNoise,
0039 bool* hasSwitchToGain6,
0040 bool* hasSwitchToGain1,
0041 bool* isSaturated,
0042 char* acState,
0043 ::ecal::multifit::BXVectorType* bxs,
0044 bool const gainSwitchUseMaxSampleEB,
0045 bool const gainSwitchUseMaxSampleEE) const {
0046 constexpr bool dynamicPedestal = false;
0047 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0048 constexpr int sample_max = 5;
0049 constexpr int full_pulse_max = 9;
0050 auto const offsetForHashes = conditionsDev.offsetEE();
0051
0052 auto const nchannelsEB = digisDevEB.size();
0053 auto const nchannelsEE = digisDevEE.size();
0054 auto const nchannels = nchannelsEB + nchannelsEE;
0055 auto const totalElements = nchannels * nsamples;
0056
0057 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0058
0059 char* shared_mem = alpaka::getDynSharedMem<char>(acc);
0060 auto* shr_hasSwitchToGain6 = reinterpret_cast<bool*>(shared_mem);
0061 auto* shr_hasSwitchToGain1 = shr_hasSwitchToGain6 + elemsPerBlock;
0062 auto* shr_hasSwitchToGain0 = shr_hasSwitchToGain1 + elemsPerBlock;
0063 auto* shr_isSaturated = shr_hasSwitchToGain0 + elemsPerBlock;
0064 auto* shr_hasSwitchToGain0_tmp = shr_isSaturated + elemsPerBlock;
0065 auto* shr_counts = reinterpret_cast<char*>(shr_hasSwitchToGain0_tmp) + elemsPerBlock;
0066
0067 for (auto block : cms::alpakatools::uniform_groups(acc, totalElements)) {
0068 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0069
0070 if (idx.global == 0) {
0071 uncalibRecHitsEB.size() = nchannelsEB;
0072 uncalibRecHitsEE.size() = nchannelsEE;
0073 }
0074
0075 auto const ch = idx.global / nsamples;
0076
0077 int const inputTx = ch >= nchannelsEB ? idx.global - nchannelsEB * nsamples : idx.global;
0078
0079 auto const* digis_in = ch >= nchannelsEB ? digisDevEE.data()->data() : digisDevEB.data()->data();
0080 auto const gainId = ecalMGPA::gainId(digis_in[inputTx]);
0081
0082
0083 shr_hasSwitchToGain6[idx.local] = gainId == EcalMgpaBitwiseGain6;
0084 shr_hasSwitchToGain1[idx.local] = gainId == EcalMgpaBitwiseGain1;
0085 shr_hasSwitchToGain0_tmp[idx.local] = gainId == EcalMgpaBitwiseGain0;
0086 shr_hasSwitchToGain0[idx.local] = shr_hasSwitchToGain0_tmp[idx.local];
0087 shr_counts[idx.local] = 0;
0088 }
0089
0090 alpaka::syncBlockThreads(acc);
0091
0092 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0093 auto const sample = idx.local % nsamples;
0094
0095
0096 if (idx.local <= elemsPerBlock - 5) {
0097 CMS_UNROLL_LOOP
0098 for (int i = 0; i < 5; ++i)
0099 shr_counts[idx.local] += shr_hasSwitchToGain0[idx.local + i];
0100 }
0101 shr_isSaturated[idx.local] = shr_counts[idx.local] == 5;
0102
0103
0104
0105
0106 if (sample < 5) {
0107 shr_hasSwitchToGain6[idx.local] = shr_hasSwitchToGain6[idx.local] || shr_hasSwitchToGain6[idx.local + 5];
0108 shr_hasSwitchToGain1[idx.local] = shr_hasSwitchToGain1[idx.local] || shr_hasSwitchToGain1[idx.local + 5];
0109
0110
0111
0112 shr_hasSwitchToGain0_tmp[idx.local] =
0113 shr_hasSwitchToGain0_tmp[idx.local] || shr_hasSwitchToGain0_tmp[idx.local + 5];
0114 }
0115 }
0116
0117 alpaka::syncBlockThreads(acc);
0118
0119 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0120 auto const sample = idx.local % nsamples;
0121
0122 if (sample < 2) {
0123
0124 shr_hasSwitchToGain6[idx.local] = shr_hasSwitchToGain6[idx.local] || shr_hasSwitchToGain6[idx.local + 2] ||
0125 shr_hasSwitchToGain6[idx.local + 3];
0126 shr_hasSwitchToGain1[idx.local] = shr_hasSwitchToGain1[idx.local] || shr_hasSwitchToGain1[idx.local + 2] ||
0127 shr_hasSwitchToGain1[idx.local + 3];
0128
0129 shr_hasSwitchToGain0_tmp[idx.local] = shr_hasSwitchToGain0_tmp[idx.local] ||
0130 shr_hasSwitchToGain0_tmp[idx.local + 2] ||
0131 shr_hasSwitchToGain0_tmp[idx.local + 3];
0132
0133
0134
0135
0136 shr_isSaturated[idx.local] = shr_isSaturated[idx.local + 3] || shr_isSaturated[idx.local + 4];
0137 }
0138 }
0139
0140 alpaka::syncBlockThreads(acc);
0141
0142 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0143 auto const ch = idx.global / nsamples;
0144 auto const sample = idx.local % nsamples;
0145
0146 if (sample == 0) {
0147 shr_hasSwitchToGain6[idx.local] = shr_hasSwitchToGain6[idx.local] || shr_hasSwitchToGain6[idx.local + 1];
0148 shr_hasSwitchToGain1[idx.local] = shr_hasSwitchToGain1[idx.local] || shr_hasSwitchToGain1[idx.local + 1];
0149 shr_hasSwitchToGain0_tmp[idx.local] =
0150 shr_hasSwitchToGain0_tmp[idx.local] || shr_hasSwitchToGain0_tmp[idx.local + 1];
0151
0152 hasSwitchToGain6[ch] = shr_hasSwitchToGain6[idx.local];
0153 hasSwitchToGain1[ch] = shr_hasSwitchToGain1[idx.local];
0154
0155 shr_isSaturated[idx.local + 3] = shr_isSaturated[idx.local] || shr_isSaturated[idx.local + 1];
0156 isSaturated[ch] = shr_isSaturated[idx.local + 3];
0157 }
0158 }
0159
0160
0161
0162
0163 alpaka::syncBlockThreads(acc);
0164
0165 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0166 auto const ch = idx.global / nsamples;
0167 auto const sample = idx.local % nsamples;
0168
0169
0170 int const inputCh = ch >= nchannelsEB ? ch - nchannelsEB : ch;
0171 int const inputTx = ch >= nchannelsEB ? idx.global - nchannelsEB * nsamples : idx.global;
0172
0173 auto const* dids = ch >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
0174 auto const did = DetId{dids[inputCh]};
0175 auto const isBarrel = did.subdetId() == EcalBarrel;
0176
0177 auto const hashedId = isBarrel ? reconstruction::hashedIndexEB(did.rawId())
0178 : offsetForHashes + reconstruction::hashedIndexEE(did.rawId());
0179
0180
0181 auto const* digis_in = ch >= nchannelsEB ? digisDevEE.data()->data() : digisDevEB.data()->data();
0182
0183 auto* amplitudesForMinimization = reinterpret_cast<::ecal::multifit::SampleVector*>(
0184 ch >= nchannelsEB ? uncalibRecHitsEE.outOfTimeAmplitudes()->data()
0185 : uncalibRecHitsEB.outOfTimeAmplitudes()->data());
0186 auto* energies = ch >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
0187 auto* chi2 = ch >= nchannelsEB ? uncalibRecHitsEE.chi2() : uncalibRecHitsEB.chi2();
0188 auto* g_pedestal = ch >= nchannelsEB ? uncalibRecHitsEE.pedestal() : uncalibRecHitsEB.pedestal();
0189 auto* dids_out = ch >= nchannelsEB ? uncalibRecHitsEE.id() : uncalibRecHitsEB.id();
0190 auto* flags = ch >= nchannelsEB ? uncalibRecHitsEE.flags() : uncalibRecHitsEB.flags();
0191
0192 auto const adc = ecalMGPA::adc(digis_in[inputTx]);
0193 auto const gainId = ecalMGPA::gainId(digis_in[inputTx]);
0194 ::ecal::multifit::SampleVector::Scalar amplitude = 0.;
0195 ::ecal::multifit::SampleVector::Scalar pedestal = 0.;
0196 ::ecal::multifit::SampleVector::Scalar gainratio = 0.;
0197
0198
0199 if (gainId == 0 || gainId == 3) {
0200 pedestal = conditionsDev.pedestals_mean_x1()[hashedId];
0201 gainratio = conditionsDev.gain6Over1()[hashedId] * conditionsDev.gain12Over6()[hashedId];
0202 gainsNoise[ch](sample) = 2;
0203 } else if (gainId == 1) {
0204 pedestal = conditionsDev.pedestals_mean_x12()[hashedId];
0205 gainratio = 1.;
0206 gainsNoise[ch](sample) = 0;
0207 } else if (gainId == 2) {
0208 pedestal = conditionsDev.pedestals_mean_x6()[hashedId];
0209 gainratio = conditionsDev.gain12Over6()[hashedId];
0210 gainsNoise[ch](sample) = 1;
0211 }
0212
0213
0214 if (dynamicPedestal)
0215 amplitude = static_cast<::ecal::multifit::SampleVector::Scalar>(adc) * gainratio;
0216 else
0217 amplitude = (static_cast<::ecal::multifit::SampleVector::Scalar>(adc) - pedestal) * gainratio;
0218 amplitudes[ch][sample] = amplitude;
0219
0220 #ifdef ECAL_RECO_ALPAKA_DEBUG
0221 printf("%d %d %d %d %f %f %f\n", idx.global, ch, sample, adc, amplitude, pedestal, gainratio);
0222 if (adc == 0)
0223 printf("adc is zero\n");
0224 #endif
0225
0226
0227
0228
0229 amplitudesForMinimization[inputCh](sample) = 0;
0230 bxs[ch](sample) = sample - 5;
0231
0232
0233
0234 if (sample == sample_max) {
0235
0236
0237
0238 acState[ch] = static_cast<char>(MinimizationState::NotFinished);
0239 energies[inputCh] = 0;
0240 chi2[inputCh] = 0;
0241 g_pedestal[inputCh] = 0;
0242 uint32_t flag = 0;
0243 dids_out[inputCh] = did.rawId();
0244
0245
0246 auto const chStart = idx.local - sample_max;
0247
0248 auto const threadMax = idx.local;
0249 auto const gainSwitchUseMaxSample = isBarrel ? gainSwitchUseMaxSampleEB : gainSwitchUseMaxSampleEE;
0250
0251
0252 if (shr_hasSwitchToGain6[chStart])
0253 flag |= 0x1 << EcalUncalibratedRecHit::kHasSwitchToGain6;
0254 if (shr_hasSwitchToGain1[chStart])
0255 flag |= 0x1 << EcalUncalibratedRecHit::kHasSwitchToGain1;
0256
0257
0258
0259
0260 if (sample == 0 && shr_hasSwitchToGain0_tmp[idx.local]) {
0261
0262
0263 energies[inputCh] = amplitude;
0264
0265
0266 bool saturated_before_max = false;
0267 CMS_UNROLL_LOOP
0268 for (char ii = 0; ii < 5; ++ii)
0269 saturated_before_max = saturated_before_max || shr_hasSwitchToGain0[chStart + ii];
0270
0271
0272 if (!saturated_before_max && shr_hasSwitchToGain0[threadMax])
0273 energies[inputCh] = 49140;
0274
0275
0276
0277
0278
0279 acState[ch] = static_cast<char>(MinimizationState::Precomputed);
0280 flag |= 0x1 << EcalUncalibratedRecHit::kSaturated;
0281 flags[inputCh] = flag;
0282 continue;
0283 }
0284
0285
0286
0287 auto const max_amplitude = amplitude;
0288
0289 auto shape_value = conditionsDev.pulseShapes()[hashedId][full_pulse_max - 7];
0290
0291 bool hasGainSwitch =
0292 shr_hasSwitchToGain6[chStart] || shr_hasSwitchToGain1[chStart] || shr_isSaturated[chStart + 3];
0293
0294
0295 g_pedestal[inputCh] = pedestal;
0296 if (hasGainSwitch && gainSwitchUseMaxSample) {
0297
0298 energies[inputCh] = max_amplitude / shape_value;
0299 acState[ch] = static_cast<char>(MinimizationState::Precomputed);
0300 flags[inputCh] = flag;
0301 continue;
0302 }
0303
0304
0305 auto const rmsForChecking = conditionsDev.pedestals_rms_x12()[hashedId];
0306
0307
0308
0309
0310 if (rmsForChecking == 0) {
0311 acState[ch] = static_cast<char>(MinimizationState::Precomputed);
0312 flags[inputCh] = flag;
0313 continue;
0314 }
0315
0316
0317 flags[inputCh] = flag;
0318 }
0319 }
0320 }
0321 }
0322 };
0323
0324
0325
0326
0327
0328 class Kernel_prep_2d {
0329 public:
0330 ALPAKA_FN_ACC void operator()(Acc2D const& acc,
0331 EcalDigiDeviceCollection::ConstView digisDevEB,
0332 EcalDigiDeviceCollection::ConstView digisDevEE,
0333 EcalMultifitConditionsDevice::ConstView conditionsDev,
0334 ::ecal::multifit::SampleGainVector const* gainsNoise,
0335 ::ecal::multifit::SampleMatrix* noisecov,
0336 ::ecal::multifit::PulseMatrixType* pulse_matrix,
0337 bool const* hasSwitchToGain6,
0338 bool const* hasSwitchToGain1,
0339 bool const* isSaturated) const {
0340 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0341 auto const offsetForHashes = conditionsDev.offsetEE();
0342 auto const nchannelsEB = digisDevEB.size();
0343 constexpr float addPedestalUncertainty = 0.f;
0344 constexpr bool dynamicPedestal = false;
0345 constexpr bool simplifiedNoiseModelForGainSwitch = true;
0346
0347
0348 auto const* pulse_shapes = reinterpret_cast<const EcalPulseShape*>(conditionsDev.pulseShapes()->data());
0349
0350 auto const blockDimX = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[1u];
0351 auto const elemsPerBlockX = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[1u];
0352 auto const elemsPerBlockY = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0353 Vec2D const size_2d = {elemsPerBlockY, blockDimX * elemsPerBlockX};
0354
0355 for (auto ndindex : cms::alpakatools::uniform_elements_nd(acc, size_2d)) {
0356 auto const ch = ndindex[1] / nsamples;
0357 auto const tx = ndindex[1] % nsamples;
0358 auto const ty = ndindex[0];
0359
0360
0361 int const inputCh = ch >= nchannelsEB ? ch - nchannelsEB : ch;
0362 auto const* dids = ch >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
0363
0364 auto const did = DetId{dids[inputCh]};
0365 auto const isBarrel = did.subdetId() == EcalBarrel;
0366 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
0367 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
0368 auto const* G12SamplesCorrelation = isBarrel ? conditionsDev.sampleCorrelation_EB_G12().data()
0369 : conditionsDev.sampleCorrelation_EE_G12().data();
0370 auto const* G6SamplesCorrelation =
0371 isBarrel ? conditionsDev.sampleCorrelation_EB_G6().data() : conditionsDev.sampleCorrelation_EE_G6().data();
0372 auto const* G1SamplesCorrelation =
0373 isBarrel ? conditionsDev.sampleCorrelation_EB_G1().data() : conditionsDev.sampleCorrelation_EE_G1().data();
0374 auto const hasGainSwitch = hasSwitchToGain6[ch] || hasSwitchToGain1[ch] || isSaturated[ch];
0375
0376 auto const vidx = std::abs(static_cast<int>(ty) - static_cast<int>(tx));
0377
0378
0379 if (hasGainSwitch) {
0380
0381 float noise_value = 0;
0382
0383
0384
0385
0386
0387 if (simplifiedNoiseModelForGainSwitch) {
0388 constexpr int isample_max = 5;
0389 auto const gainidx = gainsNoise[ch][isample_max];
0390
0391
0392 if (gainidx == 0) {
0393 auto const rms_x12 = conditionsDev.pedestals_rms_x12()[hashedId];
0394 noise_value = rms_x12 * rms_x12 * G12SamplesCorrelation[vidx];
0395 } else if (gainidx == 1) {
0396 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0397 auto const rms_x6 = conditionsDev.pedestals_rms_x6()[hashedId];
0398 noise_value = gain12Over6 * gain12Over6 * rms_x6 * rms_x6 * G6SamplesCorrelation[vidx];
0399 } else if (gainidx == 2) {
0400 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0401 auto const gain6Over1 = conditionsDev.gain6Over1()[hashedId];
0402 auto const gain12Over1 = gain12Over6 * gain6Over1;
0403 auto const rms_x1 = conditionsDev.pedestals_rms_x1()[hashedId];
0404 noise_value = gain12Over1 * gain12Over1 * rms_x1 * rms_x1 * G1SamplesCorrelation[vidx];
0405 }
0406 if (!dynamicPedestal && addPedestalUncertainty > 0.f)
0407 noise_value += addPedestalUncertainty * addPedestalUncertainty;
0408 } else {
0409 int gainidx = 0;
0410 char mask = gainidx;
0411 int pedestal = gainsNoise[ch][ty] == mask ? 1 : 0;
0412
0413 auto const rms_x12 = conditionsDev.pedestals_rms_x12()[hashedId];
0414 noise_value += rms_x12 * rms_x12 * pedestal * G12SamplesCorrelation[vidx];
0415
0416 if (!dynamicPedestal && addPedestalUncertainty > 0.f) {
0417 noise_value += addPedestalUncertainty * addPedestalUncertainty * pedestal;
0418 }
0419
0420
0421 gainidx = 1;
0422 mask = gainidx;
0423 pedestal = gainsNoise[ch][ty] == mask ? 1 : 0;
0424 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0425 auto const rms_x6 = conditionsDev.pedestals_rms_x6()[hashedId];
0426 noise_value += gain12Over6 * gain12Over6 * rms_x6 * rms_x6 * pedestal * G6SamplesCorrelation[vidx];
0427
0428 if (!dynamicPedestal && addPedestalUncertainty > 0.f) {
0429 noise_value += gain12Over6 * gain12Over6 * addPedestalUncertainty * addPedestalUncertainty * pedestal;
0430 }
0431
0432
0433 gainidx = 2;
0434 mask = gainidx;
0435 pedestal = gainsNoise[ch][ty] == mask ? 1 : 0;
0436 auto const gain6Over1 = conditionsDev.gain6Over1()[hashedId];
0437 auto const gain12Over1 = gain12Over6 * gain6Over1;
0438 auto const rms_x1 = conditionsDev.pedestals_rms_x1()[hashedId];
0439 noise_value += gain12Over1 * gain12Over1 * rms_x1 * rms_x1 * pedestal * G1SamplesCorrelation[vidx];
0440
0441 if (!dynamicPedestal && addPedestalUncertainty > 0.f) {
0442 noise_value += gain12Over1 * gain12Over1 * addPedestalUncertainty * addPedestalUncertainty * pedestal;
0443 }
0444 }
0445
0446 noisecov[ch](ty, tx) = noise_value;
0447 } else {
0448 auto const rms = conditionsDev.pedestals_rms_x12()[hashedId];
0449 float noise_value = rms * rms * G12SamplesCorrelation[vidx];
0450 if (!dynamicPedestal && addPedestalUncertainty > 0.f) {
0451
0452 noise_value += addPedestalUncertainty * addPedestalUncertainty;
0453 }
0454 noisecov[ch](ty, tx) = noise_value;
0455 }
0456
0457 auto const posToAccess = 9 - static_cast<int>(tx) + static_cast<int>(ty);
0458 float const value = posToAccess >= 7 ? pulse_shapes[hashedId].pdfval[posToAccess - 7] : 0;
0459 pulse_matrix[ch](ty, tx) = value;
0460 }
0461 }
0462 };
0463
0464 }
0465
0466 namespace alpaka::trait {
0467 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0468 using namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit;
0469
0470
0471 template <>
0472 struct BlockSharedMemDynSizeBytes<Kernel_prep_1d_and_initialize, Acc1D> {
0473
0474 template <typename TVec, typename... TArgs>
0475 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_prep_1d_and_initialize const&,
0476 TVec const& threadsPerBlock,
0477 TVec const& elemsPerThread,
0478 TArgs const&...) -> std::size_t {
0479
0480 std::size_t bytes = threadsPerBlock[0u] * elemsPerThread[0u] * (5 * sizeof(bool) + sizeof(char));
0481 return bytes;
0482 }
0483 };
0484 }
0485
0486 #endif