File indexing completed on 2025-01-14 23:17:06
0001 #include <iostream>
0002 #include <limits>
0003 #include <alpaka/alpaka.hpp>
0004
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006
0007 #include "AmplitudeComputationCommonKernels.h"
0008 #include "AmplitudeComputationKernels.h"
0009 #include "EcalUncalibRecHitMultiFitAlgoPortable.h"
0010 #include "TimeComputationKernels.h"
0011
0012
0013
0014
0015 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0016
0017 using namespace cms::alpakatools;
0018
0019 void launchKernels(Queue& queue,
0020 InputProduct const& digisDevEB,
0021 InputProduct const& digisDevEE,
0022 OutputProduct& uncalibRecHitsDevEB,
0023 OutputProduct& uncalibRecHitsDevEE,
0024 EcalMultifitConditionsDevice const& conditionsDev,
0025 EcalMultifitParameters const* paramsDev,
0026 ConfigurationParameters const& configParams) {
0027 using digis_type = std::vector<uint16_t>;
0028 using dids_type = std::vector<uint32_t>;
0029
0030 bool constexpr gainSwitchUseMaxSampleEB = true;
0031
0032 bool constexpr gainSwitchUseMaxSampleEE = false;
0033 auto constexpr kMaxSamples = EcalDataFrame::MAXSAMPLES;
0034
0035 auto const ebSize = static_cast<uint32_t>(uncalibRecHitsDevEB.const_view().metadata().size());
0036 auto const totalChannels = ebSize + static_cast<uint32_t>(uncalibRecHitsDevEE.const_view().metadata().size());
0037
0038 EventDataForScratchDevice scratch(configParams, totalChannels, queue);
0039
0040
0041
0042
0043 uint32_t constexpr nchannels_per_block = 32;
0044 auto constexpr threads_1d = kMaxSamples * nchannels_per_block;
0045 auto const blocks_1d = cms::alpakatools::divide_up_by(totalChannels * kMaxSamples, threads_1d);
0046 auto workDivPrep1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_1d, threads_1d);
0047
0048
0049 alpaka::exec<Acc1D>(queue,
0050 workDivPrep1D,
0051 Kernel_prep_1d_and_initialize{},
0052 digisDevEB.const_view(),
0053 digisDevEE.const_view(),
0054 uncalibRecHitsDevEB.view(),
0055 uncalibRecHitsDevEE.view(),
0056 conditionsDev.const_view(),
0057 reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
0058 reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0059 scratch.hasSwitchToGain6DevBuf.data(),
0060 scratch.hasSwitchToGain1DevBuf.data(),
0061 scratch.isSaturatedDevBuf.data(),
0062 scratch.acStateDevBuf.data(),
0063 reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
0064 gainSwitchUseMaxSampleEB,
0065 gainSwitchUseMaxSampleEE);
0066
0067
0068
0069
0070 Vec2D const blocks_2d{1u, totalChannels};
0071 Vec2D const threads_2d{kMaxSamples, kMaxSamples};
0072 auto workDivPrep2D = cms::alpakatools::make_workdiv<Acc2D>(blocks_2d, threads_2d);
0073 alpaka::exec<Acc2D>(queue,
0074 workDivPrep2D,
0075 Kernel_prep_2d{},
0076 digisDevEB.const_view(),
0077 digisDevEE.const_view(),
0078 conditionsDev.const_view(),
0079 reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0080 reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
0081 reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
0082 scratch.hasSwitchToGain6DevBuf.data(),
0083 scratch.hasSwitchToGain1DevBuf.data(),
0084 scratch.isSaturatedDevBuf.data());
0085
0086
0087 minimization_procedure(queue,
0088 digisDevEB,
0089 digisDevEE,
0090 uncalibRecHitsDevEB,
0091 uncalibRecHitsDevEE,
0092 scratch,
0093 conditionsDev,
0094 configParams,
0095 totalChannels);
0096
0097 if (configParams.shouldRunTimingComputation) {
0098
0099
0100
0101
0102 auto const blocks_time_init = blocks_1d;
0103 auto const threads_time_init = threads_1d;
0104 auto workDivTimeCompInit1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_time_init, threads_time_init);
0105 alpaka::exec<Acc1D>(queue,
0106 workDivTimeCompInit1D,
0107 Kernel_time_computation_init{},
0108 digisDevEB.const_view(),
0109 digisDevEE.const_view(),
0110 conditionsDev.const_view(),
0111 scratch.sample_valuesDevBuf.value().data(),
0112 scratch.sample_value_errorsDevBuf.value().data(),
0113 scratch.ampMaxErrorDevBuf.value().data(),
0114 scratch.useless_sample_valuesDevBuf.value().data(),
0115 scratch.pedestal_numsDevBuf.value().data());
0116
0117
0118
0119
0120
0121 if (ebSize > 0) {
0122
0123
0124 auto const threadsFixMGPA = threads_1d;
0125 auto const blocksFixMGPA = cms::alpakatools::divide_up_by(kMaxSamples * ebSize, threadsFixMGPA);
0126 auto workDivTimeFixMGPAslew1D = cms::alpakatools::make_workdiv<Acc1D>(blocksFixMGPA, threadsFixMGPA);
0127 alpaka::exec<Acc1D>(queue,
0128 workDivTimeFixMGPAslew1D,
0129 Kernel_time_compute_fixMGPAslew{},
0130 digisDevEB.const_view(),
0131 digisDevEE.const_view(),
0132 conditionsDev.const_view(),
0133 scratch.sample_valuesDevBuf.value().data(),
0134 scratch.sample_value_errorsDevBuf.value().data(),
0135 scratch.useless_sample_valuesDevBuf.value().data());
0136 }
0137
0138 auto const threads_nullhypot = threads_1d;
0139 auto const blocks_nullhypot = blocks_1d;
0140 auto workDivTimeNullhypot1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_nullhypot, threads_nullhypot);
0141 alpaka::exec<Acc1D>(queue,
0142 workDivTimeNullhypot1D,
0143 Kernel_time_compute_nullhypot{},
0144 scratch.sample_valuesDevBuf.value().data(),
0145 scratch.sample_value_errorsDevBuf.value().data(),
0146 scratch.useless_sample_valuesDevBuf.value().data(),
0147 scratch.chi2sNullHypotDevBuf.value().data(),
0148 scratch.sum0sNullHypotDevBuf.value().data(),
0149 scratch.sumAAsNullHypotDevBuf.value().data(),
0150 totalChannels);
0151
0152 constexpr uint32_t nchannels_per_block_makeratio = kMaxSamples;
0153 constexpr auto nthreads_per_channel =
0154 nchannels_per_block_makeratio * (nchannels_per_block_makeratio - 1) / 2;
0155 constexpr auto threads_makeratio = nthreads_per_channel * nchannels_per_block_makeratio;
0156 auto const blocks_makeratio =
0157 cms::alpakatools::divide_up_by(nthreads_per_channel * totalChannels, threads_makeratio);
0158 auto workDivTimeMakeRatio1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_makeratio, threads_makeratio);
0159 alpaka::exec<Acc1D>(queue,
0160 workDivTimeMakeRatio1D,
0161 Kernel_time_compute_makeratio{},
0162 digisDevEB.const_view(),
0163 digisDevEE.const_view(),
0164 scratch.sample_valuesDevBuf.value().data(),
0165 scratch.sample_value_errorsDevBuf.value().data(),
0166 scratch.useless_sample_valuesDevBuf.value().data(),
0167 scratch.pedestal_numsDevBuf.value().data(),
0168 scratch.sumAAsNullHypotDevBuf.value().data(),
0169 scratch.sum0sNullHypotDevBuf.value().data(),
0170 scratch.tMaxAlphaBetasDevBuf.value().data(),
0171 scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0172 scratch.accTimeMaxDevBuf.value().data(),
0173 scratch.accTimeWgtDevBuf.value().data(),
0174 scratch.tcStateDevBuf.value().data(),
0175 paramsDev,
0176 configParams.timeFitLimitsFirstEB,
0177 configParams.timeFitLimitsFirstEE,
0178 configParams.timeFitLimitsSecondEB,
0179 configParams.timeFitLimitsSecondEE);
0180
0181 auto const threads_findamplchi2 = threads_1d;
0182 auto const blocks_findamplchi2 = blocks_1d;
0183 auto workDivTimeFindAmplChi21D = cms::alpakatools::make_workdiv<Acc1D>(blocks_findamplchi2, threads_findamplchi2);
0184 alpaka::exec<Acc1D>(queue,
0185 workDivTimeFindAmplChi21D,
0186 Kernel_time_compute_findamplchi2_and_finish{},
0187 digisDevEB.const_view(),
0188 digisDevEE.const_view(),
0189 scratch.sample_valuesDevBuf.value().data(),
0190 scratch.sample_value_errorsDevBuf.value().data(),
0191 scratch.useless_sample_valuesDevBuf.value().data(),
0192 scratch.tMaxAlphaBetasDevBuf.value().data(),
0193 scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0194 scratch.accTimeMaxDevBuf.value().data(),
0195 scratch.accTimeWgtDevBuf.value().data(),
0196 scratch.sumAAsNullHypotDevBuf.value().data(),
0197 scratch.sum0sNullHypotDevBuf.value().data(),
0198 scratch.chi2sNullHypotDevBuf.value().data(),
0199 scratch.tcStateDevBuf.value().data(),
0200 scratch.ampMaxAlphaBetaDevBuf.value().data(),
0201 scratch.ampMaxErrorDevBuf.value().data(),
0202 scratch.timeMaxDevBuf.value().data(),
0203 scratch.timeErrorDevBuf.value().data(),
0204 paramsDev);
0205
0206 auto const threads_timecorr = 32;
0207 auto const blocks_timecorr = cms::alpakatools::divide_up_by(totalChannels, threads_timecorr);
0208 auto workDivCorrFinal1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_timecorr, threads_timecorr);
0209 alpaka::exec<Acc1D>(queue,
0210 workDivCorrFinal1D,
0211 Kernel_time_correction_and_finalize{},
0212 digisDevEB.const_view(),
0213 digisDevEE.const_view(),
0214 uncalibRecHitsDevEB.view(),
0215 uncalibRecHitsDevEE.view(),
0216 conditionsDev.const_view(),
0217 scratch.timeMaxDevBuf.value().data(),
0218 scratch.timeErrorDevBuf.value().data(),
0219 configParams.timeConstantTermEB,
0220 configParams.timeConstantTermEE,
0221 configParams.timeNconstEB,
0222 configParams.timeNconstEE,
0223 configParams.amplitudeThreshEB,
0224 configParams.amplitudeThreshEE,
0225 configParams.outOfTimeThreshG12pEB,
0226 configParams.outOfTimeThreshG12pEE,
0227 configParams.outOfTimeThreshG12mEB,
0228 configParams.outOfTimeThreshG12mEE,
0229 configParams.outOfTimeThreshG61pEB,
0230 configParams.outOfTimeThreshG61pEE,
0231 configParams.outOfTimeThreshG61mEB,
0232 configParams.outOfTimeThreshG61mEE);
0233 }
0234 }
0235
0236 }