Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-06-28 02:36:34

0001 #include <iostream>
0002 #include <limits>
0003 #include <alpaka/alpaka.hpp>
0004 
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006 
0007 #include "AmplitudeComputationCommonKernels.h"
0008 #include "AmplitudeComputationKernels.h"
0009 #include "EcalUncalibRecHitMultiFitAlgoPortable.h"
0010 #include "TimeComputationKernels.h"
0011 
0012 //#define DEBUG
0013 //#define ECAL_RECO_ALPAKA_DEBUG
0014 
0015 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0016 
0017   using namespace cms::alpakatools;
0018 
0019   void launchKernels(Queue& queue,
0020                      InputProduct const& digisDevEB,
0021                      InputProduct const& digisDevEE,
0022                      OutputProduct& uncalibRecHitsDevEB,
0023                      OutputProduct& uncalibRecHitsDevEE,
0024                      EcalMultifitConditionsDevice const& conditionsDev,
0025                      EcalMultifitParametersDevice const& paramsDev,
0026                      ConfigurationParameters const& configParams) {
0027     using digis_type = std::vector<uint16_t>;
0028     using dids_type = std::vector<uint32_t>;
0029     // according to the cpu setup  //----> hardcoded
0030     bool constexpr gainSwitchUseMaxSampleEB = true;
0031     // according to the cpu setup  //----> hardcoded
0032     bool constexpr gainSwitchUseMaxSampleEE = false;
0033     auto constexpr kMaxSamples = EcalDataFrame::MAXSAMPLES;
0034 
0035     auto const ebSize = static_cast<uint32_t>(uncalibRecHitsDevEB.const_view().metadata().size());
0036     auto const totalChannels = ebSize + static_cast<uint32_t>(uncalibRecHitsDevEE.const_view().metadata().size());
0037 
0038     EventDataForScratchDevice scratch(configParams, totalChannels, queue);
0039 
0040     //
0041     // 1d preparation kernel
0042     //
0043     uint32_t constexpr nchannels_per_block = 32;
0044     auto constexpr threads_1d = kMaxSamples * nchannels_per_block;
0045     auto const blocks_1d = cms::alpakatools::divide_up_by(totalChannels * kMaxSamples, threads_1d);
0046     auto workDivPrep1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_1d, threads_1d);
0047     // Since the ::ecal::multifit::X objects are non-dynamic Eigen::Matrix types the returned pointers from the buffers
0048     // and the ::ecal::multifit::X* both point to the data.
0049     alpaka::exec<Acc1D>(queue,
0050                         workDivPrep1D,
0051                         Kernel_prep_1d_and_initialize{},
0052                         digisDevEB.const_view(),
0053                         digisDevEE.const_view(),
0054                         uncalibRecHitsDevEB.view(),
0055                         uncalibRecHitsDevEE.view(),
0056                         conditionsDev.const_view(),
0057                         reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
0058                         reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0059                         scratch.hasSwitchToGain6DevBuf.data(),
0060                         scratch.hasSwitchToGain1DevBuf.data(),
0061                         scratch.isSaturatedDevBuf.data(),
0062                         scratch.acStateDevBuf.data(),
0063                         reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
0064                         gainSwitchUseMaxSampleEB,
0065                         gainSwitchUseMaxSampleEE);
0066 
0067     //
0068     // 2d preparation kernel
0069     //
0070     Vec2D const blocks_2d{1u, totalChannels};  // {y, x} coordiantes
0071     Vec2D const threads_2d{kMaxSamples, kMaxSamples};
0072     auto workDivPrep2D = cms::alpakatools::make_workdiv<Acc2D>(blocks_2d, threads_2d);
0073     alpaka::exec<Acc2D>(queue,
0074                         workDivPrep2D,
0075                         Kernel_prep_2d{},
0076                         digisDevEB.const_view(),
0077                         digisDevEE.const_view(),
0078                         conditionsDev.const_view(),
0079                         reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0080                         reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
0081                         reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
0082                         scratch.hasSwitchToGain6DevBuf.data(),
0083                         scratch.hasSwitchToGain1DevBuf.data(),
0084                         scratch.isSaturatedDevBuf.data());
0085 
0086     // run minimization kernels
0087     minimization_procedure(queue,
0088                            digisDevEB,
0089                            digisDevEE,
0090                            uncalibRecHitsDevEB,
0091                            uncalibRecHitsDevEE,
0092                            scratch,
0093                            conditionsDev,
0094                            configParams,
0095                            totalChannels);
0096 
0097     if (configParams.shouldRunTimingComputation) {
0098       //
0099       // TODO: this guy can run concurrently with other kernels,
0100       // there is no dependence on the order of execution
0101       //
0102       auto const blocks_time_init = blocks_1d;
0103       auto const threads_time_init = threads_1d;
0104       auto workDivTimeCompInit1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_time_init, threads_time_init);
0105       alpaka::exec<Acc1D>(queue,
0106                           workDivTimeCompInit1D,
0107                           Kernel_time_computation_init{},
0108                           digisDevEB.const_view(),
0109                           digisDevEE.const_view(),
0110                           conditionsDev.const_view(),
0111                           scratch.sample_valuesDevBuf.value().data(),
0112                           scratch.sample_value_errorsDevBuf.value().data(),
0113                           scratch.ampMaxErrorDevBuf.value().data(),
0114                           scratch.useless_sample_valuesDevBuf.value().data(),
0115                           scratch.pedestal_numsDevBuf.value().data());
0116 
0117       //
0118       // TODO: small kernel only for EB. It needs to be checked if
0119       /// fusing such small kernels is beneficial in here
0120       //
0121       if (ebSize > 0) {
0122         // we are running only over EB digis
0123         // therefore we need to create threads/blocks only for that
0124         auto const threadsFixMGPA = threads_1d;
0125         auto const blocksFixMGPA = cms::alpakatools::divide_up_by(kMaxSamples * ebSize, threadsFixMGPA);
0126         auto workDivTimeFixMGPAslew1D = cms::alpakatools::make_workdiv<Acc1D>(blocksFixMGPA, threadsFixMGPA);
0127         alpaka::exec<Acc1D>(queue,
0128                             workDivTimeFixMGPAslew1D,
0129                             Kernel_time_compute_fixMGPAslew{},
0130                             digisDevEB.const_view(),
0131                             digisDevEE.const_view(),
0132                             conditionsDev.const_view(),
0133                             scratch.sample_valuesDevBuf.value().data(),
0134                             scratch.sample_value_errorsDevBuf.value().data(),
0135                             scratch.useless_sample_valuesDevBuf.value().data());
0136       }
0137 
0138       auto const threads_nullhypot = threads_1d;
0139       auto const blocks_nullhypot = blocks_1d;
0140       auto workDivTimeNullhypot1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_nullhypot, threads_nullhypot);
0141       alpaka::exec<Acc1D>(queue,
0142                           workDivTimeNullhypot1D,
0143                           Kernel_time_compute_nullhypot{},
0144                           scratch.sample_valuesDevBuf.value().data(),
0145                           scratch.sample_value_errorsDevBuf.value().data(),
0146                           scratch.useless_sample_valuesDevBuf.value().data(),
0147                           scratch.chi2sNullHypotDevBuf.value().data(),
0148                           scratch.sum0sNullHypotDevBuf.value().data(),
0149                           scratch.sumAAsNullHypotDevBuf.value().data(),
0150                           totalChannels);
0151 
0152       constexpr uint32_t nchannels_per_block_makeratio = kMaxSamples;
0153       constexpr auto nthreads_per_channel =
0154           nchannels_per_block_makeratio * (nchannels_per_block_makeratio - 1) / 2;  // n(n-1)/2
0155       constexpr auto threads_makeratio = nthreads_per_channel * nchannels_per_block_makeratio;
0156       auto const blocks_makeratio =
0157           cms::alpakatools::divide_up_by(nthreads_per_channel * totalChannels, threads_makeratio);
0158       auto workDivTimeMakeRatio1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_makeratio, threads_makeratio);
0159       alpaka::exec<Acc1D>(queue,
0160                           workDivTimeMakeRatio1D,
0161                           Kernel_time_compute_makeratio{},
0162                           digisDevEB.const_view(),
0163                           digisDevEE.const_view(),
0164                           scratch.sample_valuesDevBuf.value().data(),
0165                           scratch.sample_value_errorsDevBuf.value().data(),
0166                           scratch.useless_sample_valuesDevBuf.value().data(),
0167                           scratch.pedestal_numsDevBuf.value().data(),
0168                           scratch.sumAAsNullHypotDevBuf.value().data(),
0169                           scratch.sum0sNullHypotDevBuf.value().data(),
0170                           scratch.tMaxAlphaBetasDevBuf.value().data(),
0171                           scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0172                           scratch.accTimeMaxDevBuf.value().data(),
0173                           scratch.accTimeWgtDevBuf.value().data(),
0174                           scratch.tcStateDevBuf.value().data(),
0175                           paramsDev.const_view(),
0176                           configParams.timeFitLimitsFirstEB,
0177                           configParams.timeFitLimitsFirstEE,
0178                           configParams.timeFitLimitsSecondEB,
0179                           configParams.timeFitLimitsSecondEE);
0180 
0181       auto const threads_findamplchi2 = threads_1d;
0182       auto const blocks_findamplchi2 = blocks_1d;
0183       auto workDivTimeFindAmplChi21D = cms::alpakatools::make_workdiv<Acc1D>(blocks_findamplchi2, threads_findamplchi2);
0184       alpaka::exec<Acc1D>(queue,
0185                           workDivTimeFindAmplChi21D,
0186                           Kernel_time_compute_findamplchi2_and_finish{},
0187                           digisDevEB.const_view(),
0188                           digisDevEE.const_view(),
0189                           scratch.sample_valuesDevBuf.value().data(),
0190                           scratch.sample_value_errorsDevBuf.value().data(),
0191                           scratch.useless_sample_valuesDevBuf.value().data(),
0192                           scratch.tMaxAlphaBetasDevBuf.value().data(),
0193                           scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0194                           scratch.accTimeMaxDevBuf.value().data(),
0195                           scratch.accTimeWgtDevBuf.value().data(),
0196                           scratch.sumAAsNullHypotDevBuf.value().data(),
0197                           scratch.sum0sNullHypotDevBuf.value().data(),
0198                           scratch.chi2sNullHypotDevBuf.value().data(),
0199                           scratch.tcStateDevBuf.value().data(),
0200                           scratch.ampMaxAlphaBetaDevBuf.value().data(),
0201                           scratch.ampMaxErrorDevBuf.value().data(),
0202                           scratch.timeMaxDevBuf.value().data(),
0203                           scratch.timeErrorDevBuf.value().data(),
0204                           paramsDev.const_view());
0205 
0206       auto const threads_timecorr = 32;
0207       auto const blocks_timecorr = cms::alpakatools::divide_up_by(totalChannels, threads_timecorr);
0208       auto workDivCorrFinal1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_timecorr, threads_timecorr);
0209       alpaka::exec<Acc1D>(queue,
0210                           workDivCorrFinal1D,
0211                           Kernel_time_correction_and_finalize{},
0212                           digisDevEB.const_view(),
0213                           digisDevEE.const_view(),
0214                           uncalibRecHitsDevEB.view(),
0215                           uncalibRecHitsDevEE.view(),
0216                           conditionsDev.const_view(),
0217                           scratch.timeMaxDevBuf.value().data(),
0218                           scratch.timeErrorDevBuf.value().data(),
0219                           configParams.timeConstantTermEB,
0220                           configParams.timeConstantTermEE,
0221                           configParams.timeNconstEB,
0222                           configParams.timeNconstEE,
0223                           configParams.amplitudeThreshEB,
0224                           configParams.amplitudeThreshEE,
0225                           configParams.outOfTimeThreshG12pEB,
0226                           configParams.outOfTimeThreshG12pEE,
0227                           configParams.outOfTimeThreshG12mEB,
0228                           configParams.outOfTimeThreshG12mEE,
0229                           configParams.outOfTimeThreshG61pEB,
0230                           configParams.outOfTimeThreshG61pEE,
0231                           configParams.outOfTimeThreshG61mEB,
0232                           configParams.outOfTimeThreshG61mEE);
0233     }
0234   }
0235 
0236 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit