Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:25:41

0001 #include <iostream>
0002 #include <limits>
0003 #include <alpaka/alpaka.hpp>
0004 
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006 
0007 #include "AmplitudeComputationCommonKernels.h"
0008 #include "AmplitudeComputationKernels.h"
0009 #include "EcalUncalibRecHitMultiFitAlgoPortable.h"
0010 #include "TimeComputationKernels.h"
0011 
0012 //#define DEBUG
0013 //#define ECAL_RECO_ALPAKA_DEBUG
0014 
0015 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0016 
0017   using namespace cms::alpakatools;
0018 
0019   void launchKernels(Queue& queue,
0020                      InputProduct const& digisDevEB,
0021                      InputProduct const& digisDevEE,
0022                      OutputProduct& uncalibRecHitsDevEB,
0023                      OutputProduct& uncalibRecHitsDevEE,
0024                      EcalMultifitConditionsDevice const& conditionsDev,
0025                      EcalMultifitParametersDevice const& paramsDev,
0026                      ConfigurationParameters const& configParams) {
0027     using digis_type = std::vector<uint16_t>;
0028     using dids_type = std::vector<uint32_t>;
0029     // according to the cpu setup  //----> hardcoded
0030     bool constexpr gainSwitchUseMaxSampleEB = true;
0031     // according to the cpu setup  //----> hardcoded
0032     bool constexpr gainSwitchUseMaxSampleEE = false;
0033     auto constexpr kMaxSamples = EcalDataFrame::MAXSAMPLES;
0034 
0035     auto const ebSize = static_cast<uint32_t>(uncalibRecHitsDevEB.const_view().metadata().size());
0036     auto const totalChannels = ebSize + static_cast<uint32_t>(uncalibRecHitsDevEE.const_view().metadata().size());
0037 
0038     EventDataForScratchDevice scratch(configParams, totalChannels, queue);
0039 
0040     //
0041     // 1d preparation kernel
0042     //
0043     uint32_t constexpr nchannels_per_block = 32;
0044     auto constexpr threads_1d = kMaxSamples * nchannels_per_block;
0045     auto const blocks_1d = cms::alpakatools::divide_up_by(totalChannels * kMaxSamples, threads_1d);
0046     auto workDivPrep1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_1d, threads_1d);
0047     // Since the ::ecal::multifit::X objects are non-dynamic Eigen::Matrix types the returned pointers from the buffers
0048     // and the ::ecal::multifit::X* both point to the data.
0049     alpaka::exec<Acc1D>(queue,
0050                         workDivPrep1D,
0051                         Kernel_prep_1d_and_initialize{},
0052                         digisDevEB.const_view(),
0053                         digisDevEE.const_view(),
0054                         uncalibRecHitsDevEB.view(),
0055                         uncalibRecHitsDevEE.view(),
0056                         conditionsDev.const_view(),
0057                         reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
0058                         reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0059                         scratch.hasSwitchToGain6DevBuf.data(),
0060                         scratch.hasSwitchToGain1DevBuf.data(),
0061                         scratch.isSaturatedDevBuf.data(),
0062                         scratch.acStateDevBuf.data(),
0063                         reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
0064                         gainSwitchUseMaxSampleEB,
0065                         gainSwitchUseMaxSampleEE);
0066 
0067     //
0068     // 2d preparation kernel
0069     //
0070     Vec2D const blocks_2d{1u, totalChannels};  // {y, x} coordiantes
0071     Vec2D const threads_2d{kMaxSamples, kMaxSamples};
0072     auto workDivPrep2D = cms::alpakatools::make_workdiv<Acc2D>(blocks_2d, threads_2d);
0073     alpaka::exec<Acc2D>(queue,
0074                         workDivPrep2D,
0075                         Kernel_prep_2d{},
0076                         digisDevEB.const_view(),
0077                         digisDevEE.const_view(),
0078                         conditionsDev.const_view(),
0079                         reinterpret_cast<::ecal::multifit::SampleGainVector*>(scratch.gainsNoiseDevBuf.data()),
0080                         reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
0081                         reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
0082                         scratch.hasSwitchToGain6DevBuf.data(),
0083                         scratch.hasSwitchToGain1DevBuf.data(),
0084                         scratch.isSaturatedDevBuf.data());
0085 
0086     // run minimization kernels
0087     minimization_procedure(queue,
0088                            digisDevEB,
0089                            digisDevEE,
0090                            uncalibRecHitsDevEB,
0091                            uncalibRecHitsDevEE,
0092                            scratch,
0093                            conditionsDev,
0094                            configParams,
0095                            totalChannels);
0096 
0097     if (configParams.shouldRunTimingComputation) {
0098       //
0099       // TODO: this guy can run concurrently with other kernels,
0100       // there is no dependence on the order of execution
0101       //
0102       auto const blocks_time_init = blocks_1d;
0103       auto const threads_time_init = threads_1d;
0104       auto workDivTimeCompInit1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_time_init, threads_time_init);
0105       alpaka::exec<Acc1D>(queue,
0106                           workDivTimeCompInit1D,
0107                           Kernel_time_computation_init{},
0108                           digisDevEB.const_view(),
0109                           digisDevEE.const_view(),
0110                           conditionsDev.const_view(),
0111                           scratch.sample_valuesDevBuf.value().data(),
0112                           scratch.sample_value_errorsDevBuf.value().data(),
0113                           scratch.ampMaxErrorDevBuf.value().data(),
0114                           scratch.useless_sample_valuesDevBuf.value().data(),
0115                           scratch.pedestal_numsDevBuf.value().data());
0116 
0117       //
0118       // TODO: small kernel only for EB. It needs to be checked if
0119       /// fusing such small kernels is beneficial in here
0120       //
0121       // we are running only over EB digis
0122       // therefore we need to create threads/blocks only for that
0123       auto const threadsFixMGPA = threads_1d;
0124       auto const blocksFixMGPA = cms::alpakatools::divide_up_by(kMaxSamples * ebSize, threadsFixMGPA);
0125       auto workDivTimeFixMGPAslew1D = cms::alpakatools::make_workdiv<Acc1D>(blocksFixMGPA, threadsFixMGPA);
0126       alpaka::exec<Acc1D>(queue,
0127                           workDivTimeFixMGPAslew1D,
0128                           Kernel_time_compute_fixMGPAslew{},
0129                           digisDevEB.const_view(),
0130                           digisDevEE.const_view(),
0131                           conditionsDev.const_view(),
0132                           scratch.sample_valuesDevBuf.value().data(),
0133                           scratch.sample_value_errorsDevBuf.value().data(),
0134                           scratch.useless_sample_valuesDevBuf.value().data());
0135 
0136       auto const threads_nullhypot = threads_1d;
0137       auto const blocks_nullhypot = blocks_1d;
0138       auto workDivTimeNullhypot1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_nullhypot, threads_nullhypot);
0139       alpaka::exec<Acc1D>(queue,
0140                           workDivTimeNullhypot1D,
0141                           Kernel_time_compute_nullhypot{},
0142                           scratch.sample_valuesDevBuf.value().data(),
0143                           scratch.sample_value_errorsDevBuf.value().data(),
0144                           scratch.useless_sample_valuesDevBuf.value().data(),
0145                           scratch.chi2sNullHypotDevBuf.value().data(),
0146                           scratch.sum0sNullHypotDevBuf.value().data(),
0147                           scratch.sumAAsNullHypotDevBuf.value().data(),
0148                           totalChannels);
0149 
0150       constexpr uint32_t nchannels_per_block_makeratio = kMaxSamples;
0151       constexpr auto nthreads_per_channel =
0152           nchannels_per_block_makeratio * (nchannels_per_block_makeratio - 1) / 2;  // n(n-1)/2
0153       constexpr auto threads_makeratio = nthreads_per_channel * nchannels_per_block_makeratio;
0154       auto const blocks_makeratio =
0155           cms::alpakatools::divide_up_by(nthreads_per_channel * totalChannels, threads_makeratio);
0156       auto workDivTimeMakeRatio1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_makeratio, threads_makeratio);
0157       alpaka::exec<Acc1D>(queue,
0158                           workDivTimeMakeRatio1D,
0159                           Kernel_time_compute_makeratio{},
0160                           digisDevEB.const_view(),
0161                           digisDevEE.const_view(),
0162                           scratch.sample_valuesDevBuf.value().data(),
0163                           scratch.sample_value_errorsDevBuf.value().data(),
0164                           scratch.useless_sample_valuesDevBuf.value().data(),
0165                           scratch.pedestal_numsDevBuf.value().data(),
0166                           scratch.sumAAsNullHypotDevBuf.value().data(),
0167                           scratch.sum0sNullHypotDevBuf.value().data(),
0168                           scratch.tMaxAlphaBetasDevBuf.value().data(),
0169                           scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0170                           scratch.accTimeMaxDevBuf.value().data(),
0171                           scratch.accTimeWgtDevBuf.value().data(),
0172                           scratch.tcStateDevBuf.value().data(),
0173                           paramsDev.const_view(),
0174                           configParams.timeFitLimitsFirstEB,
0175                           configParams.timeFitLimitsFirstEE,
0176                           configParams.timeFitLimitsSecondEB,
0177                           configParams.timeFitLimitsSecondEE);
0178 
0179       auto const threads_findamplchi2 = threads_1d;
0180       auto const blocks_findamplchi2 = blocks_1d;
0181       auto workDivTimeFindAmplChi21D = cms::alpakatools::make_workdiv<Acc1D>(blocks_findamplchi2, threads_findamplchi2);
0182       alpaka::exec<Acc1D>(queue,
0183                           workDivTimeFindAmplChi21D,
0184                           Kernel_time_compute_findamplchi2_and_finish{},
0185                           digisDevEB.const_view(),
0186                           digisDevEE.const_view(),
0187                           scratch.sample_valuesDevBuf.value().data(),
0188                           scratch.sample_value_errorsDevBuf.value().data(),
0189                           scratch.useless_sample_valuesDevBuf.value().data(),
0190                           scratch.tMaxAlphaBetasDevBuf.value().data(),
0191                           scratch.tMaxErrorAlphaBetasDevBuf.value().data(),
0192                           scratch.accTimeMaxDevBuf.value().data(),
0193                           scratch.accTimeWgtDevBuf.value().data(),
0194                           scratch.sumAAsNullHypotDevBuf.value().data(),
0195                           scratch.sum0sNullHypotDevBuf.value().data(),
0196                           scratch.chi2sNullHypotDevBuf.value().data(),
0197                           scratch.tcStateDevBuf.value().data(),
0198                           scratch.ampMaxAlphaBetaDevBuf.value().data(),
0199                           scratch.ampMaxErrorDevBuf.value().data(),
0200                           scratch.timeMaxDevBuf.value().data(),
0201                           scratch.timeErrorDevBuf.value().data(),
0202                           paramsDev.const_view());
0203 
0204       auto const threads_timecorr = 32;
0205       auto const blocks_timecorr = cms::alpakatools::divide_up_by(totalChannels, threads_timecorr);
0206       auto workDivCorrFinal1D = cms::alpakatools::make_workdiv<Acc1D>(blocks_timecorr, threads_timecorr);
0207       alpaka::exec<Acc1D>(queue,
0208                           workDivCorrFinal1D,
0209                           Kernel_time_correction_and_finalize{},
0210                           digisDevEB.const_view(),
0211                           digisDevEE.const_view(),
0212                           uncalibRecHitsDevEB.view(),
0213                           uncalibRecHitsDevEE.view(),
0214                           conditionsDev.const_view(),
0215                           scratch.timeMaxDevBuf.value().data(),
0216                           scratch.timeErrorDevBuf.value().data(),
0217                           configParams.timeConstantTermEB,
0218                           configParams.timeConstantTermEE,
0219                           configParams.timeNconstEB,
0220                           configParams.timeNconstEE,
0221                           configParams.amplitudeThreshEB,
0222                           configParams.amplitudeThreshEE,
0223                           configParams.outOfTimeThreshG12pEB,
0224                           configParams.outOfTimeThreshG12pEE,
0225                           configParams.outOfTimeThreshG12mEB,
0226                           configParams.outOfTimeThreshG12mEE,
0227                           configParams.outOfTimeThreshG61pEB,
0228                           configParams.outOfTimeThreshG61pEE,
0229                           configParams.outOfTimeThreshG61mEB,
0230                           configParams.outOfTimeThreshG61mEE);
0231     }
0232   }
0233 
0234 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit