Warning, /RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.cu is written in an unsupported language. File is not indexed.
0001 #include <iostream>
0002 #include <limits>
0003
0004 #include <cuda.h>
0005
0006 #include "CondFormats/EcalObjects/interface/EcalMGPAGainRatio.h"
0007 #include "CondFormats/EcalObjects/interface/EcalPedestals.h"
0008 #include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h"
0009 #include "CondFormats/EcalObjects/interface/EcalPulseShapes.h"
0010 #include "CondFormats/EcalObjects/interface/EcalSampleMask.h"
0011 #include "CondFormats/EcalObjects/interface/EcalSamplesCorrelation.h"
0012 #include "CondFormats/EcalObjects/interface/EcalXtalGroupId.h"
0013 #include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
0014 #include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
0015
0016 #include "AmplitudeComputationCommonKernels.h"
0017 #include "AmplitudeComputationKernels.h"
0018 #include "EcalUncalibRecHitMultiFitAlgoGPU.h"
0019 #include "TimeComputationKernels.h"
0020
0021 //#define DEBUG
0022
0023 //#define ECAL_RECO_CUDA_DEBUG
0024
0025 namespace ecal {
0026 namespace multifit {
0027
0028 void entryPoint(EventInputDataGPU const& eventInputGPU,
0029 EventOutputDataGPU& eventOutputGPU,
0030 EventDataForScratchGPU& scratch,
0031 ConditionsProducts const& conditions,
0032 ConfigurationParameters const& configParameters,
0033 cudaStream_t cudaStream) {
0034 using digis_type = std::vector<uint16_t>;
0035 using dids_type = std::vector<uint32_t>;
0036 // accodring to the cpu setup //----> hardcoded
0037 bool const gainSwitchUseMaxSampleEB = true;
0038 // accodring to the cpu setup //----> hardcoded
0039 bool const gainSwitchUseMaxSampleEE = false;
0040
0041 uint32_t const offsetForHashes = conditions.offsetForHashes;
0042 uint32_t const offsetForInputs = eventInputGPU.ebDigis.size;
0043 unsigned int totalChannels = eventInputGPU.ebDigis.size + eventInputGPU.eeDigis.size;
0044
0045 //
0046 // 1d preparation kernel
0047 //
0048 unsigned int nchannels_per_block = 32;
0049 unsigned int threads_1d = 10 * nchannels_per_block;
0050 unsigned int blocks_1d = threads_1d > 10 * totalChannels ? 1 : (totalChannels * 10 + threads_1d - 1) / threads_1d;
0051 int shared_bytes = nchannels_per_block * EcalDataFrame::MAXSAMPLES *
0052 (sizeof(bool) + sizeof(bool) + sizeof(bool) + sizeof(bool) + sizeof(char) + sizeof(bool));
0053 kernel_prep_1d_and_initialize<<<blocks_1d, threads_1d, shared_bytes, cudaStream>>>(
0054 conditions.pulseShapes.values,
0055 eventInputGPU.ebDigis.data.get(),
0056 eventInputGPU.ebDigis.ids.get(),
0057 eventInputGPU.eeDigis.data.get(),
0058 eventInputGPU.eeDigis.ids.get(),
0059 (SampleVector*)scratch.samples.get(),
0060 (SampleVector*)eventOutputGPU.recHitsEB.amplitudesAll.get(),
0061 (SampleVector*)eventOutputGPU.recHitsEE.amplitudesAll.get(),
0062 (SampleGainVector*)scratch.gainsNoise.get(),
0063 conditions.pedestals.mean_x1,
0064 conditions.pedestals.mean_x12,
0065 conditions.pedestals.rms_x12,
0066 conditions.pedestals.mean_x6,
0067 conditions.gainRatios.gain6Over1,
0068 conditions.gainRatios.gain12Over6,
0069 scratch.hasSwitchToGain6.get(),
0070 scratch.hasSwitchToGain1.get(),
0071 scratch.isSaturated.get(),
0072 eventOutputGPU.recHitsEB.amplitude.get(),
0073 eventOutputGPU.recHitsEE.amplitude.get(),
0074 eventOutputGPU.recHitsEB.chi2.get(),
0075 eventOutputGPU.recHitsEE.chi2.get(),
0076 eventOutputGPU.recHitsEB.pedestal.get(),
0077 eventOutputGPU.recHitsEE.pedestal.get(),
0078 eventOutputGPU.recHitsEB.did.get(),
0079 eventOutputGPU.recHitsEE.did.get(),
0080 eventOutputGPU.recHitsEB.flags.get(),
0081 eventOutputGPU.recHitsEE.flags.get(),
0082 scratch.acState.get(),
0083 (BXVectorType*)scratch.activeBXs.get(),
0084 offsetForHashes,
0085 offsetForInputs,
0086 gainSwitchUseMaxSampleEB,
0087 gainSwitchUseMaxSampleEE,
0088 totalChannels);
0089 cudaCheck(cudaGetLastError());
0090
0091 //
0092 // 2d preparation kernel
0093 //
0094 int blocks_2d = totalChannels;
0095 dim3 threads_2d{10, 10};
0096 kernel_prep_2d<<<blocks_2d, threads_2d, 0, cudaStream>>>((SampleGainVector*)scratch.gainsNoise.get(),
0097 eventInputGPU.ebDigis.ids.get(),
0098 eventInputGPU.eeDigis.ids.get(),
0099 conditions.pedestals.rms_x12,
0100 conditions.pedestals.rms_x6,
0101 conditions.pedestals.rms_x1,
0102 conditions.gainRatios.gain12Over6,
0103 conditions.gainRatios.gain6Over1,
0104 conditions.samplesCorrelation.EBG12SamplesCorrelation,
0105 conditions.samplesCorrelation.EBG6SamplesCorrelation,
0106 conditions.samplesCorrelation.EBG1SamplesCorrelation,
0107 conditions.samplesCorrelation.EEG12SamplesCorrelation,
0108 conditions.samplesCorrelation.EEG6SamplesCorrelation,
0109 conditions.samplesCorrelation.EEG1SamplesCorrelation,
0110 (SampleMatrix*)scratch.noisecov.get(),
0111 (PulseMatrixType*)scratch.pulse_matrix.get(),
0112 conditions.pulseShapes.values,
0113 scratch.hasSwitchToGain6.get(),
0114 scratch.hasSwitchToGain1.get(),
0115 scratch.isSaturated.get(),
0116 offsetForHashes,
0117 offsetForInputs);
0118 cudaCheck(cudaGetLastError());
0119
0120 // run minimization kernels
0121 v1::minimization_procedure(eventInputGPU, eventOutputGPU, scratch, conditions, configParameters, cudaStream);
0122
0123 if (configParameters.shouldRunTimingComputation) {
0124 //
0125 // TODO: this guy can run concurrently with other kernels,
0126 // there is no dependence on the order of execution
0127 //
0128 unsigned int threads_time_init = threads_1d;
0129 unsigned int blocks_time_init = blocks_1d;
0130 int sharedBytesInit = 2 * threads_time_init * sizeof(SampleVector::Scalar);
0131 kernel_time_computation_init<<<blocks_time_init, threads_time_init, sharedBytesInit, cudaStream>>>(
0132 eventInputGPU.ebDigis.data.get(),
0133 eventInputGPU.ebDigis.ids.get(),
0134 eventInputGPU.eeDigis.data.get(),
0135 eventInputGPU.eeDigis.ids.get(),
0136 conditions.pedestals.rms_x12,
0137 conditions.pedestals.rms_x6,
0138 conditions.pedestals.rms_x1,
0139 conditions.pedestals.mean_x12,
0140 conditions.pedestals.mean_x6,
0141 conditions.pedestals.mean_x1,
0142 conditions.gainRatios.gain12Over6,
0143 conditions.gainRatios.gain6Over1,
0144 scratch.sample_values.get(),
0145 scratch.sample_value_errors.get(),
0146 scratch.ampMaxError.get(),
0147 scratch.useless_sample_values.get(),
0148 scratch.pedestal_nums.get(),
0149 offsetForHashes,
0150 offsetForInputs,
0151 conditions.sampleMask.getEcalSampleMaskRecordEB(),
0152 conditions.sampleMask.getEcalSampleMaskRecordEE(),
0153 totalChannels);
0154 cudaCheck(cudaGetLastError());
0155
0156 //
0157 // TODO: small kernel only for EB. It needs to be checked if
0158 /// fusing such small kernels is beneficial in here
0159 //
0160 // we are running only over EB digis
0161 // therefore we need to create threads/blocks only for that
0162 unsigned int const threadsFixMGPA = threads_1d;
0163 unsigned int const blocksFixMGPA =
0164 threadsFixMGPA > 10 * eventInputGPU.ebDigis.size
0165 ? 1
0166 : (10 * eventInputGPU.ebDigis.size + threadsFixMGPA - 1) / threadsFixMGPA;
0167 kernel_time_compute_fixMGPAslew<<<blocksFixMGPA, threadsFixMGPA, 0, cudaStream>>>(
0168 eventInputGPU.ebDigis.data.get(),
0169 eventInputGPU.eeDigis.data.get(),
0170 scratch.sample_values.get(),
0171 scratch.sample_value_errors.get(),
0172 scratch.useless_sample_values.get(),
0173 conditions.sampleMask.getEcalSampleMaskRecordEB(),
0174 totalChannels,
0175 offsetForInputs);
0176 cudaCheck(cudaGetLastError());
0177
0178 int sharedBytes = EcalDataFrame::MAXSAMPLES * nchannels_per_block * 4 * sizeof(SampleVector::Scalar);
0179 auto const threads_nullhypot = threads_1d;
0180 auto const blocks_nullhypot = blocks_1d;
0181 kernel_time_compute_nullhypot<<<blocks_nullhypot, threads_nullhypot, sharedBytes, cudaStream>>>(
0182 scratch.sample_values.get(),
0183 scratch.sample_value_errors.get(),
0184 scratch.useless_sample_values.get(),
0185 scratch.chi2sNullHypot.get(),
0186 scratch.sum0sNullHypot.get(),
0187 scratch.sumAAsNullHypot.get(),
0188 totalChannels);
0189 cudaCheck(cudaGetLastError());
0190
0191 unsigned int nchannels_per_block_makeratio = 10;
0192 unsigned int threads_makeratio = 45 * nchannels_per_block_makeratio;
0193 unsigned int blocks_makeratio = threads_makeratio > 45 * totalChannels
0194 ? 1
0195 : (totalChannels * 45 + threads_makeratio - 1) / threads_makeratio;
0196 int sharedBytesMakeRatio = 5 * threads_makeratio * sizeof(SampleVector::Scalar);
0197 kernel_time_compute_makeratio<<<blocks_makeratio, threads_makeratio, sharedBytesMakeRatio, cudaStream>>>(
0198 scratch.sample_values.get(),
0199 scratch.sample_value_errors.get(),
0200 eventInputGPU.ebDigis.ids.get(),
0201 eventInputGPU.eeDigis.ids.get(),
0202 scratch.useless_sample_values.get(),
0203 scratch.pedestal_nums.get(),
0204 configParameters.amplitudeFitParametersEB,
0205 configParameters.amplitudeFitParametersEE,
0206 configParameters.timeFitParametersEB,
0207 configParameters.timeFitParametersEE,
0208 scratch.sumAAsNullHypot.get(),
0209 scratch.sum0sNullHypot.get(),
0210 scratch.tMaxAlphaBetas.get(),
0211 scratch.tMaxErrorAlphaBetas.get(),
0212 scratch.accTimeMax.get(),
0213 scratch.accTimeWgt.get(),
0214 scratch.tcState.get(),
0215 configParameters.timeFitParametersSizeEB,
0216 configParameters.timeFitParametersSizeEE,
0217 configParameters.timeFitLimitsFirstEB,
0218 configParameters.timeFitLimitsFirstEE,
0219 configParameters.timeFitLimitsSecondEB,
0220 configParameters.timeFitLimitsSecondEE,
0221 totalChannels,
0222 offsetForInputs);
0223 cudaCheck(cudaGetLastError());
0224
0225 auto const threads_findamplchi2 = threads_1d;
0226 auto const blocks_findamplchi2 = blocks_1d;
0227 int const sharedBytesFindAmplChi2 = 2 * threads_findamplchi2 * sizeof(SampleVector::Scalar);
0228 kernel_time_compute_findamplchi2_and_finish<<<blocks_findamplchi2,
0229 threads_findamplchi2,
0230 sharedBytesFindAmplChi2,
0231 cudaStream>>>(scratch.sample_values.get(),
0232 scratch.sample_value_errors.get(),
0233 eventInputGPU.ebDigis.ids.get(),
0234 eventInputGPU.eeDigis.ids.get(),
0235 scratch.useless_sample_values.get(),
0236 scratch.tMaxAlphaBetas.get(),
0237 scratch.tMaxErrorAlphaBetas.get(),
0238 scratch.accTimeMax.get(),
0239 scratch.accTimeWgt.get(),
0240 configParameters.amplitudeFitParametersEB,
0241 configParameters.amplitudeFitParametersEE,
0242 scratch.sumAAsNullHypot.get(),
0243 scratch.sum0sNullHypot.get(),
0244 scratch.chi2sNullHypot.get(),
0245 scratch.tcState.get(),
0246 scratch.ampMaxAlphaBeta.get(),
0247 scratch.ampMaxError.get(),
0248 scratch.timeMax.get(),
0249 scratch.timeError.get(),
0250 totalChannels,
0251 offsetForInputs);
0252 cudaCheck(cudaGetLastError());
0253
0254 auto const threads_timecorr = 32;
0255 auto const blocks_timecorr =
0256 threads_timecorr > totalChannels ? 1 : (totalChannels + threads_timecorr - 1) / threads_timecorr;
0257 kernel_time_correction_and_finalize<<<blocks_timecorr, threads_timecorr, 0, cudaStream>>>(
0258 eventOutputGPU.recHitsEB.amplitude.get(),
0259 eventOutputGPU.recHitsEE.amplitude.get(),
0260 eventInputGPU.ebDigis.data.get(),
0261 eventInputGPU.ebDigis.ids.get(),
0262 eventInputGPU.eeDigis.data.get(),
0263 eventInputGPU.eeDigis.ids.get(),
0264 conditions.timeBiasCorrections.ebTimeCorrAmplitudeBins,
0265 conditions.timeBiasCorrections.eeTimeCorrAmplitudeBins,
0266 conditions.timeBiasCorrections.ebTimeCorrShiftBins,
0267 conditions.timeBiasCorrections.eeTimeCorrShiftBins,
0268 scratch.timeMax.get(),
0269 scratch.timeError.get(),
0270 conditions.pedestals.rms_x12,
0271 conditions.timeCalibConstants.values,
0272 eventOutputGPU.recHitsEB.jitter.get(),
0273 eventOutputGPU.recHitsEE.jitter.get(),
0274 eventOutputGPU.recHitsEB.jitterError.get(),
0275 eventOutputGPU.recHitsEE.jitterError.get(),
0276 eventOutputGPU.recHitsEB.flags.get(),
0277 eventOutputGPU.recHitsEE.flags.get(),
0278 conditions.timeBiasCorrections.ebTimeCorrAmplitudeBinsSize,
0279 conditions.timeBiasCorrections.eeTimeCorrAmplitudeBinsSize,
0280 configParameters.timeConstantTermEB,
0281 configParameters.timeConstantTermEE,
0282 conditions.timeOffsetConstant.getEBValue(),
0283 conditions.timeOffsetConstant.getEEValue(),
0284 configParameters.timeNconstEB,
0285 configParameters.timeNconstEE,
0286 configParameters.amplitudeThreshEB,
0287 configParameters.amplitudeThreshEE,
0288 configParameters.outOfTimeThreshG12pEB,
0289 configParameters.outOfTimeThreshG12pEE,
0290 configParameters.outOfTimeThreshG12mEB,
0291 configParameters.outOfTimeThreshG12mEE,
0292 configParameters.outOfTimeThreshG61pEB,
0293 configParameters.outOfTimeThreshG61pEE,
0294 configParameters.outOfTimeThreshG61mEB,
0295 configParameters.outOfTimeThreshG61mEE,
0296 offsetForHashes,
0297 offsetForInputs,
0298 totalChannels);
0299 cudaCheck(cudaGetLastError());
0300 }
0301 }
0302
0303 } // namespace multifit
0304 } // namespace ecal