File indexing completed on 2025-01-22 07:34:22
0001 #ifndef RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h
0002 #define RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h
0003
0004 #include <cassert>
0005 #include <cmath>
0006 #include <limits>
0007
0008 #include "CondFormats/EcalObjects/interface/alpaka/EcalMultifitConditionsDevice.h"
0009 #include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
0010 #include "DataFormats/EcalDigi/interface/EcalMGPASample.h"
0011 #include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
0012 #include "DataFormats/Math/interface/approx_exp.h"
0013 #include "DataFormats/Math/interface/approx_log.h"
0014 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0015 #include "RecoLocalCalo/EcalRecProducers/interface/EigenMatrixTypes_gpu.h"
0016
0017 #include "DeclsForKernels.h"
0018 #include "KernelHelpers.h"
0019 #include "EcalMultifitParameters.h"
0020
0021
0022
0023 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0024
0025 ALPAKA_FN_ACC ALPAKA_FN_INLINE bool use_sample(unsigned int sample_mask, unsigned int sample) {
0026 return sample_mask & (0x1 << (EcalDataFrame::MAXSAMPLES - (sample + 1)));
0027 }
0028
0029 ALPAKA_FN_ACC constexpr float fast_expf(float x) { return unsafe_expf<6>(x); }
0030 ALPAKA_FN_ACC constexpr float fast_logf(float x) { return unsafe_logf<7>(x); }
0031
0032 class Kernel_time_compute_nullhypot {
0033 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0034
0035 public:
0036 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0037 ScalarType* const sample_values,
0038 ScalarType* const sample_value_errors,
0039 bool* const useless_sample_values,
0040 ScalarType* chi2s,
0041 ScalarType* sum0s,
0042 ScalarType* sumAAs,
0043 uint32_t const nchannels) const {
0044 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0045
0046
0047 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0048
0049
0050 auto* s_sum0 = alpaka::getDynSharedMem<char>(acc);
0051 auto* s_sum1 = reinterpret_cast<ScalarType*>(s_sum0 + elemsPerBlock);
0052 auto* s_sumA = s_sum1 + elemsPerBlock;
0053 auto* s_sumAA = s_sumA + elemsPerBlock;
0054
0055 for (auto txforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0056
0057 auto tx = nchannels * nsamples - 1 - txforward;
0058 auto const ch = tx / nsamples;
0059
0060 auto const sample = tx % nsamples;
0061 auto const ltx = tx % elemsPerBlock;
0062
0063
0064 auto const inv_error =
0065 useless_sample_values[tx] ? 0. : 1. / (sample_value_errors[tx] * sample_value_errors[tx]);
0066 auto const sample_value = sample_values[tx];
0067 s_sum0[ltx] = useless_sample_values[tx] ? 0 : 1;
0068 s_sum1[ltx] = inv_error;
0069 s_sumA[ltx] = sample_value * inv_error;
0070 s_sumAA[ltx] = sample_value * sample_value * inv_error;
0071 alpaka::syncBlockThreads(acc);
0072
0073
0074 if (sample < 5) {
0075 s_sum0[ltx] += s_sum0[ltx + 5];
0076 s_sum1[ltx] += s_sum1[ltx + 5];
0077 s_sumA[ltx] += s_sumA[ltx + 5];
0078 s_sumAA[ltx] += s_sumAA[ltx + 5];
0079 }
0080 alpaka::syncBlockThreads(acc);
0081
0082 if (sample < 2) {
0083
0084 s_sum0[ltx] += s_sum0[ltx + 2] + s_sum0[ltx + 3];
0085 s_sum1[ltx] += s_sum1[ltx + 2] + s_sum1[ltx + 3];
0086 s_sumA[ltx] += s_sumA[ltx + 2] + s_sumA[ltx + 3];
0087 s_sumAA[ltx] += s_sumAA[ltx + 2] + s_sumAA[ltx + 3];
0088 }
0089 alpaka::syncBlockThreads(acc);
0090
0091 if (sample == 0) {
0092
0093 auto const sum0 = s_sum0[ltx] + s_sum0[ltx + 1] - s_sum0[ltx + 3];
0094 auto const sum1 = s_sum1[ltx] + s_sum1[ltx + 1] - s_sum1[ltx + 3];
0095 auto const sumA = s_sumA[ltx] + s_sumA[ltx + 1] - s_sumA[ltx + 3];
0096 auto const sumAA = s_sumAA[ltx] + s_sumAA[ltx + 1] - s_sumAA[ltx + 3];
0097 auto const chi2 = sum0 > 0 ? (sumAA - sumA * sumA / sum1) / sum0 : static_cast<ScalarType>(0);
0098 chi2s[ch] = chi2;
0099 sum0s[ch] = sum0;
0100 sumAAs[ch] = sumAA;
0101
0102 #ifdef DEBUG_TC_NULLHYPOT
0103 if (ch == 0) {
0104 printf("chi2 = %f sum0 = %d sumAA = %f\n", chi2, static_cast<int>(sum0), sumAA);
0105 }
0106 #endif
0107 }
0108 }
0109 }
0110 };
0111
0112
0113
0114
0115
0116
0117
0118 class Kernel_time_compute_makeratio {
0119 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0120
0121 public:
0122 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0123 EcalDigiDeviceCollection::ConstView digisDevEB,
0124 EcalDigiDeviceCollection::ConstView digisDevEE,
0125 ScalarType* const sample_values,
0126 ScalarType* const sample_value_errors,
0127 bool* const useless_sample_values,
0128 char* const pedestal_nums,
0129 ScalarType* const sumAAsNullHypot,
0130 ScalarType* const sum0sNullHypot,
0131 ScalarType* tMaxAlphaBetas,
0132 ScalarType* tMaxErrorAlphaBetas,
0133 ScalarType* g_accTimeMax,
0134 ScalarType* g_accTimeWgt,
0135 TimeComputationState* g_state,
0136 EcalMultifitParameters const* paramsDev,
0137 ConfigurationParameters::type const timeFitLimits_firstEB,
0138 ConfigurationParameters::type const timeFitLimits_firstEE,
0139 ConfigurationParameters::type const timeFitLimits_secondEB,
0140 ConfigurationParameters::type const timeFitLimits_secondEE) const {
0141
0142 constexpr uint32_t nchannels_per_block = 10;
0143 constexpr auto nthreads_per_channel = nchannels_per_block * (nchannels_per_block - 1) / 2;
0144 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0145 auto const nchannels = digisDevEB.size() + digisDevEE.size();
0146 auto const offsetForInputs = digisDevEB.size();
0147 auto const totalElements = nthreads_per_channel * nchannels;
0148
0149 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0150 ALPAKA_ASSERT_ACC(nthreads_per_channel * nchannels_per_block == elemsPerBlock);
0151
0152 auto* shr_chi2s = alpaka::getDynSharedMem<ScalarType>(acc);
0153 auto* shr_time_wgt = shr_chi2s + elemsPerBlock;
0154 auto* shr_time_max = shr_time_wgt + elemsPerBlock;
0155 auto* shrTimeMax = shr_time_max + elemsPerBlock;
0156 auto* shrTimeWgt = shrTimeMax + elemsPerBlock;
0157 auto* shr_chi2 = shrTimeWgt + elemsPerBlock;
0158 auto* shr_tmax = shr_chi2 + elemsPerBlock;
0159 auto* shr_tmaxerr = shr_tmax + elemsPerBlock;
0160 auto* shr_condForUselessSamples = reinterpret_cast<bool*>(shr_tmaxerr + elemsPerBlock);
0161 auto* shr_internalCondForSkipping1 = shr_condForUselessSamples + elemsPerBlock;
0162 auto* shr_internalCondForSkipping2 = shr_internalCondForSkipping1 + elemsPerBlock;
0163
0164 for (auto block : cms::alpakatools::uniform_groups(acc, totalElements)) {
0165 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0166 auto const ch = idx.global / nthreads_per_channel;
0167 auto const ltx = idx.global % nthreads_per_channel;
0168
0169 auto const ch_start = ch * nsamples;
0170 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0171 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0172
0173 auto const did = DetId{dids[inputCh]};
0174 auto const isBarrel = did.subdetId() == EcalBarrel;
0175 auto* const amplitudeFitParameters =
0176 isBarrel ? paramsDev->amplitudeFitParamsEB.data() : paramsDev->amplitudeFitParamsEE.data();
0177 auto* const timeFitParameters =
0178 isBarrel ? paramsDev->timeFitParamsEB.data() : paramsDev->timeFitParamsEE.data();
0179 auto const timeFitParameters_size =
0180 isBarrel ? paramsDev->timeFitParamsEB.size() : paramsDev->timeFitParamsEE.size();
0181 auto const timeFitLimits_first = isBarrel ? timeFitLimits_firstEB : timeFitLimits_firstEE;
0182 auto const timeFitLimits_second = isBarrel ? timeFitLimits_secondEB : timeFitLimits_secondEE;
0183
0184
0185 int sample_i = 0;
0186 int sample_j = 0;
0187 if (ltx <= 8) {
0188 sample_i = 0;
0189 sample_j = 1 + ltx;
0190 } else if (ltx <= 16) {
0191 sample_i = 1;
0192 sample_j = 2 + ltx - 9;
0193 } else if (ltx <= 23) {
0194 sample_i = 2;
0195 sample_j = 3 + ltx - 17;
0196 } else if (ltx <= 29) {
0197 sample_i = 3;
0198 sample_j = 4 + ltx - 24;
0199 } else if (ltx <= 34) {
0200 sample_i = 4;
0201 sample_j = 5 + ltx - 30;
0202 } else if (ltx <= 38) {
0203 sample_i = 5;
0204 sample_j = 6 + ltx - 35;
0205 } else if (ltx <= 41) {
0206 sample_i = 6;
0207 sample_j = 7 + ltx - 39;
0208 } else if (ltx <= 43) {
0209 sample_i = 7;
0210 sample_j = 8 + ltx - 42;
0211 } else if (ltx <= 44) {
0212 sample_i = 8;
0213 sample_j = 9;
0214 } else {
0215
0216 ALPAKA_ASSERT_ACC(false);
0217 }
0218
0219 auto const tx_i = ch_start + sample_i;
0220 auto const tx_j = ch_start + sample_j;
0221
0222
0223
0224
0225
0226
0227 bool const condForUselessSamples = useless_sample_values[tx_i] || useless_sample_values[tx_j] ||
0228 sample_values[tx_i] <= 1 || sample_values[tx_j] <= 1;
0229
0230
0231
0232
0233 ScalarType chi2 = std::numeric_limits<ScalarType>::max();
0234 ScalarType tmax = 0;
0235 ScalarType tmaxerr = 0;
0236 shrTimeMax[idx.local] = 0;
0237 shrTimeWgt[idx.local] = 0;
0238
0239 bool internalCondForSkipping1 = true;
0240 bool internalCondForSkipping2 = true;
0241 if (!condForUselessSamples) {
0242 auto const rtmp = sample_values[tx_i] / sample_values[tx_j];
0243 auto const invampl_i = 1. / sample_values[tx_i];
0244 auto const relErr2_i = sample_value_errors[tx_i] * sample_value_errors[tx_i] * invampl_i * invampl_i;
0245 auto const invampl_j = 1. / sample_values[tx_j];
0246 auto const relErr2_j = sample_value_errors[tx_j] * sample_value_errors[tx_j] * invampl_j * invampl_j;
0247 auto const err1 = rtmp * rtmp * (relErr2_i + relErr2_j);
0248 auto err2 =
0249 sample_value_errors[tx_j] * (sample_values[tx_i] - sample_values[tx_j]) * (invampl_j * invampl_j);
0250
0251
0252
0253 if (pedestal_nums[ch] == 2)
0254 err2 *= err2 * 0.5;
0255 auto const err3 = (0.289 * 0.289) * (invampl_j * invampl_j);
0256 auto const total_error = std::sqrt(err1 + err2 + err3);
0257
0258 auto const alpha = amplitudeFitParameters[0];
0259 auto const beta = amplitudeFitParameters[1];
0260 auto const alphabeta = alpha * beta;
0261 auto const invalphabeta = 1. / alphabeta;
0262
0263
0264 auto const ratio_index = sample_i;
0265 auto const ratio_step = sample_j - sample_i;
0266 auto const ratio_value = rtmp;
0267 auto const ratio_error = total_error;
0268
0269 auto const rlim_i_j = fast_expf(static_cast<ScalarType>(sample_j - sample_i) / beta) - 0.001;
0270 internalCondForSkipping1 = !(total_error < 1. && rtmp > 0.001 && rtmp < rlim_i_j);
0271 if (!internalCondForSkipping1) {
0272
0273
0274
0275
0276
0277
0278 auto const l_timeFitLimits_first = timeFitLimits_first;
0279 auto const l_timeFitLimits_second = timeFitLimits_second;
0280 if (ratio_step == 1 && ratio_value >= l_timeFitLimits_first && ratio_value <= l_timeFitLimits_second) {
0281 auto const time_max_i = static_cast<ScalarType>(ratio_index);
0282 auto u = timeFitParameters[timeFitParameters_size - 1];
0283 CMS_UNROLL_LOOP
0284 for (int k = timeFitParameters_size - 2; k >= 0; --k)
0285 u = u * ratio_value + timeFitParameters[k];
0286
0287 auto du = (timeFitParameters_size - 1) * (timeFitParameters[timeFitParameters_size - 1]);
0288 for (int k = timeFitParameters_size - 2; k >= 1; --k)
0289 du = du * ratio_value + k * timeFitParameters[k];
0290
0291 auto const error2 = ratio_error * ratio_error * du * du;
0292 auto const time_max = error2 > 0 ? (time_max_i - u) / error2 : static_cast<ScalarType>(0);
0293 auto const time_wgt = error2 > 0 ? 1. / error2 : static_cast<ScalarType>(0);
0294
0295
0296
0297
0298 shrTimeMax[idx.local] = error2 > 0 ? time_max : 0;
0299 shrTimeWgt[idx.local] = error2 > 0 ? time_wgt : 0;
0300 } else {
0301 shrTimeMax[idx.local] = 0;
0302 shrTimeWgt[idx.local] = 0;
0303 }
0304
0305
0306 auto const stepOverBeta = static_cast<ScalarType>(ratio_step) / beta;
0307 auto const offset = static_cast<ScalarType>(ratio_index) + alphabeta;
0308 auto const rmin = std::max(ratio_value - ratio_error, 0.001);
0309 auto const rmax =
0310 std::min(ratio_value + ratio_error, fast_expf(static_cast<ScalarType>(ratio_step) / beta) - 0.001);
0311 auto const time1 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmin)) / alpha) - 1.);
0312 auto const time2 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmax)) / alpha) - 1.);
0313
0314
0315 tmax = 0.5 * (time1 + time2);
0316 tmaxerr = 0.5 * std::sqrt((time1 - time2) * (time1 - time2));
0317 #ifdef DEBUG_TC_MAKERATIO
0318 if (ch == 1 || ch == 0)
0319 printf(
0320 "ch = %d ltx = %d tmax = %f tmaxerr = %f time1 = %f time2 = %f offset = %f rmin = %f rmax = "
0321 "%f\n",
0322 ch,
0323 ltx,
0324 tmax,
0325 tmaxerr,
0326 time1,
0327 time2,
0328 offset,
0329 rmin,
0330 rmax);
0331 #endif
0332
0333 ScalarType sumAf = 0;
0334 ScalarType sumff = 0;
0335 const int itmin = std::max(-1, static_cast<int>(std::floor(tmax - alphabeta)));
0336 auto loffset = (static_cast<ScalarType>(itmin) - tmax) * invalphabeta;
0337
0338 for (int it = itmin + 1; it < nsamples; ++it) {
0339 loffset += invalphabeta;
0340 if (useless_sample_values[ch_start + it])
0341 continue;
0342 auto const inverr2 = 1. / (sample_value_errors[ch_start + it] * sample_value_errors[ch_start + it]);
0343 auto const term1 = 1. + loffset;
0344 auto const f = (term1 > 1e-6) ? fast_expf(alpha * (fast_logf(term1) - loffset)) : 0;
0345 sumAf += sample_values[ch_start + it] * (f * inverr2);
0346 sumff += f * (f * inverr2);
0347 }
0348
0349 auto const sumAA = sumAAsNullHypot[ch];
0350 auto const sum0 = sum0sNullHypot[ch];
0351 chi2 = sumAA;
0352
0353 if (sumff > 0) {
0354 chi2 = sumAA - sumAf * (sumAf / sumff);
0355 }
0356 chi2 /= sum0;
0357
0358 #ifdef DEBUG_TC_MAKERATIO
0359 if (ch == 1 || ch == 0)
0360 printf(
0361 "ch = %d ltx = %d sumAf = %f sumff = %f sumAA = %f sum0 = %d tmax = %f tmaxerr = %f chi2 = "
0362 "%f\n",
0363 ch,
0364 ltx,
0365 sumAf,
0366 sumff,
0367 sumAA,
0368 static_cast<int>(sum0),
0369 tmax,
0370 tmaxerr,
0371 chi2);
0372 #endif
0373
0374 if (chi2 > 0 && tmax > 0 && tmaxerr > 0)
0375 internalCondForSkipping2 = false;
0376 else
0377 chi2 = std::numeric_limits<ScalarType>::max();
0378 }
0379 }
0380
0381
0382 shr_chi2s[idx.local] = chi2;
0383 shr_chi2[idx.local] = chi2;
0384 shr_tmax[idx.local] = tmax;
0385 shr_tmaxerr[idx.local] = tmaxerr;
0386 shr_condForUselessSamples[idx.local] = condForUselessSamples;
0387 shr_internalCondForSkipping1[idx.local] = internalCondForSkipping1;
0388 shr_internalCondForSkipping2[idx.local] = internalCondForSkipping2;
0389 }
0390
0391 alpaka::syncBlockThreads(acc);
0392
0393
0394
0395 auto iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
0396 bool oddElements = nthreads_per_channel % 2;
0397 CMS_UNROLL_LOOP
0398 while (iter >= 1) {
0399 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0400 auto const ltx = idx.global % nthreads_per_channel;
0401
0402 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
0403
0404
0405 shr_chi2s[idx.local] = std::min(shr_chi2s[idx.local], shr_chi2s[idx.local + iter]);
0406 }
0407 }
0408 alpaka::syncBlockThreads(acc);
0409
0410 oddElements = iter % 2;
0411 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
0412 }
0413
0414 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0415 auto const ltx = idx.global % nthreads_per_channel;
0416
0417
0418 auto const condForUselessSamples = shr_condForUselessSamples[idx.local];
0419 auto const internalCondForSkipping1 = shr_internalCondForSkipping1[idx.local];
0420 auto const internalCondForSkipping2 = shr_internalCondForSkipping2[idx.local];
0421
0422 if (!condForUselessSamples && !internalCondForSkipping1 && !internalCondForSkipping2) {
0423
0424
0425 auto const chi2 = shr_chi2[idx.local];
0426 auto const chi2min = shr_chi2s[idx.local - ltx];
0427 auto const chi2Limit = chi2min + 1.;
0428 auto const tmaxerr = shr_tmaxerr[idx.local];
0429 auto const inverseSigmaSquared = chi2 < chi2Limit ? 1. / (tmaxerr * tmaxerr) : 0.;
0430
0431 #ifdef DEBUG_TC_MAKERATIO
0432 if (ch == 1 || ch == 0) {
0433 auto const ch = idx.global / nthreads_per_channel;
0434 printf("ch = %d ltx = %d chi2min = %f chi2Limit = %f inverseSigmaSquared = %f\n",
0435 ch,
0436 ltx,
0437 chi2min,
0438 chi2Limit,
0439 inverseSigmaSquared);
0440 }
0441 #endif
0442
0443
0444
0445
0446 auto const tmax = shr_tmax[idx.local];
0447 shr_time_wgt[idx.local] = inverseSigmaSquared;
0448 shr_time_max[idx.local] = tmax * inverseSigmaSquared;
0449 } else {
0450 shr_time_wgt[idx.local] = 0;
0451 shr_time_max[idx.local] = 0;
0452 }
0453 }
0454
0455 alpaka::syncBlockThreads(acc);
0456
0457
0458 iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
0459 oddElements = nthreads_per_channel % 2;
0460 CMS_UNROLL_LOOP
0461 while (iter >= 1) {
0462 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0463 auto const ltx = idx.global % nthreads_per_channel;
0464
0465 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
0466 shr_time_wgt[idx.local] += shr_time_wgt[idx.local + iter];
0467 shr_time_max[idx.local] += shr_time_max[idx.local + iter];
0468 shrTimeMax[idx.local] += shrTimeMax[idx.local + iter];
0469 shrTimeWgt[idx.local] += shrTimeWgt[idx.local + iter];
0470 }
0471 }
0472
0473 alpaka::syncBlockThreads(acc);
0474 oddElements = iter % 2;
0475 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
0476 }
0477
0478 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0479 auto const ltx = idx.global % nthreads_per_channel;
0480
0481
0482
0483
0484 if (ltx == 0) {
0485 auto const ch = idx.global / nthreads_per_channel;
0486 auto const tmp_time_max = shr_time_max[idx.local];
0487 auto const tmp_time_wgt = shr_time_wgt[idx.local];
0488
0489
0490 if (tmp_time_wgt == 0 && tmp_time_max == 0) {
0491 g_state[ch] = TimeComputationState::Finished;
0492 continue;
0493 }
0494
0495
0496 auto const tMaxAlphaBeta = tmp_time_max / tmp_time_wgt;
0497 auto const tMaxErrorAlphaBeta = 1. / std::sqrt(tmp_time_wgt);
0498
0499 tMaxAlphaBetas[ch] = tMaxAlphaBeta;
0500 tMaxErrorAlphaBetas[ch] = tMaxErrorAlphaBeta;
0501 g_accTimeMax[ch] = shrTimeMax[idx.local];
0502 g_accTimeWgt[ch] = shrTimeWgt[idx.local];
0503 g_state[ch] = TimeComputationState::NotFinished;
0504
0505 #ifdef DEBUG_TC_MAKERATIO
0506 printf("ch = %d time_max = %f time_wgt = %f\n", ch, tmp_time_max, tmp_time_wgt);
0507 printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f timeMax = %f timeWgt = %f\n",
0508 ch,
0509 tMaxAlphaBeta,
0510 tMaxErrorAlphaBeta,
0511 shrTimeMax[idx.local],
0512 shrTimeWgt[idx.local]);
0513 #endif
0514 }
0515 }
0516 }
0517 }
0518 };
0519
0520 class Kernel_time_compute_findamplchi2_and_finish {
0521 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0522
0523 public:
0524 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0525 EcalDigiDeviceCollection::ConstView digisDevEB,
0526 EcalDigiDeviceCollection::ConstView digisDevEE,
0527 ScalarType* const sample_values,
0528 ScalarType* const sample_value_errors,
0529 bool* const useless_samples,
0530 ScalarType* const g_tMaxAlphaBeta,
0531 ScalarType* const g_tMaxErrorAlphaBeta,
0532 ScalarType* const g_accTimeMax,
0533 ScalarType* const g_accTimeWgt,
0534 ScalarType* const sumAAsNullHypot,
0535 ScalarType* const sum0sNullHypot,
0536 ScalarType* const chi2sNullHypot,
0537 TimeComputationState* g_state,
0538 ScalarType* g_ampMaxAlphaBeta,
0539 ScalarType* g_ampMaxError,
0540 ScalarType* g_timeMax,
0541 ScalarType* g_timeError,
0542 EcalMultifitParameters const* paramsDev) const {
0543
0544
0545
0546
0547
0548
0549 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0550 auto const nchannels = digisDevEB.size() + digisDevEE.size();
0551 auto const offsetForInputs = digisDevEB.size();
0552
0553 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0554
0555
0556
0557
0558 auto* shr_sumAf = alpaka::getDynSharedMem<ScalarType>(acc);
0559 auto* shr_sumff = shr_sumAf + elemsPerBlock;
0560
0561 for (auto gtxforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0562
0563 auto gtx = nchannels * nsamples - 1 - gtxforward;
0564 auto const ch = gtx / nsamples;
0565 auto const elemIdx = gtx % elemsPerBlock;
0566 auto const sample = elemIdx % nsamples;
0567
0568 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0569 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0570
0571 auto state = g_state[ch];
0572 auto const did = DetId{dids[inputCh]};
0573 auto* const amplitudeFitParameters = did.subdetId() == EcalBarrel ? paramsDev->amplitudeFitParamsEB.data()
0574 : paramsDev->amplitudeFitParamsEE.data();
0575
0576
0577
0578 if (state == TimeComputationState::NotFinished) {
0579 auto const alpha = amplitudeFitParameters[0];
0580 auto const beta = amplitudeFitParameters[1];
0581 auto const alphabeta = alpha * beta;
0582 auto const invalphabeta = 1. / alphabeta;
0583 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
0584 auto const sample_value = sample_values[gtx];
0585 auto const sample_value_error = sample_value_errors[gtx];
0586 auto const inverr2 =
0587 useless_samples[gtx] ? static_cast<ScalarType>(0) : 1. / (sample_value_error * sample_value_error);
0588 auto const offset = (static_cast<ScalarType>(sample) - tMaxAlphaBeta) * invalphabeta;
0589 auto const term1 = 1. + offset;
0590 auto const f = term1 > 1e-6 ? fast_expf(alpha * (fast_logf(term1) - offset)) : static_cast<ScalarType>(0.);
0591 auto const sumAf = sample_value * (f * inverr2);
0592 auto const sumff = f * (f * inverr2);
0593
0594
0595 shr_sumAf[elemIdx] = sumAf;
0596 shr_sumff[elemIdx] = sumff;
0597 } else {
0598 shr_sumAf[elemIdx] = 0;
0599 shr_sumff[elemIdx] = 0;
0600 }
0601
0602 alpaka::syncBlockThreads(acc);
0603
0604
0605
0606 if (sample < 5) {
0607 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 5];
0608 shr_sumff[elemIdx] += shr_sumff[elemIdx + 5];
0609 }
0610
0611 alpaka::syncBlockThreads(acc);
0612
0613 if (sample < 2) {
0614
0615 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 2] + shr_sumAf[elemIdx + 3];
0616 shr_sumff[elemIdx] += shr_sumff[elemIdx + 2] + shr_sumff[elemIdx + 3];
0617 }
0618
0619 alpaka::syncBlockThreads(acc);
0620
0621 if (sample == 0) {
0622
0623
0624 if (state == TimeComputationState::Finished) {
0625 g_timeMax[ch] = 5;
0626 g_timeError[ch] = -999;
0627 continue;
0628 }
0629
0630
0631 auto const sumff = shr_sumff[elemIdx] + shr_sumff[elemIdx + 1] - shr_sumff[elemIdx + 3];
0632 auto const sumAf = shr_sumAf[elemIdx] + shr_sumAf[elemIdx + 1] - shr_sumAf[elemIdx + 3];
0633
0634 auto const ampMaxAlphaBeta = sumff > 0 ? sumAf / sumff : 0;
0635 auto const sumAA = sumAAsNullHypot[ch];
0636 auto const sum0 = sum0sNullHypot[ch];
0637 auto const nullChi2 = chi2sNullHypot[ch];
0638 if (sumff > 0) {
0639 auto const chi2AlphaBeta = (sumAA - sumAf * sumAf / sumff) / sum0;
0640 if (chi2AlphaBeta > nullChi2) {
0641
0642 state = TimeComputationState::Finished;
0643 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0644 printf("ch = %d chi2AlphaBeta = %f nullChi2 = %f sumAA = %f sumAf = %f sumff = %f sum0 = %f\n",
0645 ch,
0646 chi2AlphaBeta,
0647 nullChi2,
0648 sumAA,
0649 sumAf,
0650 sumff,
0651 sum0);
0652 #endif
0653 }
0654
0655
0656 g_ampMaxAlphaBeta[ch] = ampMaxAlphaBeta;
0657 } else {
0658 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0659 printf("ch = %d sum0 = %f sumAA = %f sumff = %f sumAf = %f\n", ch, sum0, sumAA, sumff, sumAf);
0660 #endif
0661 state = TimeComputationState::Finished;
0662 }
0663
0664
0665 g_state[ch] = state;
0666 if (state == TimeComputationState::Finished) {
0667
0668 g_timeMax[ch] = 5;
0669 g_timeError[ch] = -999;
0670 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0671 printf("ch = %d finished state\n", ch);
0672 #endif
0673 continue;
0674 }
0675
0676 auto const ampMaxError = g_ampMaxError[ch];
0677 auto const test_ratio = ampMaxAlphaBeta / ampMaxError;
0678 auto const accTimeMax = g_accTimeMax[ch];
0679 auto const accTimeWgt = g_accTimeWgt[ch];
0680 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
0681 auto const tMaxErrorAlphaBeta = g_tMaxErrorAlphaBeta[ch];
0682
0683
0684 if (test_ratio > 5. && accTimeWgt > 0) {
0685 auto const tMaxRatio = accTimeWgt > 0 ? accTimeMax / accTimeWgt : static_cast<ScalarType>(0);
0686 auto const tMaxErrorRatio = accTimeWgt > 0 ? 1. / std::sqrt(accTimeWgt) : static_cast<ScalarType>(0);
0687
0688 if (test_ratio > 10.) {
0689 g_timeMax[ch] = tMaxRatio;
0690 g_timeError[ch] = tMaxErrorRatio;
0691
0692 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0693 printf("ch = %d tMaxRatio = %f tMaxErrorRatio = %f\n", ch, tMaxRatio, tMaxErrorRatio);
0694 #endif
0695 } else {
0696 auto const timeMax = (tMaxAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
0697 tMaxRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
0698 5.;
0699 auto const timeError = (tMaxErrorAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
0700 tMaxErrorRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
0701 5.;
0702 state = TimeComputationState::Finished;
0703 g_state[ch] = state;
0704 g_timeMax[ch] = timeMax;
0705 g_timeError[ch] = timeError;
0706
0707 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0708 printf("ch = %d timeMax = %f timeError = %f\n", ch, timeMax, timeError);
0709 #endif
0710 }
0711 } else {
0712 state = TimeComputationState::Finished;
0713 g_state[ch] = state;
0714 g_timeMax[ch] = tMaxAlphaBeta;
0715 g_timeError[ch] = tMaxErrorAlphaBeta;
0716
0717 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0718 printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f\n", ch, tMaxAlphaBeta, tMaxErrorAlphaBeta);
0719 #endif
0720 }
0721 }
0722 }
0723 }
0724 };
0725
0726 class Kernel_time_compute_fixMGPAslew {
0727 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0728
0729 public:
0730 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0731 EcalDigiDeviceCollection::ConstView digisDevEB,
0732 EcalDigiDeviceCollection::ConstView digisDevEE,
0733 EcalMultifitConditionsDevice::ConstView conditionsDev,
0734 ScalarType* sample_values,
0735 ScalarType* sample_value_errors,
0736 bool* useless_sample_values) const {
0737
0738 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0739
0740 auto const nchannelsEB = digisDevEB.size();
0741 auto const offsetForInputs = nchannelsEB;
0742
0743 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0744
0745 for (auto gtx : cms::alpakatools::uniform_elements(acc, nchannelsEB * nsamples)) {
0746 auto const elemIdx = gtx % elemsPerBlock;
0747 auto const sample = elemIdx % nsamples;
0748 auto const ch = gtx / nsamples;
0749
0750
0751 if (sample == 0)
0752 continue;
0753
0754 if (!use_sample(conditionsDev.sampleMask_EB(), sample))
0755 continue;
0756
0757 int const inputGtx = ch >= offsetForInputs ? gtx - offsetForInputs * nsamples : gtx;
0758 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0759
0760 auto const gainIdPrev = ecalMGPA::gainId(digis[inputGtx - 1]);
0761 auto const gainIdNext = ecalMGPA::gainId(digis[inputGtx]);
0762 if (gainIdPrev >= 1 && gainIdPrev <= 3 && gainIdNext >= 1 && gainIdNext <= 3 && gainIdPrev < gainIdNext) {
0763 sample_values[gtx - 1] = 0;
0764 sample_value_errors[gtx - 1] = 1e+9;
0765 useless_sample_values[gtx - 1] = true;
0766 }
0767 }
0768 }
0769 };
0770
0771
0772 class Kernel_time_computation_init {
0773 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0774
0775 public:
0776 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0777 EcalDigiDeviceCollection::ConstView digisDevEB,
0778 EcalDigiDeviceCollection::ConstView digisDevEE,
0779 EcalMultifitConditionsDevice::ConstView conditionsDev,
0780 ScalarType* sample_values,
0781 ScalarType* sample_value_errors,
0782 ScalarType* ampMaxError,
0783 bool* useless_sample_values,
0784 char* pedestal_nums) const {
0785
0786 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0787
0788
0789 auto const nchannelsEB = digisDevEB.size();
0790 auto const nchannels = nchannelsEB + digisDevEE.size();
0791 auto const offsetForInputs = nchannelsEB;
0792 auto const offsetForHashes = conditionsDev.offsetEE();
0793
0794 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0795
0796
0797 auto* shrSampleValues = alpaka::getDynSharedMem<ScalarType>(acc);
0798 auto* shrSampleValueErrors = shrSampleValues + elemsPerBlock;
0799
0800 for (auto txforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0801
0802 auto tx = nchannels * nsamples - 1 - txforward;
0803 auto const ch = tx / nsamples;
0804 auto const elemIdx = tx % elemsPerBlock;
0805
0806 int const inputTx = ch >= offsetForInputs ? tx - offsetForInputs * nsamples : tx;
0807 int const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0808 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0809 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0810
0811
0812 auto const sample = tx % nsamples;
0813 auto const input_ch_start = inputCh * nsamples;
0814 ScalarType pedestal = 0.;
0815 int num = 0;
0816
0817
0818 auto const adc0 = ecalMGPA::adc(digis[input_ch_start]);
0819 auto const gainId0 = ecalMGPA::gainId(digis[input_ch_start]);
0820 auto const adc1 = ecalMGPA::adc(digis[input_ch_start + 1]);
0821 auto const gainId1 = ecalMGPA::gainId(digis[input_ch_start + 1]);
0822 auto const did = DetId{dids[inputCh]};
0823 auto const isBarrel = did.subdetId() == EcalBarrel;
0824 auto const sample_mask = isBarrel ? conditionsDev.sampleMask_EB() : conditionsDev.sampleMask_EE();
0825 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
0826 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
0827
0828
0829
0830 if (gainId0 == 1 && use_sample(sample_mask, 0)) {
0831 pedestal = static_cast<ScalarType>(adc0);
0832 num = 1;
0833
0834 auto const diff = adc1 - adc0;
0835 if (gainId1 == 1 && use_sample(sample_mask, 1) &&
0836 std::abs(diff) < 3 * conditionsDev.pedestals_rms_x12()[hashedId]) {
0837 pedestal = (pedestal + static_cast<ScalarType>(adc1)) / 2.;
0838 num = 2;
0839 }
0840 } else {
0841 pedestal = conditionsDev.pedestals_mean_x12()[ch];
0842 }
0843
0844
0845 auto const gainId = ecalMGPA::gainId(digis[inputTx]);
0846 auto const adc = ecalMGPA::adc(digis[inputTx]);
0847
0848 bool bad = false;
0849 ScalarType sample_value, sample_value_error;
0850
0851
0852
0853 if (!use_sample(sample_mask, sample)) {
0854 bad = true;
0855 sample_value = 0;
0856 sample_value_error = 0;
0857 } else if (gainId == 1) {
0858 sample_value = static_cast<ScalarType>(adc) - pedestal;
0859 sample_value_error = conditionsDev.pedestals_rms_x12()[hashedId];
0860 } else if (gainId == 2) {
0861 auto const mean_x6 = conditionsDev.pedestals_mean_x6()[hashedId];
0862 auto const rms_x6 = conditionsDev.pedestals_rms_x6()[hashedId];
0863 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0864 sample_value = (static_cast<ScalarType>(adc) - mean_x6) * gain12Over6;
0865 sample_value_error = rms_x6 * gain12Over6;
0866 } else if (gainId == 3) {
0867 auto const mean_x1 = conditionsDev.pedestals_mean_x1()[hashedId];
0868 auto const rms_x1 = conditionsDev.pedestals_rms_x1()[hashedId];
0869 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0870 auto const gain6Over1 = conditionsDev.gain6Over1()[hashedId];
0871 sample_value = (static_cast<ScalarType>(adc) - mean_x1) * gain6Over1 * gain12Over6;
0872 sample_value_error = rms_x1 * gain6Over1 * gain12Over6;
0873 } else {
0874 sample_value = 0;
0875 sample_value_error = 0;
0876 bad = true;
0877 }
0878
0879
0880 auto const useless_sample = (sample_value_error <= 0) | bad;
0881 useless_sample_values[tx] = useless_sample;
0882 sample_values[tx] = sample_value;
0883 sample_value_errors[tx] = useless_sample ? 1e+9 : sample_value_error;
0884
0885
0886 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG
0887 if (ch == 0) {
0888 printf("sample = %d sample_value = %f sample_value_error = %f useless = %c\n",
0889 sample,
0890 sample_value,
0891 sample_value_error,
0892 useless_sample ? '1' : '0');
0893 }
0894 #endif
0895
0896
0897 shrSampleValues[elemIdx] = sample_value_error > 0 ? sample_value : std::numeric_limits<ScalarType>::min();
0898 shrSampleValueErrors[elemIdx] = sample_value_error;
0899 alpaka::syncBlockThreads(acc);
0900
0901
0902 if (sample < 5) {
0903
0904 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 5]
0905 ? shrSampleValueErrors[elemIdx + 5]
0906 : shrSampleValueErrors[elemIdx];
0907 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 5]);
0908 }
0909 alpaka::syncBlockThreads(acc);
0910
0911
0912 if (sample < 3) {
0913 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 3]
0914 ? shrSampleValueErrors[elemIdx + 3]
0915 : shrSampleValueErrors[elemIdx];
0916 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 3]);
0917 }
0918 alpaka::syncBlockThreads(acc);
0919
0920 if (sample < 2) {
0921 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 2]
0922 ? shrSampleValueErrors[elemIdx + 2]
0923 : shrSampleValueErrors[elemIdx];
0924 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 2]);
0925 }
0926 alpaka::syncBlockThreads(acc);
0927
0928 if (sample == 0) {
0929
0930 auto const maxSampleValueError = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 1]
0931 ? shrSampleValueErrors[elemIdx + 1]
0932 : shrSampleValueErrors[elemIdx];
0933
0934
0935 pedestal_nums[ch] = num;
0936
0937 ampMaxError[ch] = maxSampleValueError;
0938
0939
0940 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG
0941 if (ch == 0) {
0942 printf("pedestal_nums = %d ampMaxError = %f\n", num, maxSampleValueError);
0943 }
0944 #endif
0945 }
0946 }
0947 }
0948 };
0949
0950
0951
0952
0953
0954 class Kernel_time_correction_and_finalize {
0955 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0956
0957 public:
0958 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0959 EcalDigiDeviceCollection::ConstView digisDevEB,
0960 EcalDigiDeviceCollection::ConstView digisDevEE,
0961 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEB,
0962 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEE,
0963 EcalMultifitConditionsDevice::ConstView conditionsDev,
0964 ScalarType* const g_timeMax,
0965 ScalarType* const g_timeError,
0966 ConfigurationParameters::type const timeConstantTermEB,
0967 ConfigurationParameters::type const timeConstantTermEE,
0968 ConfigurationParameters::type const timeNconstEB,
0969 ConfigurationParameters::type const timeNconstEE,
0970 ConfigurationParameters::type const amplitudeThresholdEB,
0971 ConfigurationParameters::type const amplitudeThresholdEE,
0972 ConfigurationParameters::type const outOfTimeThreshG12pEB,
0973 ConfigurationParameters::type const outOfTimeThreshG12pEE,
0974 ConfigurationParameters::type const outOfTimeThreshG12mEB,
0975 ConfigurationParameters::type const outOfTimeThreshG12mEE,
0976 ConfigurationParameters::type const outOfTimeThreshG61pEB,
0977 ConfigurationParameters::type const outOfTimeThreshG61pEE,
0978 ConfigurationParameters::type const outOfTimeThreshG61mEB,
0979 ConfigurationParameters::type const outOfTimeThreshG61mEE) const {
0980
0981 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0982 auto const nchannelsEB = digisDevEB.size();
0983 auto const nchannels = nchannelsEB + digisDevEE.size();
0984 auto const offsetForInputs = nchannelsEB;
0985 auto const offsetForHashes = conditionsDev.offsetEE();
0986
0987 for (auto gtx : cms::alpakatools::uniform_elements(acc, nchannels)) {
0988 const int inputGtx = gtx >= offsetForInputs ? gtx - offsetForInputs : gtx;
0989 auto const* dids = gtx >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0990 auto const* digis = gtx >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0991
0992 auto* g_amplitude = gtx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
0993 auto* g_jitter = gtx >= nchannelsEB ? uncalibRecHitsEE.jitter() : uncalibRecHitsEB.jitter();
0994 auto* g_jitterError = gtx >= nchannelsEB ? uncalibRecHitsEE.jitterError() : uncalibRecHitsEB.jitterError();
0995 auto* flags = gtx >= nchannelsEB ? uncalibRecHitsEE.flags() : uncalibRecHitsEB.flags();
0996
0997 auto const did = DetId{dids[inputGtx]};
0998 auto const isBarrel = did.subdetId() == EcalBarrel;
0999 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
1000 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
1001
1002 auto* const amplitudeBins = isBarrel ? conditionsDev.timeBiasCorrections_amplitude_EB().data()
1003 : conditionsDev.timeBiasCorrections_amplitude_EE().data();
1004 auto* const shiftBins = isBarrel ? conditionsDev.timeBiasCorrections_shift_EB().data()
1005 : conditionsDev.timeBiasCorrections_shift_EE().data();
1006 auto const amplitudeBinsSize =
1007 isBarrel ? conditionsDev.timeBiasCorrectionSizeEB() : conditionsDev.timeBiasCorrectionSizeEE();
1008 auto const timeConstantTerm = isBarrel ? timeConstantTermEB : timeConstantTermEE;
1009 auto const timeNconst = isBarrel ? timeNconstEB : timeNconstEE;
1010 auto const offsetTimeValue = isBarrel ? conditionsDev.timeOffset_EB() : conditionsDev.timeOffset_EE();
1011 auto const amplitudeThreshold = isBarrel ? amplitudeThresholdEB : amplitudeThresholdEE;
1012 auto const outOfTimeThreshG12p = isBarrel ? outOfTimeThreshG12pEB : outOfTimeThreshG12pEE;
1013 auto const outOfTimeThreshG12m = isBarrel ? outOfTimeThreshG12mEB : outOfTimeThreshG12mEE;
1014 auto const outOfTimeThreshG61p = isBarrel ? outOfTimeThreshG61pEB : outOfTimeThreshG61pEE;
1015 auto const outOfTimeThreshG61m = isBarrel ? outOfTimeThreshG61mEB : outOfTimeThreshG61mEE;
1016
1017
1018 auto const amplitude = g_amplitude[inputGtx];
1019 auto const rms_x12 = conditionsDev.pedestals_rms_x12()[hashedId];
1020 auto const timeCalibConst = conditionsDev.timeCalibConstants()[hashedId];
1021
1022 int myBin = -1;
1023 for (size_t bin = 0; bin < amplitudeBinsSize; ++bin) {
1024 if (amplitude > amplitudeBins[bin])
1025 myBin = bin;
1026 else
1027 break;
1028 }
1029
1030 ScalarType correction = 0;
1031 if (myBin == -1) {
1032 correction = shiftBins[0];
1033 } else if (myBin == static_cast<int>(amplitudeBinsSize) - 1) {
1034 correction = shiftBins[myBin];
1035 } else {
1036 correction = shiftBins[myBin + 1] - shiftBins[myBin];
1037 correction *= (amplitude - amplitudeBins[myBin]) / (amplitudeBins[myBin + 1] - amplitudeBins[myBin]);
1038 correction += shiftBins[myBin];
1039 }
1040
1041
1042 correction = correction * 0.04;
1043 auto const timeMax = g_timeMax[gtx];
1044 auto const timeError = g_timeError[gtx];
1045 auto const jitter = timeMax - 5 + correction;
1046 auto const jitterError =
1047 std::sqrt(timeError * timeError + timeConstantTerm * timeConstantTerm * 0.04 * 0.04);
1048
1049 #ifdef DEBUG_TIME_CORRECTION
1050 printf("ch = %d timeMax = %f timeError = %f jitter = %f correction = %f\n",
1051 gtx,
1052 timeMax,
1053 timeError,
1054 jitter,
1055 correction);
1056 #endif
1057
1058
1059 g_jitter[inputGtx] = jitter;
1060 g_jitterError[inputGtx] = jitterError;
1061
1062
1063
1064
1065 if (amplitude > amplitudeThreshold * rms_x12) {
1066 auto threshP = outOfTimeThreshG12p;
1067 auto threshM = outOfTimeThreshG12m;
1068 if (amplitude > 3000.) {
1069 for (int isample = 0; isample < nsamples; isample++) {
1070 auto const gainid = ecalMGPA::gainId(digis[nsamples * inputGtx + isample]);
1071 if (gainid != 1) {
1072 threshP = outOfTimeThreshG61p;
1073 threshM = outOfTimeThreshG61m;
1074 break;
1075 }
1076 }
1077 }
1078
1079 auto const correctedTime = (timeMax - 5) * 25 + timeCalibConst + offsetTimeValue;
1080 auto const nterm = timeNconst * rms_x12 / amplitude;
1081 auto const sigmat = std::sqrt(nterm * nterm + timeConstantTerm * timeConstantTerm);
1082 if (correctedTime > sigmat * threshP || correctedTime < -sigmat * threshM)
1083 flags[inputGtx] |= 0x1 << EcalUncalibratedRecHit::kOutOfTime;
1084 }
1085 }
1086 }
1087 };
1088
1089 }
1090
1091 namespace alpaka::trait {
1092 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
1093 using namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit;
1094
1095
1096 template <>
1097 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_nullhypot, Acc1D> {
1098
1099 template <typename TVec, typename... TArgs>
1100 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_nullhypot const&,
1101 TVec const& threadsPerBlock,
1102 TVec const& elemsPerThread,
1103 TArgs const&...) -> std::size_t {
1104 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
1105
1106
1107 std::size_t bytes = threadsPerBlock[0u] * elemsPerThread[0u] * 4 * sizeof(ScalarType);
1108 return bytes;
1109 }
1110 };
1111
1112
1113 template <>
1114 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_makeratio, Acc1D> {
1115 template <typename TVec, typename... TArgs>
1116 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_makeratio const&,
1117 TVec const& threadsPerBlock,
1118 TVec const& elemsPerThread,
1119 TArgs const&...) -> std::size_t {
1120 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
1121
1122 std::size_t bytes = (8 * sizeof(ScalarType) + 3 * sizeof(bool)) * threadsPerBlock[0u] * elemsPerThread[0u];
1123 return bytes;
1124 }
1125 };
1126
1127
1128 template <>
1129 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_findamplchi2_and_finish, Acc1D> {
1130 template <typename TVec, typename... TArgs>
1131 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_findamplchi2_and_finish const&,
1132 TVec const& threadsPerBlock,
1133 TVec const& elemsPerThread,
1134 TArgs const&...) -> std::size_t {
1135 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
1136
1137 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] * sizeof(ScalarType);
1138 return bytes;
1139 }
1140 };
1141
1142
1143 template <>
1144 struct BlockSharedMemDynSizeBytes<Kernel_time_computation_init, Acc1D> {
1145 template <typename TVec, typename... TArgs>
1146 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_computation_init const&,
1147 TVec const& threadsPerBlock,
1148 TVec const& elemsPerThread,
1149 TArgs const&...) -> std::size_t {
1150 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
1151
1152 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] * sizeof(ScalarType);
1153 return bytes;
1154 }
1155 };
1156
1157 }
1158
1159 #endif