File indexing completed on 2024-04-23 22:56:27
0001 #ifndef RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h
0002 #define RecoLocalCalo_EcalRecProducers_plugins_alpaka_TimeComputationKernels_h
0003
0004 #include <cassert>
0005 #include <cmath>
0006 #include <limits>
0007
0008 #include "CondFormats/EcalObjects/interface/alpaka/EcalMultifitConditionsDevice.h"
0009 #include "CondFormats/EcalObjects/interface/alpaka/EcalMultifitParametersDevice.h"
0010 #include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
0011 #include "DataFormats/EcalDigi/interface/EcalMGPASample.h"
0012 #include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
0013 #include "DataFormats/Math/interface/approx_exp.h"
0014 #include "DataFormats/Math/interface/approx_log.h"
0015 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0016 #include "RecoLocalCalo/EcalRecProducers/interface/EigenMatrixTypes_gpu.h"
0017
0018 #include "DeclsForKernels.h"
0019 #include "KernelHelpers.h"
0020
0021
0022
0023 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0024
0025 ALPAKA_FN_ACC ALPAKA_FN_INLINE bool use_sample(unsigned int sample_mask, unsigned int sample) {
0026 return sample_mask & (0x1 << (EcalDataFrame::MAXSAMPLES - (sample + 1)));
0027 }
0028
0029 ALPAKA_FN_ACC constexpr float fast_expf(float x) { return unsafe_expf<6>(x); }
0030 ALPAKA_FN_ACC constexpr float fast_logf(float x) { return unsafe_logf<7>(x); }
0031
0032 class Kernel_time_compute_nullhypot {
0033 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0034
0035 public:
0036 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0037 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0038 ScalarType* const sample_values,
0039 ScalarType* const sample_value_errors,
0040 bool* const useless_sample_values,
0041 ScalarType* chi2s,
0042 ScalarType* sum0s,
0043 ScalarType* sumAAs,
0044 uint32_t const nchannels) const {
0045 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0046
0047
0048 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0049
0050
0051 auto* s_sum0 = alpaka::getDynSharedMem<char>(acc);
0052 auto* s_sum1 = reinterpret_cast<ScalarType*>(s_sum0 + elemsPerBlock);
0053 auto* s_sumA = s_sum1 + elemsPerBlock;
0054 auto* s_sumAA = s_sumA + elemsPerBlock;
0055
0056 for (auto txforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0057
0058 auto tx = nchannels * nsamples - 1 - txforward;
0059 auto const ch = tx / nsamples;
0060
0061 auto const sample = tx % nsamples;
0062 auto const ltx = tx % elemsPerBlock;
0063
0064
0065 auto const inv_error =
0066 useless_sample_values[tx] ? 0. : 1. / (sample_value_errors[tx] * sample_value_errors[tx]);
0067 auto const sample_value = sample_values[tx];
0068 s_sum0[ltx] = useless_sample_values[tx] ? 0 : 1;
0069 s_sum1[ltx] = inv_error;
0070 s_sumA[ltx] = sample_value * inv_error;
0071 s_sumAA[ltx] = sample_value * sample_value * inv_error;
0072 alpaka::syncBlockThreads(acc);
0073
0074
0075 if (sample < 5) {
0076 s_sum0[ltx] += s_sum0[ltx + 5];
0077 s_sum1[ltx] += s_sum1[ltx + 5];
0078 s_sumA[ltx] += s_sumA[ltx + 5];
0079 s_sumAA[ltx] += s_sumAA[ltx + 5];
0080 }
0081 alpaka::syncBlockThreads(acc);
0082
0083 if (sample < 2) {
0084
0085 s_sum0[ltx] += s_sum0[ltx + 2] + s_sum0[ltx + 3];
0086 s_sum1[ltx] += s_sum1[ltx + 2] + s_sum1[ltx + 3];
0087 s_sumA[ltx] += s_sumA[ltx + 2] + s_sumA[ltx + 3];
0088 s_sumAA[ltx] += s_sumAA[ltx + 2] + s_sumAA[ltx + 3];
0089 }
0090 alpaka::syncBlockThreads(acc);
0091
0092 if (sample == 0) {
0093
0094 auto const sum0 = s_sum0[ltx] + s_sum0[ltx + 1] - s_sum0[ltx + 3];
0095 auto const sum1 = s_sum1[ltx] + s_sum1[ltx + 1] - s_sum1[ltx + 3];
0096 auto const sumA = s_sumA[ltx] + s_sumA[ltx + 1] - s_sumA[ltx + 3];
0097 auto const sumAA = s_sumAA[ltx] + s_sumAA[ltx + 1] - s_sumAA[ltx + 3];
0098 auto const chi2 = sum0 > 0 ? (sumAA - sumA * sumA / sum1) / sum0 : static_cast<ScalarType>(0);
0099 chi2s[ch] = chi2;
0100 sum0s[ch] = sum0;
0101 sumAAs[ch] = sumAA;
0102
0103 #ifdef DEBUG_TC_NULLHYPOT
0104 if (ch == 0) {
0105 printf("chi2 = %f sum0 = %d sumAA = %f\n", chi2, static_cast<int>(sum0), sumAA);
0106 }
0107 #endif
0108 }
0109 }
0110 }
0111 };
0112
0113
0114
0115
0116
0117
0118
0119 class Kernel_time_compute_makeratio {
0120 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0121
0122 public:
0123 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0124 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0125 EcalDigiDeviceCollection::ConstView digisDevEB,
0126 EcalDigiDeviceCollection::ConstView digisDevEE,
0127 ScalarType* const sample_values,
0128 ScalarType* const sample_value_errors,
0129 bool* const useless_sample_values,
0130 char* const pedestal_nums,
0131 ScalarType* const sumAAsNullHypot,
0132 ScalarType* const sum0sNullHypot,
0133 ScalarType* tMaxAlphaBetas,
0134 ScalarType* tMaxErrorAlphaBetas,
0135 ScalarType* g_accTimeMax,
0136 ScalarType* g_accTimeWgt,
0137 TimeComputationState* g_state,
0138 EcalMultifitParametersDevice::ConstView paramsDev,
0139 ConfigurationParameters::type const timeFitLimits_firstEB,
0140 ConfigurationParameters::type const timeFitLimits_firstEE,
0141 ConfigurationParameters::type const timeFitLimits_secondEB,
0142 ConfigurationParameters::type const timeFitLimits_secondEE) const {
0143
0144 constexpr uint32_t nchannels_per_block = 10;
0145 constexpr auto nthreads_per_channel = nchannels_per_block * (nchannels_per_block - 1) / 2;
0146 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0147 auto const nchannels = digisDevEB.size() + digisDevEE.size();
0148 auto const offsetForInputs = digisDevEB.size();
0149 auto const totalElements = nthreads_per_channel * nchannels;
0150
0151 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0152 ALPAKA_ASSERT_ACC(nthreads_per_channel * nchannels_per_block == elemsPerBlock);
0153
0154 auto* shr_chi2s = alpaka::getDynSharedMem<ScalarType>(acc);
0155 auto* shr_time_wgt = shr_chi2s + elemsPerBlock;
0156 auto* shr_time_max = shr_time_wgt + elemsPerBlock;
0157 auto* shrTimeMax = shr_time_max + elemsPerBlock;
0158 auto* shrTimeWgt = shrTimeMax + elemsPerBlock;
0159 auto* shr_chi2 = shrTimeWgt + elemsPerBlock;
0160 auto* shr_tmax = shr_chi2 + elemsPerBlock;
0161 auto* shr_tmaxerr = shr_tmax + elemsPerBlock;
0162 auto* shr_condForUselessSamples = reinterpret_cast<bool*>(shr_tmaxerr + elemsPerBlock);
0163 auto* shr_internalCondForSkipping1 = shr_condForUselessSamples + elemsPerBlock;
0164 auto* shr_internalCondForSkipping2 = shr_internalCondForSkipping1 + elemsPerBlock;
0165
0166 for (auto block : cms::alpakatools::uniform_groups(acc, totalElements)) {
0167 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0168 auto const ch = idx.global / nthreads_per_channel;
0169 auto const ltx = idx.global % nthreads_per_channel;
0170
0171 auto const ch_start = ch * nsamples;
0172 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0173 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0174
0175 auto const did = DetId{dids[inputCh]};
0176 auto const isBarrel = did.subdetId() == EcalBarrel;
0177 auto* const amplitudeFitParameters =
0178 isBarrel ? paramsDev.amplitudeFitParamsEB().data() : paramsDev.amplitudeFitParamsEE().data();
0179 auto* const timeFitParameters =
0180 isBarrel ? paramsDev.timeFitParamsEB().data() : paramsDev.timeFitParamsEE().data();
0181 auto const timeFitParameters_size =
0182 isBarrel ? paramsDev.timeFitParamsEB().size() : paramsDev.timeFitParamsEE().size();
0183 auto const timeFitLimits_first = isBarrel ? timeFitLimits_firstEB : timeFitLimits_firstEE;
0184 auto const timeFitLimits_second = isBarrel ? timeFitLimits_secondEB : timeFitLimits_secondEE;
0185
0186
0187 int sample_i = 0;
0188 int sample_j = 0;
0189 if (ltx <= 8) {
0190 sample_i = 0;
0191 sample_j = 1 + ltx;
0192 } else if (ltx <= 16) {
0193 sample_i = 1;
0194 sample_j = 2 + ltx - 9;
0195 } else if (ltx <= 23) {
0196 sample_i = 2;
0197 sample_j = 3 + ltx - 17;
0198 } else if (ltx <= 29) {
0199 sample_i = 3;
0200 sample_j = 4 + ltx - 24;
0201 } else if (ltx <= 34) {
0202 sample_i = 4;
0203 sample_j = 5 + ltx - 30;
0204 } else if (ltx <= 38) {
0205 sample_i = 5;
0206 sample_j = 6 + ltx - 35;
0207 } else if (ltx <= 41) {
0208 sample_i = 6;
0209 sample_j = 7 + ltx - 39;
0210 } else if (ltx <= 43) {
0211 sample_i = 7;
0212 sample_j = 8 + ltx - 42;
0213 } else if (ltx <= 44) {
0214 sample_i = 8;
0215 sample_j = 9;
0216 } else {
0217
0218 ALPAKA_ASSERT_ACC(false);
0219 }
0220
0221 auto const tx_i = ch_start + sample_i;
0222 auto const tx_j = ch_start + sample_j;
0223
0224
0225
0226
0227
0228
0229 bool const condForUselessSamples = useless_sample_values[tx_i] || useless_sample_values[tx_j] ||
0230 sample_values[tx_i] <= 1 || sample_values[tx_j] <= 1;
0231
0232
0233
0234
0235 ScalarType chi2 = std::numeric_limits<ScalarType>::max();
0236 ScalarType tmax = 0;
0237 ScalarType tmaxerr = 0;
0238 shrTimeMax[idx.local] = 0;
0239 shrTimeWgt[idx.local] = 0;
0240
0241 bool internalCondForSkipping1 = true;
0242 bool internalCondForSkipping2 = true;
0243 if (!condForUselessSamples) {
0244 auto const rtmp = sample_values[tx_i] / sample_values[tx_j];
0245 auto const invampl_i = 1. / sample_values[tx_i];
0246 auto const relErr2_i = sample_value_errors[tx_i] * sample_value_errors[tx_i] * invampl_i * invampl_i;
0247 auto const invampl_j = 1. / sample_values[tx_j];
0248 auto const relErr2_j = sample_value_errors[tx_j] * sample_value_errors[tx_j] * invampl_j * invampl_j;
0249 auto const err1 = rtmp * rtmp * (relErr2_i + relErr2_j);
0250 auto err2 =
0251 sample_value_errors[tx_j] * (sample_values[tx_i] - sample_values[tx_j]) * (invampl_j * invampl_j);
0252
0253
0254
0255 if (pedestal_nums[ch] == 2)
0256 err2 *= err2 * 0.5;
0257 auto const err3 = (0.289 * 0.289) * (invampl_j * invampl_j);
0258 auto const total_error = std::sqrt(err1 + err2 + err3);
0259
0260 auto const alpha = amplitudeFitParameters[0];
0261 auto const beta = amplitudeFitParameters[1];
0262 auto const alphabeta = alpha * beta;
0263 auto const invalphabeta = 1. / alphabeta;
0264
0265
0266 auto const ratio_index = sample_i;
0267 auto const ratio_step = sample_j - sample_i;
0268 auto const ratio_value = rtmp;
0269 auto const ratio_error = total_error;
0270
0271 auto const rlim_i_j = fast_expf(static_cast<ScalarType>(sample_j - sample_i) / beta) - 0.001;
0272 internalCondForSkipping1 = !(total_error < 1. && rtmp > 0.001 && rtmp < rlim_i_j);
0273 if (!internalCondForSkipping1) {
0274
0275
0276
0277
0278
0279
0280 auto const l_timeFitLimits_first = timeFitLimits_first;
0281 auto const l_timeFitLimits_second = timeFitLimits_second;
0282 if (ratio_step == 1 && ratio_value >= l_timeFitLimits_first && ratio_value <= l_timeFitLimits_second) {
0283 auto const time_max_i = static_cast<ScalarType>(ratio_index);
0284 auto u = timeFitParameters[timeFitParameters_size - 1];
0285 CMS_UNROLL_LOOP
0286 for (int k = timeFitParameters_size - 2; k >= 0; --k)
0287 u = u * ratio_value + timeFitParameters[k];
0288
0289 auto du = (timeFitParameters_size - 1) * (timeFitParameters[timeFitParameters_size - 1]);
0290 for (int k = timeFitParameters_size - 2; k >= 1; --k)
0291 du = du * ratio_value + k * timeFitParameters[k];
0292
0293 auto const error2 = ratio_error * ratio_error * du * du;
0294 auto const time_max = error2 > 0 ? (time_max_i - u) / error2 : static_cast<ScalarType>(0);
0295 auto const time_wgt = error2 > 0 ? 1. / error2 : static_cast<ScalarType>(0);
0296
0297
0298
0299
0300 shrTimeMax[idx.local] = error2 > 0 ? time_max : 0;
0301 shrTimeWgt[idx.local] = error2 > 0 ? time_wgt : 0;
0302 } else {
0303 shrTimeMax[idx.local] = 0;
0304 shrTimeWgt[idx.local] = 0;
0305 }
0306
0307
0308 auto const stepOverBeta = static_cast<ScalarType>(ratio_step) / beta;
0309 auto const offset = static_cast<ScalarType>(ratio_index) + alphabeta;
0310 auto const rmin = std::max(ratio_value - ratio_error, 0.001);
0311 auto const rmax =
0312 std::min(ratio_value + ratio_error, fast_expf(static_cast<ScalarType>(ratio_step) / beta) - 0.001);
0313 auto const time1 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmin)) / alpha) - 1.);
0314 auto const time2 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmax)) / alpha) - 1.);
0315
0316
0317 tmax = 0.5 * (time1 + time2);
0318 tmaxerr = 0.5 * std::sqrt((time1 - time2) * (time1 - time2));
0319 #ifdef DEBUG_TC_MAKERATIO
0320 if (ch == 1 || ch == 0)
0321 printf(
0322 "ch = %d ltx = %d tmax = %f tmaxerr = %f time1 = %f time2 = %f offset = %f rmin = %f rmax = "
0323 "%f\n",
0324 ch,
0325 ltx,
0326 tmax,
0327 tmaxerr,
0328 time1,
0329 time2,
0330 offset,
0331 rmin,
0332 rmax);
0333 #endif
0334
0335 ScalarType sumAf = 0;
0336 ScalarType sumff = 0;
0337 const int itmin = std::max(-1, static_cast<int>(std::floor(tmax - alphabeta)));
0338 auto loffset = (static_cast<ScalarType>(itmin) - tmax) * invalphabeta;
0339
0340 for (int it = itmin + 1; it < nsamples; ++it) {
0341 loffset += invalphabeta;
0342 if (useless_sample_values[ch_start + it])
0343 continue;
0344 auto const inverr2 = 1. / (sample_value_errors[ch_start + it] * sample_value_errors[ch_start + it]);
0345 auto const term1 = 1. + loffset;
0346 auto const f = (term1 > 1e-6) ? fast_expf(alpha * (fast_logf(term1) - loffset)) : 0;
0347 sumAf += sample_values[ch_start + it] * (f * inverr2);
0348 sumff += f * (f * inverr2);
0349 }
0350
0351 auto const sumAA = sumAAsNullHypot[ch];
0352 auto const sum0 = sum0sNullHypot[ch];
0353 chi2 = sumAA;
0354
0355 if (sumff > 0) {
0356 chi2 = sumAA - sumAf * (sumAf / sumff);
0357 }
0358 chi2 /= sum0;
0359
0360 #ifdef DEBUG_TC_MAKERATIO
0361 if (ch == 1 || ch == 0)
0362 printf(
0363 "ch = %d ltx = %d sumAf = %f sumff = %f sumAA = %f sum0 = %d tmax = %f tmaxerr = %f chi2 = "
0364 "%f\n",
0365 ch,
0366 ltx,
0367 sumAf,
0368 sumff,
0369 sumAA,
0370 static_cast<int>(sum0),
0371 tmax,
0372 tmaxerr,
0373 chi2);
0374 #endif
0375
0376 if (chi2 > 0 && tmax > 0 && tmaxerr > 0)
0377 internalCondForSkipping2 = false;
0378 else
0379 chi2 = std::numeric_limits<ScalarType>::max();
0380 }
0381 }
0382
0383
0384 shr_chi2s[idx.local] = chi2;
0385 shr_chi2[idx.local] = chi2;
0386 shr_tmax[idx.local] = tmax;
0387 shr_tmaxerr[idx.local] = tmaxerr;
0388 shr_condForUselessSamples[idx.local] = condForUselessSamples;
0389 shr_internalCondForSkipping1[idx.local] = internalCondForSkipping1;
0390 shr_internalCondForSkipping2[idx.local] = internalCondForSkipping2;
0391 }
0392
0393 alpaka::syncBlockThreads(acc);
0394
0395
0396
0397 auto iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
0398 bool oddElements = nthreads_per_channel % 2;
0399 CMS_UNROLL_LOOP
0400 while (iter >= 1) {
0401 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0402 auto const ltx = idx.global % nthreads_per_channel;
0403
0404 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
0405
0406
0407 shr_chi2s[idx.local] = std::min(shr_chi2s[idx.local], shr_chi2s[idx.local + iter]);
0408 }
0409 }
0410 alpaka::syncBlockThreads(acc);
0411
0412 oddElements = iter % 2;
0413 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
0414 }
0415
0416 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0417 auto const ltx = idx.global % nthreads_per_channel;
0418
0419
0420 auto const condForUselessSamples = shr_condForUselessSamples[idx.local];
0421 auto const internalCondForSkipping1 = shr_internalCondForSkipping1[idx.local];
0422 auto const internalCondForSkipping2 = shr_internalCondForSkipping2[idx.local];
0423
0424 if (!condForUselessSamples && !internalCondForSkipping1 && !internalCondForSkipping2) {
0425
0426
0427 auto const chi2 = shr_chi2[idx.local];
0428 auto const chi2min = shr_chi2s[idx.local - ltx];
0429 auto const chi2Limit = chi2min + 1.;
0430 auto const tmaxerr = shr_tmaxerr[idx.local];
0431 auto const inverseSigmaSquared = chi2 < chi2Limit ? 1. / (tmaxerr * tmaxerr) : 0.;
0432
0433 #ifdef DEBUG_TC_MAKERATIO
0434 if (ch == 1 || ch == 0) {
0435 auto const ch = idx.global / nthreads_per_channel;
0436 printf("ch = %d ltx = %d chi2min = %f chi2Limit = %f inverseSigmaSquared = %f\n",
0437 ch,
0438 ltx,
0439 chi2min,
0440 chi2Limit,
0441 inverseSigmaSquared);
0442 }
0443 #endif
0444
0445
0446
0447
0448 auto const tmax = shr_tmax[idx.local];
0449 shr_time_wgt[idx.local] = inverseSigmaSquared;
0450 shr_time_max[idx.local] = tmax * inverseSigmaSquared;
0451 } else {
0452 shr_time_wgt[idx.local] = 0;
0453 shr_time_max[idx.local] = 0;
0454 }
0455 }
0456
0457 alpaka::syncBlockThreads(acc);
0458
0459
0460 iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
0461 oddElements = nthreads_per_channel % 2;
0462 CMS_UNROLL_LOOP
0463 while (iter >= 1) {
0464 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0465 auto const ltx = idx.global % nthreads_per_channel;
0466
0467 if (ltx < iter && !(oddElements && (ltx == iter - 1 && ltx > 0))) {
0468 shr_time_wgt[idx.local] += shr_time_wgt[idx.local + iter];
0469 shr_time_max[idx.local] += shr_time_max[idx.local + iter];
0470 shrTimeMax[idx.local] += shrTimeMax[idx.local + iter];
0471 shrTimeWgt[idx.local] += shrTimeWgt[idx.local + iter];
0472 }
0473 }
0474
0475 alpaka::syncBlockThreads(acc);
0476 oddElements = iter % 2;
0477 iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
0478 }
0479
0480 for (auto idx : cms::alpakatools::uniform_group_elements(acc, block, totalElements)) {
0481 auto const ltx = idx.global % nthreads_per_channel;
0482
0483
0484
0485
0486 if (ltx == 0) {
0487 auto const ch = idx.global / nthreads_per_channel;
0488 auto const tmp_time_max = shr_time_max[idx.local];
0489 auto const tmp_time_wgt = shr_time_wgt[idx.local];
0490
0491
0492 if (tmp_time_wgt == 0 && tmp_time_max == 0) {
0493 g_state[ch] = TimeComputationState::Finished;
0494 continue;
0495 }
0496
0497
0498 auto const tMaxAlphaBeta = tmp_time_max / tmp_time_wgt;
0499 auto const tMaxErrorAlphaBeta = 1. / std::sqrt(tmp_time_wgt);
0500
0501 tMaxAlphaBetas[ch] = tMaxAlphaBeta;
0502 tMaxErrorAlphaBetas[ch] = tMaxErrorAlphaBeta;
0503 g_accTimeMax[ch] = shrTimeMax[idx.local];
0504 g_accTimeWgt[ch] = shrTimeWgt[idx.local];
0505 g_state[ch] = TimeComputationState::NotFinished;
0506
0507 #ifdef DEBUG_TC_MAKERATIO
0508 printf("ch = %d time_max = %f time_wgt = %f\n", ch, tmp_time_max, tmp_time_wgt);
0509 printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f timeMax = %f timeWgt = %f\n",
0510 ch,
0511 tMaxAlphaBeta,
0512 tMaxErrorAlphaBeta,
0513 shrTimeMax[idx.local],
0514 shrTimeWgt[idx.local]);
0515 #endif
0516 }
0517 }
0518 }
0519 }
0520 };
0521
0522 class Kernel_time_compute_findamplchi2_and_finish {
0523 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0524
0525 public:
0526 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0527 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0528 EcalDigiDeviceCollection::ConstView digisDevEB,
0529 EcalDigiDeviceCollection::ConstView digisDevEE,
0530 ScalarType* const sample_values,
0531 ScalarType* const sample_value_errors,
0532 bool* const useless_samples,
0533 ScalarType* const g_tMaxAlphaBeta,
0534 ScalarType* const g_tMaxErrorAlphaBeta,
0535 ScalarType* const g_accTimeMax,
0536 ScalarType* const g_accTimeWgt,
0537 ScalarType* const sumAAsNullHypot,
0538 ScalarType* const sum0sNullHypot,
0539 ScalarType* const chi2sNullHypot,
0540 TimeComputationState* g_state,
0541 ScalarType* g_ampMaxAlphaBeta,
0542 ScalarType* g_ampMaxError,
0543 ScalarType* g_timeMax,
0544 ScalarType* g_timeError,
0545 EcalMultifitParametersDevice::ConstView paramsDev) const {
0546
0547
0548
0549
0550
0551
0552 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0553 auto const nchannels = digisDevEB.size() + digisDevEE.size();
0554 auto const offsetForInputs = digisDevEB.size();
0555
0556 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0557
0558
0559
0560
0561 auto* shr_sumAf = alpaka::getDynSharedMem<ScalarType>(acc);
0562 auto* shr_sumff = shr_sumAf + elemsPerBlock;
0563
0564 for (auto gtxforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0565
0566 auto gtx = nchannels * nsamples - 1 - gtxforward;
0567 auto const ch = gtx / nsamples;
0568 auto const elemIdx = gtx % elemsPerBlock;
0569 auto const sample = elemIdx % nsamples;
0570
0571 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0572 auto const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0573
0574 auto state = g_state[ch];
0575 auto const did = DetId{dids[inputCh]};
0576 auto* const amplitudeFitParameters = did.subdetId() == EcalBarrel ? paramsDev.amplitudeFitParamsEB().data()
0577 : paramsDev.amplitudeFitParamsEE().data();
0578
0579
0580
0581 if (state == TimeComputationState::NotFinished) {
0582 auto const alpha = amplitudeFitParameters[0];
0583 auto const beta = amplitudeFitParameters[1];
0584 auto const alphabeta = alpha * beta;
0585 auto const invalphabeta = 1. / alphabeta;
0586 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
0587 auto const sample_value = sample_values[gtx];
0588 auto const sample_value_error = sample_value_errors[gtx];
0589 auto const inverr2 =
0590 useless_samples[gtx] ? static_cast<ScalarType>(0) : 1. / (sample_value_error * sample_value_error);
0591 auto const offset = (static_cast<ScalarType>(sample) - tMaxAlphaBeta) * invalphabeta;
0592 auto const term1 = 1. + offset;
0593 auto const f = term1 > 1e-6 ? fast_expf(alpha * (fast_logf(term1) - offset)) : static_cast<ScalarType>(0.);
0594 auto const sumAf = sample_value * (f * inverr2);
0595 auto const sumff = f * (f * inverr2);
0596
0597
0598 shr_sumAf[elemIdx] = sumAf;
0599 shr_sumff[elemIdx] = sumff;
0600 } else {
0601 shr_sumAf[elemIdx] = 0;
0602 shr_sumff[elemIdx] = 0;
0603 }
0604
0605 alpaka::syncBlockThreads(acc);
0606
0607
0608
0609 if (sample < 5) {
0610 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 5];
0611 shr_sumff[elemIdx] += shr_sumff[elemIdx + 5];
0612 }
0613
0614 alpaka::syncBlockThreads(acc);
0615
0616 if (sample < 2) {
0617
0618 shr_sumAf[elemIdx] += shr_sumAf[elemIdx + 2] + shr_sumAf[elemIdx + 3];
0619 shr_sumff[elemIdx] += shr_sumff[elemIdx + 2] + shr_sumff[elemIdx + 3];
0620 }
0621
0622 alpaka::syncBlockThreads(acc);
0623
0624 if (sample == 0) {
0625
0626
0627 if (state == TimeComputationState::Finished) {
0628 g_timeMax[ch] = 5;
0629 g_timeError[ch] = -999;
0630 continue;
0631 }
0632
0633
0634 auto const sumff = shr_sumff[elemIdx] + shr_sumff[elemIdx + 1] - shr_sumff[elemIdx + 3];
0635 auto const sumAf = shr_sumAf[elemIdx] + shr_sumAf[elemIdx + 1] - shr_sumAf[elemIdx + 3];
0636
0637 auto const ampMaxAlphaBeta = sumff > 0 ? sumAf / sumff : 0;
0638 auto const sumAA = sumAAsNullHypot[ch];
0639 auto const sum0 = sum0sNullHypot[ch];
0640 auto const nullChi2 = chi2sNullHypot[ch];
0641 if (sumff > 0) {
0642 auto const chi2AlphaBeta = (sumAA - sumAf * sumAf / sumff) / sum0;
0643 if (chi2AlphaBeta > nullChi2) {
0644
0645 state = TimeComputationState::Finished;
0646 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0647 printf("ch = %d chi2AlphaBeta = %f nullChi2 = %f sumAA = %f sumAf = %f sumff = %f sum0 = %f\n",
0648 ch,
0649 chi2AlphaBeta,
0650 nullChi2,
0651 sumAA,
0652 sumAf,
0653 sumff,
0654 sum0);
0655 #endif
0656 }
0657
0658
0659 g_ampMaxAlphaBeta[ch] = ampMaxAlphaBeta;
0660 } else {
0661 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0662 printf("ch = %d sum0 = %f sumAA = %f sumff = %f sumAf = %f\n", ch, sum0, sumAA, sumff, sumAf);
0663 #endif
0664 state = TimeComputationState::Finished;
0665 }
0666
0667
0668 g_state[ch] = state;
0669 if (state == TimeComputationState::Finished) {
0670
0671 g_timeMax[ch] = 5;
0672 g_timeError[ch] = -999;
0673 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0674 printf("ch = %d finished state\n", ch);
0675 #endif
0676 continue;
0677 }
0678
0679 auto const ampMaxError = g_ampMaxError[ch];
0680 auto const test_ratio = ampMaxAlphaBeta / ampMaxError;
0681 auto const accTimeMax = g_accTimeMax[ch];
0682 auto const accTimeWgt = g_accTimeWgt[ch];
0683 auto const tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
0684 auto const tMaxErrorAlphaBeta = g_tMaxErrorAlphaBeta[ch];
0685
0686
0687 if (test_ratio > 5. && accTimeWgt > 0) {
0688 auto const tMaxRatio = accTimeWgt > 0 ? accTimeMax / accTimeWgt : static_cast<ScalarType>(0);
0689 auto const tMaxErrorRatio = accTimeWgt > 0 ? 1. / std::sqrt(accTimeWgt) : static_cast<ScalarType>(0);
0690
0691 if (test_ratio > 10.) {
0692 g_timeMax[ch] = tMaxRatio;
0693 g_timeError[ch] = tMaxErrorRatio;
0694
0695 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0696 printf("ch = %d tMaxRatio = %f tMaxErrorRatio = %f\n", ch, tMaxRatio, tMaxErrorRatio);
0697 #endif
0698 } else {
0699 auto const timeMax = (tMaxAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
0700 tMaxRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
0701 5.;
0702 auto const timeError = (tMaxErrorAlphaBeta * (10. - ampMaxAlphaBeta / ampMaxError) +
0703 tMaxErrorRatio * (ampMaxAlphaBeta / ampMaxError - 5.)) /
0704 5.;
0705 state = TimeComputationState::Finished;
0706 g_state[ch] = state;
0707 g_timeMax[ch] = timeMax;
0708 g_timeError[ch] = timeError;
0709
0710 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0711 printf("ch = %d timeMax = %f timeError = %f\n", ch, timeMax, timeError);
0712 #endif
0713 }
0714 } else {
0715 state = TimeComputationState::Finished;
0716 g_state[ch] = state;
0717 g_timeMax[ch] = tMaxAlphaBeta;
0718 g_timeError[ch] = tMaxErrorAlphaBeta;
0719
0720 #ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
0721 printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f\n", ch, tMaxAlphaBeta, tMaxErrorAlphaBeta);
0722 #endif
0723 }
0724 }
0725 }
0726 }
0727 };
0728
0729 class Kernel_time_compute_fixMGPAslew {
0730 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0731
0732 public:
0733 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0734 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0735 EcalDigiDeviceCollection::ConstView digisDevEB,
0736 EcalDigiDeviceCollection::ConstView digisDevEE,
0737 EcalMultifitConditionsDevice::ConstView conditionsDev,
0738 ScalarType* sample_values,
0739 ScalarType* sample_value_errors,
0740 bool* useless_sample_values) const {
0741
0742 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0743
0744 auto const nchannelsEB = digisDevEB.size();
0745 auto const offsetForInputs = nchannelsEB;
0746
0747 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0748
0749 for (auto gtx : cms::alpakatools::uniform_elements(acc, nchannelsEB * nsamples)) {
0750 auto const elemIdx = gtx % elemsPerBlock;
0751 auto const sample = elemIdx % nsamples;
0752 auto const ch = gtx / nsamples;
0753
0754
0755 if (sample == 0)
0756 continue;
0757
0758 if (!use_sample(conditionsDev.sampleMask_EB(), sample))
0759 continue;
0760
0761 int const inputGtx = ch >= offsetForInputs ? gtx - offsetForInputs * nsamples : gtx;
0762 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0763
0764 auto const gainIdPrev = ecalMGPA::gainId(digis[inputGtx - 1]);
0765 auto const gainIdNext = ecalMGPA::gainId(digis[inputGtx]);
0766 if (gainIdPrev >= 1 && gainIdPrev <= 3 && gainIdNext >= 1 && gainIdNext <= 3 && gainIdPrev < gainIdNext) {
0767 sample_values[gtx - 1] = 0;
0768 sample_value_errors[gtx - 1] = 1e+9;
0769 useless_sample_values[gtx - 1] = true;
0770 }
0771 }
0772 }
0773 };
0774
0775
0776 class Kernel_time_computation_init {
0777 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0778
0779 public:
0780 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0781 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0782 EcalDigiDeviceCollection::ConstView digisDevEB,
0783 EcalDigiDeviceCollection::ConstView digisDevEE,
0784 EcalMultifitConditionsDevice::ConstView conditionsDev,
0785 ScalarType* sample_values,
0786 ScalarType* sample_value_errors,
0787 ScalarType* ampMaxError,
0788 bool* useless_sample_values,
0789 char* pedestal_nums) const {
0790
0791 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0792
0793
0794 auto const nchannelsEB = digisDevEB.size();
0795 auto const nchannels = nchannelsEB + digisDevEE.size();
0796 auto const offsetForInputs = nchannelsEB;
0797 auto const offsetForHashes = conditionsDev.offsetEE();
0798
0799 auto const elemsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
0800
0801
0802 auto* shrSampleValues = alpaka::getDynSharedMem<ScalarType>(acc);
0803 auto* shrSampleValueErrors = shrSampleValues + elemsPerBlock;
0804
0805 for (auto txforward : cms::alpakatools::uniform_elements(acc, nchannels * nsamples)) {
0806
0807 auto tx = nchannels * nsamples - 1 - txforward;
0808 auto const ch = tx / nsamples;
0809 auto const elemIdx = tx % elemsPerBlock;
0810
0811 int const inputTx = ch >= offsetForInputs ? tx - offsetForInputs * nsamples : tx;
0812 int const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
0813 auto const* digis = ch >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0814 auto const* dids = ch >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0815
0816
0817 auto const sample = tx % nsamples;
0818 auto const input_ch_start = inputCh * nsamples;
0819 ScalarType pedestal = 0.;
0820 int num = 0;
0821
0822
0823 auto const adc0 = ecalMGPA::adc(digis[input_ch_start]);
0824 auto const gainId0 = ecalMGPA::gainId(digis[input_ch_start]);
0825 auto const adc1 = ecalMGPA::adc(digis[input_ch_start + 1]);
0826 auto const gainId1 = ecalMGPA::gainId(digis[input_ch_start + 1]);
0827 auto const did = DetId{dids[inputCh]};
0828 auto const isBarrel = did.subdetId() == EcalBarrel;
0829 auto const sample_mask = isBarrel ? conditionsDev.sampleMask_EB() : conditionsDev.sampleMask_EE();
0830 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
0831 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
0832
0833
0834
0835 if (gainId0 == 1 && use_sample(sample_mask, 0)) {
0836 pedestal = static_cast<ScalarType>(adc0);
0837 num = 1;
0838
0839 auto const diff = adc1 - adc0;
0840 if (gainId1 == 1 && use_sample(sample_mask, 1) &&
0841 std::abs(diff) < 3 * conditionsDev.pedestals_rms_x12()[hashedId]) {
0842 pedestal = (pedestal + static_cast<ScalarType>(adc1)) / 2.;
0843 num = 2;
0844 }
0845 } else {
0846 pedestal = conditionsDev.pedestals_mean_x12()[ch];
0847 }
0848
0849
0850 auto const gainId = ecalMGPA::gainId(digis[inputTx]);
0851 auto const adc = ecalMGPA::adc(digis[inputTx]);
0852
0853 bool bad = false;
0854 ScalarType sample_value, sample_value_error;
0855
0856
0857
0858 if (!use_sample(sample_mask, sample)) {
0859 bad = true;
0860 sample_value = 0;
0861 sample_value_error = 0;
0862 } else if (gainId == 1) {
0863 sample_value = static_cast<ScalarType>(adc) - pedestal;
0864 sample_value_error = conditionsDev.pedestals_rms_x12()[hashedId];
0865 } else if (gainId == 2) {
0866 auto const mean_x6 = conditionsDev.pedestals_mean_x6()[hashedId];
0867 auto const rms_x6 = conditionsDev.pedestals_rms_x6()[hashedId];
0868 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0869 sample_value = (static_cast<ScalarType>(adc) - mean_x6) * gain12Over6;
0870 sample_value_error = rms_x6 * gain12Over6;
0871 } else if (gainId == 3) {
0872 auto const mean_x1 = conditionsDev.pedestals_mean_x1()[hashedId];
0873 auto const rms_x1 = conditionsDev.pedestals_rms_x1()[hashedId];
0874 auto const gain12Over6 = conditionsDev.gain12Over6()[hashedId];
0875 auto const gain6Over1 = conditionsDev.gain6Over1()[hashedId];
0876 sample_value = (static_cast<ScalarType>(adc) - mean_x1) * gain6Over1 * gain12Over6;
0877 sample_value_error = rms_x1 * gain6Over1 * gain12Over6;
0878 } else {
0879 sample_value = 0;
0880 sample_value_error = 0;
0881 bad = true;
0882 }
0883
0884
0885 auto const useless_sample = (sample_value_error <= 0) | bad;
0886 useless_sample_values[tx] = useless_sample;
0887 sample_values[tx] = sample_value;
0888 sample_value_errors[tx] = useless_sample ? 1e+9 : sample_value_error;
0889
0890
0891 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG
0892 if (ch == 0) {
0893 printf("sample = %d sample_value = %f sample_value_error = %f useless = %c\n",
0894 sample,
0895 sample_value,
0896 sample_value_error,
0897 useless_sample ? '1' : '0');
0898 }
0899 #endif
0900
0901
0902 shrSampleValues[elemIdx] = sample_value_error > 0 ? sample_value : std::numeric_limits<ScalarType>::min();
0903 shrSampleValueErrors[elemIdx] = sample_value_error;
0904 alpaka::syncBlockThreads(acc);
0905
0906
0907 if (sample < 5) {
0908
0909 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 5]
0910 ? shrSampleValueErrors[elemIdx + 5]
0911 : shrSampleValueErrors[elemIdx];
0912 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 5]);
0913 }
0914 alpaka::syncBlockThreads(acc);
0915
0916
0917 if (sample < 3) {
0918 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 3]
0919 ? shrSampleValueErrors[elemIdx + 3]
0920 : shrSampleValueErrors[elemIdx];
0921 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 3]);
0922 }
0923 alpaka::syncBlockThreads(acc);
0924
0925 if (sample < 2) {
0926 shrSampleValueErrors[elemIdx] = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 2]
0927 ? shrSampleValueErrors[elemIdx + 2]
0928 : shrSampleValueErrors[elemIdx];
0929 shrSampleValues[elemIdx] = std::max(shrSampleValues[elemIdx], shrSampleValues[elemIdx + 2]);
0930 }
0931 alpaka::syncBlockThreads(acc);
0932
0933 if (sample == 0) {
0934
0935 auto const maxSampleValueError = shrSampleValues[elemIdx] < shrSampleValues[elemIdx + 1]
0936 ? shrSampleValueErrors[elemIdx + 1]
0937 : shrSampleValueErrors[elemIdx];
0938
0939
0940 pedestal_nums[ch] = num;
0941
0942 ampMaxError[ch] = maxSampleValueError;
0943
0944
0945 #ifdef ECAL_RECO_ALPAKA_TC_INIT_DEBUG
0946 if (ch == 0) {
0947 printf("pedestal_nums = %d ampMaxError = %f\n", num, maxSampleValueError);
0948 }
0949 #endif
0950 }
0951 }
0952 }
0953 };
0954
0955
0956
0957
0958
0959 class Kernel_time_correction_and_finalize {
0960 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0961
0962 public:
0963 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0964 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0965 EcalDigiDeviceCollection::ConstView digisDevEB,
0966 EcalDigiDeviceCollection::ConstView digisDevEE,
0967 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEB,
0968 EcalUncalibratedRecHitDeviceCollection::View uncalibRecHitsEE,
0969 EcalMultifitConditionsDevice::ConstView conditionsDev,
0970 ScalarType* const g_timeMax,
0971 ScalarType* const g_timeError,
0972 ConfigurationParameters::type const timeConstantTermEB,
0973 ConfigurationParameters::type const timeConstantTermEE,
0974 ConfigurationParameters::type const timeNconstEB,
0975 ConfigurationParameters::type const timeNconstEE,
0976 ConfigurationParameters::type const amplitudeThresholdEB,
0977 ConfigurationParameters::type const amplitudeThresholdEE,
0978 ConfigurationParameters::type const outOfTimeThreshG12pEB,
0979 ConfigurationParameters::type const outOfTimeThreshG12pEE,
0980 ConfigurationParameters::type const outOfTimeThreshG12mEB,
0981 ConfigurationParameters::type const outOfTimeThreshG12mEE,
0982 ConfigurationParameters::type const outOfTimeThreshG61pEB,
0983 ConfigurationParameters::type const outOfTimeThreshG61pEE,
0984 ConfigurationParameters::type const outOfTimeThreshG61mEB,
0985 ConfigurationParameters::type const outOfTimeThreshG61mEE) const {
0986
0987 constexpr auto nsamples = EcalDataFrame::MAXSAMPLES;
0988 auto const nchannelsEB = digisDevEB.size();
0989 auto const nchannels = nchannelsEB + digisDevEE.size();
0990 auto const offsetForInputs = nchannelsEB;
0991 auto const offsetForHashes = conditionsDev.offsetEE();
0992
0993 for (auto gtx : cms::alpakatools::uniform_elements(acc, nchannels)) {
0994 const int inputGtx = gtx >= offsetForInputs ? gtx - offsetForInputs : gtx;
0995 auto const* dids = gtx >= offsetForInputs ? digisDevEE.id() : digisDevEB.id();
0996 auto const* digis = gtx >= offsetForInputs ? digisDevEE.data()->data() : digisDevEB.data()->data();
0997
0998 auto* g_amplitude = gtx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
0999 auto* g_jitter = gtx >= nchannelsEB ? uncalibRecHitsEE.jitter() : uncalibRecHitsEB.jitter();
1000 auto* g_jitterError = gtx >= nchannelsEB ? uncalibRecHitsEE.jitterError() : uncalibRecHitsEB.jitterError();
1001 auto* flags = gtx >= nchannelsEB ? uncalibRecHitsEE.flags() : uncalibRecHitsEB.flags();
1002
1003 auto const did = DetId{dids[inputGtx]};
1004 auto const isBarrel = did.subdetId() == EcalBarrel;
1005 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
1006 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
1007
1008 auto* const amplitudeBins = isBarrel ? conditionsDev.timeBiasCorrections_amplitude_EB().data()
1009 : conditionsDev.timeBiasCorrections_amplitude_EE().data();
1010 auto* const shiftBins = isBarrel ? conditionsDev.timeBiasCorrections_shift_EB().data()
1011 : conditionsDev.timeBiasCorrections_shift_EE().data();
1012 auto const amplitudeBinsSize =
1013 isBarrel ? conditionsDev.timeBiasCorrectionSizeEB() : conditionsDev.timeBiasCorrectionSizeEE();
1014 auto const timeConstantTerm = isBarrel ? timeConstantTermEB : timeConstantTermEE;
1015 auto const timeNconst = isBarrel ? timeNconstEB : timeNconstEE;
1016 auto const offsetTimeValue = isBarrel ? conditionsDev.timeOffset_EB() : conditionsDev.timeOffset_EE();
1017 auto const amplitudeThreshold = isBarrel ? amplitudeThresholdEB : amplitudeThresholdEE;
1018 auto const outOfTimeThreshG12p = isBarrel ? outOfTimeThreshG12pEB : outOfTimeThreshG12pEE;
1019 auto const outOfTimeThreshG12m = isBarrel ? outOfTimeThreshG12mEB : outOfTimeThreshG12mEE;
1020 auto const outOfTimeThreshG61p = isBarrel ? outOfTimeThreshG61pEB : outOfTimeThreshG61pEE;
1021 auto const outOfTimeThreshG61m = isBarrel ? outOfTimeThreshG61mEB : outOfTimeThreshG61mEE;
1022
1023
1024 auto const amplitude = g_amplitude[inputGtx];
1025 auto const rms_x12 = conditionsDev.pedestals_rms_x12()[hashedId];
1026 auto const timeCalibConst = conditionsDev.timeCalibConstants()[hashedId];
1027
1028 int myBin = -1;
1029 for (size_t bin = 0; bin < amplitudeBinsSize; ++bin) {
1030 if (amplitude > amplitudeBins[bin])
1031 myBin = bin;
1032 else
1033 break;
1034 }
1035
1036 ScalarType correction = 0;
1037 if (myBin == -1) {
1038 correction = shiftBins[0];
1039 } else if (myBin == static_cast<int>(amplitudeBinsSize) - 1) {
1040 correction = shiftBins[myBin];
1041 } else {
1042 correction = shiftBins[myBin + 1] - shiftBins[myBin];
1043 correction *= (amplitude - amplitudeBins[myBin]) / (amplitudeBins[myBin + 1] - amplitudeBins[myBin]);
1044 correction += shiftBins[myBin];
1045 }
1046
1047
1048 correction = correction * 0.04;
1049 auto const timeMax = g_timeMax[gtx];
1050 auto const timeError = g_timeError[gtx];
1051 auto const jitter = timeMax - 5 + correction;
1052 auto const jitterError =
1053 std::sqrt(timeError * timeError + timeConstantTerm * timeConstantTerm * 0.04 * 0.04);
1054
1055 #ifdef DEBUG_TIME_CORRECTION
1056 printf("ch = %d timeMax = %f timeError = %f jitter = %f correction = %f\n",
1057 gtx,
1058 timeMax,
1059 timeError,
1060 jitter,
1061 correction);
1062 #endif
1063
1064
1065 g_jitter[inputGtx] = jitter;
1066 g_jitterError[inputGtx] = jitterError;
1067
1068
1069
1070
1071 if (amplitude > amplitudeThreshold * rms_x12) {
1072 auto threshP = outOfTimeThreshG12p;
1073 auto threshM = outOfTimeThreshG12m;
1074 if (amplitude > 3000.) {
1075 for (int isample = 0; isample < nsamples; isample++) {
1076 auto const gainid = ecalMGPA::gainId(digis[nsamples * inputGtx + isample]);
1077 if (gainid != 1) {
1078 threshP = outOfTimeThreshG61p;
1079 threshM = outOfTimeThreshG61m;
1080 break;
1081 }
1082 }
1083 }
1084
1085 auto const correctedTime = (timeMax - 5) * 25 + timeCalibConst + offsetTimeValue;
1086 auto const nterm = timeNconst * rms_x12 / amplitude;
1087 auto const sigmat = std::sqrt(nterm * nterm + timeConstantTerm * timeConstantTerm);
1088 if (correctedTime > sigmat * threshP || correctedTime < -sigmat * threshM)
1089 flags[inputGtx] |= 0x1 << EcalUncalibratedRecHit::kOutOfTime;
1090 }
1091 }
1092 }
1093 };
1094
1095 }
1096
1097 namespace alpaka::trait {
1098 using namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit;
1099
1100
1101 template <typename TAcc>
1102 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_nullhypot, TAcc> {
1103
1104 template <typename TVec, typename... TArgs>
1105 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_nullhypot const&,
1106 TVec const& threadsPerBlock,
1107 TVec const& elemsPerThread,
1108 TArgs const&...) -> std::size_t {
1109 using ScalarType = ecal::multifit::SampleVector::Scalar;
1110
1111
1112 std::size_t bytes = threadsPerBlock[0u] * elemsPerThread[0u] * 4 * sizeof(ScalarType);
1113 return bytes;
1114 }
1115 };
1116
1117
1118 template <typename TAcc>
1119 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_makeratio, TAcc> {
1120 template <typename TVec, typename... TArgs>
1121 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_makeratio const&,
1122 TVec const& threadsPerBlock,
1123 TVec const& elemsPerThread,
1124 TArgs const&...) -> std::size_t {
1125 using ScalarType = ecal::multifit::SampleVector::Scalar;
1126
1127 std::size_t bytes = (8 * sizeof(ScalarType) + 3 * sizeof(bool)) * threadsPerBlock[0u] * elemsPerThread[0u];
1128 return bytes;
1129 }
1130 };
1131
1132
1133 template <typename TAcc>
1134 struct BlockSharedMemDynSizeBytes<Kernel_time_compute_findamplchi2_and_finish, TAcc> {
1135 template <typename TVec, typename... TArgs>
1136 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_compute_findamplchi2_and_finish const&,
1137 TVec const& threadsPerBlock,
1138 TVec const& elemsPerThread,
1139 TArgs const&...) -> std::size_t {
1140 using ScalarType = ecal::multifit::SampleVector::Scalar;
1141
1142 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] * sizeof(ScalarType);
1143 return bytes;
1144 }
1145 };
1146
1147
1148 template <typename TAcc>
1149 struct BlockSharedMemDynSizeBytes<Kernel_time_computation_init, TAcc> {
1150 template <typename TVec, typename... TArgs>
1151 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_time_computation_init const&,
1152 TVec const& threadsPerBlock,
1153 TVec const& elemsPerThread,
1154 TArgs const&...) -> std::size_t {
1155 using ScalarType = ecal::multifit::SampleVector::Scalar;
1156
1157 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] * sizeof(ScalarType);
1158 return bytes;
1159 }
1160 };
1161
1162 }
1163
1164 #endif