File indexing completed on 2024-06-14 02:53:21
0001 #include <alpaka/alpaka.hpp>
0002
0003 #include "DataFormats/DetId/interface/DetId.h"
0004 #include "DataFormats/EcalDigi/interface/EcalConstants.h"
0005 #include "EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h"
0006 #include "EventFilter/EcalRawToDigi/interface/DCCRawDataDefinitions.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0008
0009 #include "UnpackPortable.h"
0010
0011 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::raw {
0012
0013 using namespace ::ecal::raw;
0014 using namespace cms::alpakatools;
0015
0016 class Kernel_unpack {
0017 public:
0018 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0019 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0020 unsigned char const* __restrict__ data,
0021 uint32_t const* __restrict__ offsets,
0022 int const* __restrict__ feds,
0023 EcalDigiDeviceCollection::View digisDevEB,
0024 EcalDigiDeviceCollection::View digisDevEE,
0025 EcalElectronicsMappingDevice::ConstView eid2did,
0026 uint32_t const nbytesTotal) const {
0027 constexpr auto kSampleSize = ecalPh1::sampleSize;
0028
0029
0030 auto const ifed = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
0031 auto const threadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
0032
0033
0034 auto const offset = offsets[ifed];
0035
0036 auto const fed = feds[ifed];
0037 auto const isBarrel = is_barrel(static_cast<uint8_t>(fed - 600));
0038
0039 auto const gridDim = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u];
0040 auto const size = ifed == gridDim - 1 ? nbytesTotal - offset : offsets[ifed + 1] - offset;
0041 auto* samples = isBarrel ? digisDevEB.data()->data() : digisDevEE.data()->data();
0042 auto* ids = isBarrel ? digisDevEB.id() : digisDevEE.id();
0043 auto* pChannelsCounter = isBarrel ? &digisDevEB.size() : &digisDevEE.size();
0044
0045
0046 uint64_t const* buffer = reinterpret_cast<uint64_t const*>(data + offset);
0047
0048
0049
0050
0051
0052
0053
0054 auto const fed_header = buffer[0];
0055 uint32_t bx = (fed_header >> H_BX_B) & H_BX_MASK;
0056 uint32_t lv1 = (fed_header >> H_L1_B) & H_L1_MASK;
0057 uint32_t triggerType = (fed_header >> H_TTYPE_B) & H_TTYPE_MASK;
0058
0059
0060 uint32_t numbChannels(0);
0061 if (triggerType == PHYSICTRIGGER) {
0062 numbChannels = NUMB_FE;
0063 } else if (triggerType == CALIBRATIONTRIGGER) {
0064 numbChannels = NUMB_FE + 2;
0065 } else {
0066
0067 return;
0068 }
0069
0070
0071
0072
0073
0074
0075 auto const w2 = buffer[2];
0076 uint8_t const fov = (w2 >> H_FOV_B) & H_FOV_MASK;
0077
0078
0079
0080 uint8_t exp_ttids[NUMB_FE + 2];
0081 uint8_t ch = 1;
0082 uint8_t nCh = 0;
0083 for (uint8_t i = 4; i < 9; ++i) {
0084 for (uint8_t j = 0; j < 14; ++j, ++ch) {
0085 const uint8_t shift = j * 4;
0086 const int chStatus = (buffer[i] >> shift) & H_CHSTATUS_MASK;
0087 const bool regular = (chStatus == CH_DISABLED || chStatus == CH_SUPPRESS);
0088 const bool problematic =
0089 (chStatus == CH_TIMEOUT || chStatus == CH_HEADERERR || chStatus == CH_LINKERR ||
0090 chStatus == CH_LENGTHERR || chStatus == CH_IFIFOFULL || chStatus == CH_L1AIFIFOFULL);
0091 if (!(regular || problematic)) {
0092 exp_ttids[nCh] = ch;
0093 ++nCh;
0094 }
0095 }
0096 }
0097
0098
0099
0100
0101 uint8_t ntccblockwords = isBarrel ? 18 : 36;
0102 auto const* tower_blocks_start = buffer + 9 + ntccblockwords + 6;
0103 auto const* trailer = buffer + (size / 8 - 1);
0104 auto const* current_tower_block = tower_blocks_start;
0105 uint8_t iCh = 0;
0106 uint8_t next_tower_id = exp_ttids[iCh];
0107 while (current_tower_block < trailer && iCh < numbChannels) {
0108 auto const w = *current_tower_block;
0109 uint8_t ttid = w & TOWER_ID_MASK;
0110 uint16_t bxlocal = (w >> TOWER_BX_B) & TOWER_BX_MASK;
0111 uint16_t lv1local = (w >> TOWER_L1_B) & TOWER_L1_MASK;
0112 uint16_t block_length = (w >> TOWER_LENGTH_B) & TOWER_LENGTH_MASK;
0113
0114
0115 while (exp_ttids[iCh] < next_tower_id) {
0116 ++iCh;
0117 }
0118 ++iCh;
0119
0120
0121
0122
0123 if (ttid != next_tower_id) {
0124 next_tower_id = find_next_tower_block(current_tower_block, trailer, bx, lv1);
0125 if (next_tower_id < TOWER_ID_MASK) {
0126 continue;
0127 } else {
0128 break;
0129 }
0130 }
0131
0132
0133 next_tower_id = exp_ttids[iCh];
0134
0135 uint16_t const dccbx = bx & 0xfff;
0136 uint16_t const dccl1 = lv1 & 0xfff;
0137
0138 if (fov >= 1 && !is_synced_towerblock(dccbx, bxlocal, dccl1, lv1local)) {
0139 current_tower_block += block_length;
0140 continue;
0141 }
0142
0143
0144
0145 uint32_t const nchannels = (block_length - 1) / 3;
0146
0147 bool bad_block = false;
0148 auto& ch_with_bad_block = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc);
0149 if (once_per_block(acc)) {
0150 ch_with_bad_block = std::numeric_limits<uint32_t>::max();
0151 }
0152
0153 alpaka::syncBlockThreads(acc);
0154
0155 auto const threadsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0156
0157
0158
0159 for (uint32_t i = 0; i < nchannels; i += threadsPerBlock) {
0160 auto const channel = i + threadIdx;
0161
0162 uint64_t wdata;
0163 uint8_t stripid;
0164 uint8_t xtalid;
0165
0166
0167 if (channel < nchannels && channel < ch_with_bad_block) {
0168
0169 wdata = current_tower_block[1 + channel * 3];
0170 stripid = wdata & 0x7;
0171 xtalid = (wdata >> 4) & 0x7;
0172
0173
0174 if (stripid < ElectronicsIdGPU::MIN_STRIPID || stripid > ElectronicsIdGPU::MAX_STRIPID ||
0175 xtalid < ElectronicsIdGPU::MIN_XTALID || xtalid > ElectronicsIdGPU::MAX_XTALID) {
0176 bad_block = true;
0177 }
0178 if (channel > 0) {
0179
0180 auto const prev_channel = channel - 1;
0181 auto const prevwdata = current_tower_block[1 + prev_channel * 3];
0182 uint8_t const laststripid = prevwdata & 0x7;
0183 uint8_t const lastxtalid = (prevwdata >> 4) & 0x7;
0184 if ((stripid == laststripid && xtalid <= lastxtalid) || (stripid < laststripid)) {
0185 bad_block = true;
0186 }
0187 }
0188 }
0189
0190
0191 if (bad_block && channel < ch_with_bad_block) {
0192 alpaka::atomicMin(acc, &ch_with_bad_block, channel, alpaka::hierarchy::Threads{});
0193 }
0194
0195
0196 alpaka::syncBlockThreads(acc);
0197
0198
0199 if (channel >= nchannels || channel >= ch_with_bad_block) {
0200 continue;
0201 }
0202
0203 ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid};
0204 auto const didraw = isBarrel ? compute_ebdetid(eid) : eid2did[eid.linearIndex()].rawid();
0205
0206 if (didraw == 0)
0207 continue;
0208
0209
0210 uint16_t sampleValues[kSampleSize];
0211 sampleValues[0] = (wdata >> 16) & 0x3fff;
0212 sampleValues[1] = (wdata >> 32) & 0x3fff;
0213 sampleValues[2] = (wdata >> 48) & 0x3fff;
0214 auto const wdata1 = current_tower_block[2 + channel * 3];
0215 sampleValues[3] = wdata1 & 0x3fff;
0216 sampleValues[4] = (wdata1 >> 16) & 0x3fff;
0217 sampleValues[5] = (wdata1 >> 32) & 0x3fff;
0218 sampleValues[6] = (wdata1 >> 48) & 0x3fff;
0219 auto const wdata2 = current_tower_block[3 + channel * 3];
0220 sampleValues[7] = wdata2 & 0x3fff;
0221 sampleValues[8] = (wdata2 >> 16) & 0x3fff;
0222 sampleValues[9] = (wdata2 >> 32) & 0x3fff;
0223
0224
0225 bool isSaturation = true;
0226 short firstGainZeroSampID{-1}, firstGainZeroSampADC{-1};
0227 for (uint32_t si = 0; si < kSampleSize; ++si) {
0228 if (gainId(sampleValues[si]) == 0) {
0229 firstGainZeroSampID = si;
0230 firstGainZeroSampADC = adc(sampleValues[si]);
0231 break;
0232 }
0233 }
0234 if (firstGainZeroSampID != -1) {
0235 unsigned int plateauEnd = std::min(kSampleSize, (unsigned int)(firstGainZeroSampID + 5));
0236 for (unsigned int s = firstGainZeroSampID; s < plateauEnd; s++) {
0237 if (!(gainId(sampleValues[s]) == 0 && adc(sampleValues[s]) == firstGainZeroSampADC)) {
0238 isSaturation = false;
0239 break;
0240 }
0241 }
0242
0243 if (firstGainZeroSampID < 3) {
0244 isSaturation = false;
0245 }
0246 if (!isSaturation)
0247 continue;
0248 } else {
0249
0250 short numGain = 1;
0251 bool gainSwitchError = false;
0252 for (unsigned int si = 1; si < kSampleSize; ++si) {
0253 if ((gainId(sampleValues[si - 1]) > gainId(sampleValues[si])) && numGain < 5)
0254 gainSwitchError = true;
0255 if (gainId(sampleValues[si - 1]) == gainId(sampleValues[si]))
0256 numGain++;
0257 else
0258 numGain = 1;
0259 }
0260 if (gainSwitchError)
0261 continue;
0262 }
0263
0264 auto const pos = alpaka::atomicAdd(acc, pChannelsCounter, 1u, alpaka::hierarchy::Threads{});
0265
0266
0267 ids[pos] = didraw;
0268 std::memcpy(&samples[pos * kSampleSize], sampleValues, kSampleSize * sizeof(uint16_t));
0269 }
0270
0271 current_tower_block += block_length;
0272 }
0273 }
0274
0275 private:
0276 ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_raw_buffer(uint8_t const* const buffer,
0277 uint32_t const nbytes,
0278 uint32_t const nbytes_per_row = 20) const {
0279 for (uint32_t i = 0; i < nbytes; ++i) {
0280 if (i % nbytes_per_row == 0 && i > 0)
0281 printf("\n");
0282 printf("%02X ", buffer[i]);
0283 }
0284 }
0285
0286 ALPAKA_FN_INLINE ALPAKA_FN_ACC void print_first3bits(uint64_t const* buffer, uint32_t size) const {
0287 for (uint32_t i = 0; i < size; ++i) {
0288 uint8_t const b61 = (buffer[i] >> 61) & 0x1;
0289 uint8_t const b62 = (buffer[i] >> 62) & 0x1;
0290 uint8_t const b63 = (buffer[i] >> 63) & 0x1;
0291 printf("[word: %u] %u%u%u\n", i, b63, b62, b61);
0292 }
0293 }
0294
0295 ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_barrel(uint8_t dccid) const {
0296 return dccid >= ElectronicsIdGPU::MIN_DCCID_EBM && dccid <= ElectronicsIdGPU::MAX_DCCID_EBP;
0297 }
0298
0299 ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t fed2dcc(int fed) const { return static_cast<uint8_t>(fed - 600); }
0300
0301 ALPAKA_FN_INLINE ALPAKA_FN_ACC int zside_for_eb(ElectronicsIdGPU const& eid) const {
0302 int dcc = eid.dccId();
0303 return ((dcc >= ElectronicsIdGPU::MIN_DCCID_EBM && dcc <= ElectronicsIdGPU::MAX_DCCID_EBM)) ? -1 : 1;
0304 }
0305
0306 ALPAKA_FN_INLINE ALPAKA_FN_ACC uint8_t find_next_tower_block(uint64_t const*& current_tower_block,
0307 uint64_t const* trailer,
0308 uint32_t const bx,
0309 uint32_t const lv1) const {
0310 const auto* next_tower_block = current_tower_block + 1;
0311
0312
0313 const uint64_t lv1local = ((lv1 - 1) & TOWER_L1_MASK);
0314 const uint64_t bxlocal = (bx != 3564) ? bx : 0;
0315
0316
0317
0318
0319 const uint64_t sign = 0xC0000000C0000000 + (lv1local << TOWER_L1_B) + (bxlocal << TOWER_BX_B);
0320 const uint64_t mask =
0321 0xC0001000D0000000 + (uint64_t(TOWER_L1_MASK) << TOWER_L1_B) + (uint64_t(TOWER_BX_MASK) << TOWER_BX_B);
0322
0323 while (next_tower_block < trailer) {
0324 if ((*next_tower_block & mask) == sign) {
0325 current_tower_block = next_tower_block;
0326 return uint8_t(*next_tower_block & TOWER_ID_MASK);
0327 } else {
0328 ++next_tower_block;
0329 }
0330 }
0331 return TOWER_ID_MASK;
0332 }
0333
0334 ALPAKA_FN_INLINE ALPAKA_FN_ACC bool is_synced_towerblock(uint16_t const dccbx,
0335 uint16_t const bx,
0336 uint16_t const dccl1,
0337 uint16_t const l1) const {
0338 bool const bxsync = (bx == 0 && dccbx == 3564) || (bx == dccbx && dccbx != 3564);
0339 bool const l1sync = (l1 == ((dccl1 - 1) & 0xfff));
0340 return bxsync && l1sync;
0341 }
0342
0343 ALPAKA_FN_INLINE ALPAKA_FN_ACC bool right_tower_for_eb(int tower) const {
0344
0345 return (tower > 12 && tower < 21) || (tower > 28 && tower < 37) || (tower > 44 && tower < 53) ||
0346 (tower > 60 && tower < 69);
0347 }
0348
0349 ALPAKA_FN_INLINE ALPAKA_FN_ACC uint32_t compute_ebdetid(ElectronicsIdGPU const& eid) const {
0350
0351 auto const dcc = eid.dccId();
0352 auto const tower = eid.towerId();
0353 auto const strip = eid.stripId();
0354 auto const xtal = eid.xtalId();
0355
0356 int smid = 0;
0357 int iphi = 0;
0358 bool EBPlus = (zside_for_eb(eid) > 0);
0359 bool EBMinus = !EBPlus;
0360
0361 if (zside_for_eb(eid) < 0) {
0362 smid = dcc + 19 - ElectronicsIdGPU::DCCID_PHI0_EBM;
0363 iphi = (smid - 19) * ElectronicsIdGPU::kCrystalsInPhi;
0364 iphi += 5 * ((tower - 1) % ElectronicsIdGPU::kTowersInPhi);
0365 } else {
0366 smid = dcc + 1 - ElectronicsIdGPU::DCCID_PHI0_EBP;
0367 iphi = (smid - 1) * ElectronicsIdGPU::kCrystalsInPhi;
0368 iphi += 5 * (ElectronicsIdGPU::kTowersInPhi - ((tower - 1) % ElectronicsIdGPU::kTowersInPhi) - 1);
0369 }
0370
0371 bool RightTower = right_tower_for_eb(tower);
0372 int ieta = 5 * ((tower - 1) / ElectronicsIdGPU::kTowersInPhi) + 1;
0373 if (RightTower) {
0374 ieta += (strip - 1);
0375 if (strip % 2 == 1) {
0376 if (EBMinus)
0377 iphi += (xtal - 1) + 1;
0378 else
0379 iphi += (4 - (xtal - 1)) + 1;
0380 } else {
0381 if (EBMinus)
0382 iphi += (4 - (xtal - 1)) + 1;
0383 else
0384 iphi += (xtal - 1) + 1;
0385 }
0386 } else {
0387 ieta += 4 - (strip - 1);
0388 if (strip % 2 == 1) {
0389 if (EBMinus)
0390 iphi += (4 - (xtal - 1)) + 1;
0391 else
0392 iphi += (xtal - 1) + 1;
0393 } else {
0394 if (EBMinus)
0395 iphi += (xtal - 1) + 1;
0396 else
0397 iphi += (4 - (xtal - 1)) + 1;
0398 }
0399 }
0400
0401 if (zside_for_eb(eid) < 0)
0402 ieta = -ieta;
0403
0404 DetId did{DetId::Ecal, EcalBarrel};
0405 return did.rawId() | ((ieta > 0) ? (0x10000 | (ieta << 9)) : ((-ieta) << 9)) | (iphi & 0x1FF);
0406 }
0407
0408 ALPAKA_FN_INLINE ALPAKA_FN_ACC int adc(uint16_t sample) const { return sample & 0xfff; }
0409
0410 ALPAKA_FN_INLINE ALPAKA_FN_ACC int gainId(uint16_t sample) const { return (sample >> 12) & 0x3; }
0411 };
0412
0413 void unpackRaw(Queue& queue,
0414 InputDataHost const& inputHost,
0415 EcalDigiDeviceCollection& digisDevEB,
0416 EcalDigiDeviceCollection& digisDevEE,
0417 EcalElectronicsMappingDevice const& mapping,
0418 uint32_t const nfedsWithData,
0419 uint32_t const nbytesTotal) {
0420
0421 ecal::raw::InputDataDevice inputDevice(queue, nbytesTotal, nfedsWithData);
0422
0423
0424 alpaka::memcpy(queue, inputDevice.data, inputHost.data);
0425 alpaka::memcpy(queue, inputDevice.offsets, inputHost.offsets);
0426 alpaka::memcpy(queue, inputDevice.feds, inputHost.feds);
0427
0428 auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(nfedsWithData, 32);
0429 alpaka::exec<Acc1D>(queue,
0430 workDiv,
0431 Kernel_unpack{},
0432 inputDevice.data.data(),
0433 inputDevice.offsets.data(),
0434 inputDevice.feds.data(),
0435 digisDevEB.view(),
0436 digisDevEE.view(),
0437 mapping.const_view(),
0438 nbytesTotal);
0439 }
0440
0441 }