Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /EventFilter/HcalRawToDigi/plugins/DecodeGPU.cu is written in an unsupported language. File is not indexed.

0001 #include "DataFormats/HcalDetId/interface/HcalElectronicsId.h"
0002 #include "DataFormats/HcalDetId/interface/HcalSubdetector.h"
0003 #include "DataFormats/HcalDetId/interface/HcalDetId.h"
0004 
0005 #include "EventFilter/HcalRawToDigi/plugins/DecodeGPU.h"
0006 
0007 #include <cooperative_groups.h>
0008 using namespace cooperative_groups;
0009 
0010 namespace hcal {
0011   namespace raw {
0012 
0013     __forceinline__ __device__ char const* get_subdet_str(DetId const& did) {
0014       switch (did.subdetId()) {
0015         case HcalEmpty:
0016           return "HcalEmpty";
0017           break;
0018         case HcalBarrel:
0019           return "HcalBarrel";
0020           break;
0021         case HcalEndcap:
0022           return "HcalEndcap";
0023           break;
0024         case HcalOuter:
0025           return "HcalOuter";
0026           break;
0027         case HcalForward:
0028           return "HcalForward";
0029           break;
0030         case HcalTriggerTower:
0031           return "HcalTriggerTower";
0032           break;
0033         case HcalOther:
0034           return "HcalOther";
0035           break;
0036         default:
0037           return "Unknown";
0038           break;
0039       }
0040 
0041       return "Unknown";
0042     }
0043 
0044     __forceinline__ __device__ bool is_channel_header_word(uint16_t const* ptr) {
0045       uint8_t bit = (*ptr >> 15) & 0x1;
0046       return bit == 1;
0047     }
0048 
0049     template <typename T>
0050     constexpr bool is_power_of_two(T x) {
0051       return (x != 0) && ((x & (x - 1)) == 0);
0052     }
0053 
0054     template <int NTHREADS>
0055     __global__ void kernel_rawdecode_test(unsigned char const* data,
0056                                           uint32_t const* offsets,
0057                                           int const* feds,
0058                                           uint32_t const* eid2did,
0059                                           uint32_t const* eid2tid,
0060                                           uint16_t* digisF01HE,
0061                                           uint32_t* idsF01HE,
0062                                           uint16_t* digisF5HB,
0063                                           uint32_t* idsF5HB,
0064                                           uint8_t* npresamplesF5HB,
0065                                           uint16_t* digisF3HB,
0066                                           uint32_t* idsF3HB,
0067                                           uint32_t* pChannelsCounters,
0068                                           uint32_t const nsamplesF01HE,
0069                                           uint32_t const nsamplesF5HB,
0070                                           uint32_t const nsamplesF3HB,
0071                                           uint32_t const nBytesTotal) {
0072       // in order to properly use cooperative groups
0073       static_assert(is_power_of_two(NTHREADS) == true && NTHREADS <= 32);
0074 
0075       thread_block_tile<NTHREADS> thread_group = tiled_partition<NTHREADS>(this_thread_block());
0076 
0077       auto const iamc = threadIdx.x / NTHREADS;
0078       auto const ifed = blockIdx.x;
0079       auto const offset = offsets[ifed];
0080 
0081 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0082       if (ifed > 0 || iamc > 0)
0083         return;
0084       printf("threadIdx.x = %d rank = %d iamc = %d\n", threadIdx.x, thread_group.thread_rank(), iamc);
0085 #endif
0086 
0087 #ifdef HCAL_RAWDECODE_GPUDEBUG
0088       auto const fed = feds[ifed];
0089       auto const size = ifed == gridDim.x - 1 ? nBytesTotal - offset : offsets[ifed + 1] - offset;
0090       printf("ifed = %d fed = %d offset = %u size = %u\n", ifed, fed, offset, size);
0091 #endif
0092 
0093       // offset to the right raw buffer
0094       uint64_t const* buffer = reinterpret_cast<uint64_t const*>(data + offset);
0095 
0096 #ifdef HCAL_RAWDECODE_GPUDEBUG
0097       //
0098       // fed header
0099       //
0100       auto const fed_header = buffer[0];
0101       uint32_t const fed_id = (fed_header >> 8) & 0xfff;
0102       uint32_t const bx = (fed_header >> 20) & 0xfff;
0103       uint32_t const lv1 = (fed_header >> 32) & 0xffffff;
0104       uint8_t const trigger_type = (fed_header >> 56) & 0xf;
0105       uint8_t const bid_fed_header = (fed_header >> 60) & 0xf;
0106 
0107       printf("fed = %d fed_id = %u bx = %u lv1 = %u trigger_type = %u bid = %u\n",
0108              fed,
0109              fed_id,
0110              bx,
0111              lv1,
0112              trigger_type,
0113              bid_fed_header);
0114 #endif
0115 
0116       // amc 13 header
0117       auto const amc13word = buffer[1];
0118       uint8_t const namc = (amc13word >> 52) & 0xf;
0119       if (iamc >= namc)
0120         return;
0121 
0122 #ifdef HCAL_RAWDECODE_GPUDEBUG
0123       uint8_t const amc13version = (amc13word >> 60) & 0xf;
0124       uint32_t const amc13OrbitNumber = (amc13word >> 4) & 0xffffffffu;
0125       printf("fed = %d namc = %u amc13version = %u amc13OrbitNumber = %u\n", fed, namc, amc13version, amc13OrbitNumber);
0126 #endif
0127 
0128       // compute the offset int to the right buffer
0129       uint32_t amcoffset = 0;
0130       for (uint8_t ii = 0u; ii < iamc; ii++) {
0131         auto const word = buffer[2 + ii];
0132         int const amcSize = (word >> 32) & 0xffffff;
0133         amcoffset += amcSize;
0134       }
0135 
0136       auto const word = buffer[2 + iamc];
0137       int const amcSize = (word >> 32) & 0xffffff;
0138 
0139 #ifdef HCAL_RAWDECODE_GPUDEBUG
0140       uint16_t const amcid = word & 0xffff;
0141       int const slot = (word >> 16) & 0xf;
0142       int const amcBlockNumber = (word >> 20) & 0xff;
0143       printf("fed = %d amcid = %u slot = %d amcBlockNumber = %d\n", fed, amcid, slot, amcBlockNumber);
0144 
0145       bool const amcmore = ((word >> 61) & 0x1) != 0;
0146       bool const amcSegmented = ((word >> 60) & 0x1) != 0;
0147       bool const amcLengthOk = ((word >> 62) & 0x1) != 0;
0148       bool const amcCROk = ((word >> 56) & 0x1) != 0;
0149       bool const amcDataPresent = ((word >> 58) & 0x1) != 0;
0150       bool const amcDataValid = ((word >> 56) & 0x1) != 0;
0151       bool const amcEnabled = ((word >> 59) & 0x1) != 0;
0152       printf(
0153           "fed = %d amcmore = %d amcSegmented = %d, amcLengthOk = %d amcCROk = %d\n>> amcDataPresent = %d amcDataValid "
0154           "= %d amcEnabled = %d\n",
0155           fed,
0156           static_cast<int>(amcmore),
0157           static_cast<int>(amcSegmented),
0158           static_cast<int>(amcLengthOk),
0159           static_cast<int>(amcCROk),
0160           static_cast<int>(amcDataPresent),
0161           static_cast<int>(amcDataValid),
0162           static_cast<int>(amcEnabled));
0163 #endif
0164 
0165       // get to the payload
0166       auto const* payload64 = buffer + 2 + namc + amcoffset;
0167 
0168 #ifdef HCAL_RAWDECODE_GPUDEBUG
0169       // uhtr header v1 1st 64 bits
0170       auto const payload64_w0 = payload64[0];
0171 #endif
0172       // uhtr n bytes comes from amcSize, according to the cpu version!
0173       uint32_t const data_length64 = amcSize;
0174 
0175 #ifdef HCAL_RAWDECODE_GPUDEBUG
0176       uint16_t bcn = (payload64_w0 >> 20) & 0xfff;
0177       uint32_t evn = (payload64_w0 >> 32) & 0xffffff;
0178       printf("fed = %d data_length64 = %u bcn = %u evn = %u\n", fed, data_length64, bcn, evn);
0179 #endif
0180 
0181       // uhtr header v1 2nd 64 bits
0182       auto const payload64_w1 = payload64[1];
0183       uint8_t const uhtrcrate = payload64_w1 & 0xff;
0184       uint8_t const uhtrslot = (payload64_w1 >> 8) & 0xf;
0185       uint8_t const presamples = (payload64_w1 >> 12) & 0xf;
0186       uint8_t const payloadFormat = (payload64_w1 >> 44) & 0xf;
0187 
0188 #ifdef HCAL_RAWDECODE_GPUDEBUG
0189       uint16_t const orbitN = (payload64_w1 >> 16) & 0xffff;
0190       uint8_t const firmFlavor = (payload64_w1 >> 32) & 0xff;
0191       uint8_t const eventType = (payload64_w1 >> 40) & 0xf;
0192       printf(
0193           "fed = %d crate = %u slot = %u presamples = %u\n>>> orbitN = %u firmFlavor = %u eventType = %u payloadFormat "
0194           "= %u\n",
0195           fed,
0196           uhtrcrate,
0197           uhtrslot,
0198           presamples,
0199           orbitN,
0200           firmFlavor,
0201           eventType,
0202           payloadFormat);
0203 #endif
0204 
0205       // this should be filtering out uMNio...
0206       if (payloadFormat != 1)
0207         return;
0208 
0209       // skip uhtr header words
0210       auto const channelDataSize = data_length64 - 2;        // 2 uhtr header v1 words
0211       auto const* channelDataBuffer64Start = payload64 + 2;  // 2 uhtr header v2 wds
0212       auto const* ptr = reinterpret_cast<uint16_t const*>(channelDataBuffer64Start);
0213       auto const* end = ptr + sizeof(uint64_t) / sizeof(uint16_t) * (channelDataSize - 1);
0214       auto const t_rank = thread_group.thread_rank();
0215 
0216       // iterate through the channel data
0217       while (ptr != end) {
0218         // this is the starting point for this thread group for this iteration
0219         // with respect to this pointer every thread will move forward afterwards
0220         auto const* const start_ptr = ptr;
0221 
0222 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0223         thread_group.sync();
0224 #endif
0225 
0226         // skip to the header word of the right channel for this thread
0227         int counter = 0;
0228         while (counter < thread_group.thread_rank()) {
0229           // just a check for threads that land beyond the end
0230           if (ptr == end)
0231             break;
0232 
0233           // move ptr one forward past header
0234           if (is_channel_header_word(ptr))
0235             ++ptr;
0236           else {
0237             // go to the next channel and do not consider this guy as a channel
0238             while (ptr != end)
0239               if (!is_channel_header_word(ptr))
0240                 ++ptr;
0241               else
0242                 break;
0243             continue;
0244           }
0245 
0246           // skip
0247           while (ptr != end)
0248             if (!is_channel_header_word(ptr))
0249               ++ptr;
0250             else
0251               break;
0252           counter++;
0253         }
0254 
0255 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0256         thread_group.sync();
0257         printf("ptr - start_ptr = %d counter = %d rank = %d\n", static_cast<int>(ptr - start_ptr), counter, t_rank);
0258 #endif
0259 
0260         // when the end is near, channels will land outside of the [start_ptr, end) region
0261         if (ptr != end) {
0262           // for all of the flavors, these 2 guys have the same bit layout
0263           uint8_t const flavor = (ptr[0] >> 12) & 0x7;
0264           uint8_t const channelid = ptr[0] & 0xff;
0265           auto const* const new_channel_start = ptr;
0266 
0267           // flavor dependent stuff
0268           switch (flavor) {
0269             case 0:
0270             case 1: {
0271               // treat eid and did
0272               uint8_t fiber = (channelid >> 3) & 0x1f;
0273               uint8_t fchannel = channelid & 0x7;
0274               HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false};
0275               auto const did = HcalDetId{eid2did[eid.linearIndex()]};
0276 
0277 #ifdef HCAL_RAWDECODE_GPUDEBUG
0278               printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n",
0279                      eid.rawId(),
0280                      eid.linearIndex(),
0281                      did.rawId(),
0282                      get_subdet_str(did));
0283               printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n",
0284                      flavor,
0285                      uhtrcrate,
0286                      uhtrslot,
0287                      channelid,
0288                      fiber,
0289                      fchannel);
0290 #endif
0291 
0292               // remove digis not for HE
0293               if (did.subdetId() != HcalEndcap)
0294                 break;
0295 
0296               // count words
0297               auto const* channel_header_word = ptr++;
0298               while (!is_channel_header_word(ptr) && ptr != end)
0299                 ++ptr;
0300               auto const* channel_end = ptr;  // set ptr
0301               uint32_t const nwords = channel_end - channel_header_word;
0302 
0303               // filter out this digi if nwords does not equal expected
0304               auto const expected_words = compute_stride<Flavor1>(nsamplesF01HE);
0305               if (nwords != expected_words)
0306                 break;
0307 
0308               // inc the number of digis of this type
0309               auto const pos = atomicAdd(&pChannelsCounters[OutputF01HE], 1);
0310 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0311               printf("rank = %d pos = %d\n", thread_group.thread_rank(), pos);
0312 #endif
0313 
0314               // store to global mem words for this digi
0315               idsF01HE[pos] = did.rawId();
0316 
0317               for (uint32_t iword = 0; iword < expected_words; iword++)
0318                 digisF01HE[pos * expected_words + iword] = channel_header_word[iword];
0319 
0320 #ifdef HCAL_RAWDECODE_GPUDEBUG
0321               printf("nwords = %u\n", nwords);
0322 #endif
0323 
0324               break;
0325             }
0326             case 3: {
0327               // treat eid and did
0328               uint8_t fiber = (channelid >> 3) & 0x1f;
0329               uint8_t fchannel = channelid & 0x7;
0330               HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false};
0331               auto const did = HcalDetId{eid2did[eid.linearIndex()]};
0332 
0333 #ifdef HCAL_RAWDECODE_GPUDEBUG
0334               printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n",
0335                      eid.rawId(),
0336                      eid.linearIndex(),
0337                      did.rawId(),
0338                      get_subdet_str(did));
0339               printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n",
0340                      flavor,
0341                      uhtrcrate,
0342                      uhtrslot,
0343                      channelid,
0344                      fiber,
0345                      fchannel);
0346 #endif
0347 
0348               // remove digis not for HE
0349               if (did.subdetId() != HcalBarrel)
0350                 break;
0351 
0352               // count words
0353               auto const* channel_header_word = ptr++;
0354               while (!is_channel_header_word(ptr) && ptr != end)
0355                 ++ptr;
0356               auto const* channel_end = ptr;  // set ptr
0357               uint32_t const nwords = channel_end - channel_header_word;
0358 
0359               // filter out this digi if nwords does not equal expected
0360               auto const expected_words = compute_stride<Flavor3>(nsamplesF3HB);
0361               if (nwords != expected_words)
0362                 break;
0363 
0364               // inc the number of digis of this type
0365               auto const pos = atomicAdd(&pChannelsCounters[OutputF3HB], 1);
0366 
0367               // store to global mem words for this digi
0368               idsF3HB[pos] = did.rawId();
0369               for (uint32_t iword = 0; iword < expected_words; iword++)
0370                 digisF3HB[pos * expected_words + iword] = channel_header_word[iword];
0371 
0372 #ifdef HCAL_RAWDECODE_GPUDEBUG
0373               printf("nwords = %u\n", nwords);
0374 #endif
0375 
0376               break;
0377             }
0378             case 2: {
0379               uint8_t fiber = (channelid >> 3) & 0x1f;
0380               uint8_t fchannel = channelid & 0x7;
0381               HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false};
0382               auto const did = DetId{eid2did[eid.linearIndex()]};
0383 
0384 #ifdef HCAL_RAWDECODE_GPUDEBUG
0385               printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n",
0386                      eid.rawId(),
0387                      eid.linearIndex(),
0388                      did.rawId(),
0389                      get_subdet_str(did));
0390               printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n",
0391                      flavor,
0392                      uhtrcrate,
0393                      uhtrslot,
0394                      channelid,
0395                      fiber,
0396                      fchannel);
0397 #endif
0398 
0399               break;
0400             }
0401             case 4: {
0402               uint8_t link = (channelid >> 4) & 0xf;
0403               uint8_t tower = channelid & 0xf;
0404               HcalElectronicsId eid{uhtrcrate, uhtrslot, link, tower, true};
0405               auto const did = DetId{eid2tid[eid.linearIndex()]};
0406 
0407 #ifdef HCAL_RAWDECODE_GPUDEBUG
0408               printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n",
0409                      eid.rawId(),
0410                      eid.linearIndex(),
0411                      did.rawId(),
0412                      get_subdet_str(did));
0413               printf("flavor = %u crate = %u slot = %u channelid = %u link = %u tower = %u\n",
0414                      flavor,
0415                      uhtrcrate,
0416                      uhtrslot,
0417                      channelid,
0418                      link,
0419                      tower);
0420 #endif
0421 
0422               break;
0423             }
0424             case 5: {
0425               uint8_t fiber = (channelid >> 2) & 0x3f;
0426               uint8_t fchannel = channelid & 0x3;
0427               HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false};
0428               auto const did = HcalDetId{eid2did[eid.linearIndex()]};
0429 
0430 #ifdef HCAL_RAWDECODE_GPUDEBUG
0431               printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n",
0432                      eid.rawId(),
0433                      eid.linearIndex(),
0434                      did.rawId(),
0435                      get_subdet_str(did));
0436               printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n",
0437                      flavor,
0438                      uhtrcrate,
0439                      uhtrslot,
0440                      channelid,
0441                      fiber,
0442                      fchannel);
0443 #endif
0444 
0445               // remove digis not for HB
0446               if (did.subdetId() != HcalBarrel)
0447                 break;
0448 
0449               // count words
0450               auto const* channel_header_word = ptr++;
0451               while (!is_channel_header_word(ptr) && ptr != end)
0452                 ++ptr;
0453               auto const* channel_end = ptr;  // set ptr
0454               uint32_t const nwords = channel_end - channel_header_word;
0455 
0456               // filter out this digi if nwords does not equal expected
0457               auto const expected_words = compute_stride<Flavor5>(nsamplesF5HB);
0458               if (nwords != expected_words)
0459                 break;
0460 
0461               // inc the number of digis of this type
0462               auto const pos = atomicAdd(&pChannelsCounters[OutputF5HB], 1);
0463 
0464 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0465               printf("rank = %d pos = %d\n", thread_group.thread_rank(), pos);
0466 #endif
0467 
0468               // store to global mem words for this digi
0469               idsF5HB[pos] = did.rawId();
0470               npresamplesF5HB[pos] = presamples;
0471               for (uint32_t iword = 0; iword < expected_words; iword++)
0472                 digisF5HB[pos * expected_words + iword] = channel_header_word[iword];
0473 
0474               break;
0475             }
0476             case 7: {
0477               uint8_t const fiber = (channelid >> 2) & 0x3f;
0478               uint8_t const fchannel = channelid & 0x3;
0479               HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false};
0480               auto const did = DetId{eid2did[eid.linearIndex()]};
0481 
0482               /* uncomment to check the linear index validity
0483               if (eid.rawId() >= HcalElectronicsId::maxLinearIndex) {
0484 #ifdef HCAL_RAWDECODE_GPUDEBUG
0485                   printf("*** rawid = %u has no known det id***\n", eid.rawId());
0486 #endif
0487                   break;
0488               }
0489               */
0490 
0491 #ifdef HCAL_RAWDECODE_GPUDEBUG
0492               printf("erawId = %u linearIndex = %u drawid = %u\n", eid.rawId(), eid.linearIndex(), did.rawId());
0493               printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n",
0494                      flavor,
0495                      uhtrcrate,
0496                      uhtrslot,
0497                      channelid,
0498                      fiber,
0499                      fchannel);
0500 #endif
0501 
0502               break;
0503             }
0504             default:
0505 #ifdef HCAL_RAWDECODE_GPUDEBUG
0506               printf("flavor = %u crate = %u slot = %u channelid = %u\n", flavor, uhtrcrate, uhtrslot, channelid);
0507 #endif
0508               ;
0509           }
0510 
0511           // skip to the next word in case
0512           // 1) current channel was not treated
0513           // 2) we are in the middle of the digi words and not at the end
0514           while (new_channel_start == ptr || !is_channel_header_word(ptr) && ptr != end)
0515             ++ptr;
0516         }
0517 
0518         // thread with rank 31 of the group will have the ptr pointing to the
0519         // header word of the next channel or the end
0520         int const offset_to_shuffle = ptr - start_ptr;
0521 
0522         // always receive from the last guy in the group
0523         auto const offset_for_rank31 = thread_group.shfl(offset_to_shuffle, NTHREADS - 1);
0524 
0525 #ifdef HCAL_RAWDECODE_GPUDEBUG_CG
0526         printf("rank = %d offset_to_shuffle = %d offset_for_rank32 = %d\n",
0527                thread_group.thread_rank(),
0528                offset_to_shuffle,
0529                offset_for_rank31);
0530 #endif
0531 
0532         // update the ptr for all threads of this group
0533         // NOTE: relative to the start_ptr that is the same for all threads of
0534         // this group
0535         ptr = start_ptr + offset_for_rank31;
0536       }
0537     }
0538 
0539     void entryPoint(InputDataCPU const& inputCPU,
0540                     InputDataGPU& inputGPU,
0541                     OutputDataGPU& outputGPU,
0542                     ScratchDataGPU& scratchGPU,
0543                     OutputDataCPU& outputCPU,
0544                     ConditionsProducts const& conditions,
0545                     ConfigurationParameters const& config,
0546                     cudaStream_t cudaStream,
0547                     uint32_t const nfedsWithData,
0548                     uint32_t const nbytesTotal) {
0549       // transfer
0550       cudaCheck(cudaMemcpyAsync(inputGPU.data.get(),
0551                                 inputCPU.data.get(),
0552                                 nbytesTotal * sizeof(unsigned char),
0553                                 cudaMemcpyHostToDevice,
0554                                 cudaStream));
0555       cudaCheck(cudaMemcpyAsync(inputGPU.offsets.get(),
0556                                 inputCPU.offsets.get(),
0557                                 nfedsWithData * sizeof(uint32_t),
0558                                 cudaMemcpyHostToDevice,
0559                                 cudaStream));
0560       cudaCheck(
0561           cudaMemsetAsync(scratchGPU.pChannelsCounters.get(), 0, sizeof(uint32_t) * numOutputCollections, cudaStream));
0562       cudaCheck(cudaMemcpyAsync(
0563           inputGPU.feds.get(), inputCPU.feds.get(), nfedsWithData * sizeof(int), cudaMemcpyHostToDevice, cudaStream));
0564 
0565       // 12 is the max number of modules per crate
0566       kernel_rawdecode_test<32><<<nfedsWithData, 12 * 32, 0, cudaStream>>>(inputGPU.data.get(),
0567                                                                            inputGPU.offsets.get(),
0568                                                                            inputGPU.feds.get(),
0569                                                                            conditions.eMappingProduct.eid2did,
0570                                                                            conditions.eMappingProduct.eid2tid,
0571                                                                            outputGPU.digisF01HE.data.get(),
0572                                                                            outputGPU.digisF01HE.ids.get(),
0573                                                                            outputGPU.digisF5HB.data.get(),
0574                                                                            outputGPU.digisF5HB.ids.get(),
0575                                                                            outputGPU.digisF5HB.npresamples.get(),
0576                                                                            outputGPU.digisF3HB.data.get(),
0577                                                                            outputGPU.digisF3HB.ids.get(),
0578                                                                            scratchGPU.pChannelsCounters.get(),
0579                                                                            config.nsamplesF01HE,
0580                                                                            config.nsamplesF5HB,
0581                                                                            config.nsamplesF3HB,
0582                                                                            nbytesTotal);
0583       cudaCheck(cudaGetLastError());
0584 
0585       cudaCheck(cudaMemcpyAsync(outputCPU.nchannels.get(),
0586                                 scratchGPU.pChannelsCounters.get(),
0587                                 sizeof(uint32_t) * numOutputCollections,
0588                                 cudaMemcpyDeviceToHost,
0589                                 cudaStream));
0590     }
0591 
0592   }  // namespace raw
0593 }  // namespace hcal