Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-22 07:34:11

0001 #include <algorithm>
0002 #include <iostream>
0003 #include <limits>
0004 #include <random>
0005 
0006 #include <alpaka/alpaka.hpp>
0007 
0008 #include "FWCore/Utilities/interface/stringize.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0013 
0014 using namespace cms::alpakatools;
0015 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0016 
0017 template <int NBINS, int S, int DELTA>
0018 struct mykernel {
0019   template <typename T>
0020   ALPAKA_FN_ACC void operator()(Acc1D const& acc, T const* __restrict__ v, uint32_t N) const {
0021     ALPAKA_ASSERT_ACC(v);
0022     ALPAKA_ASSERT_ACC(N == 12000);
0023 
0024     if (once_per_block(acc)) {
0025       printf("start kernel for %d data\n", N);
0026     }
0027 
0028     using Hist = HistoContainer<T, NBINS, 12000, S, uint16_t>;
0029 
0030     auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0031     auto& ws = alpaka::declareSharedVar<typename Hist::Counter[32], __COUNTER__>(acc);
0032 
0033     // set off zero
0034     for (auto j : uniform_elements(acc, Hist::totbins())) {
0035       hist.off[j] = 0;
0036     }
0037     alpaka::syncBlockThreads(acc);
0038 
0039     // set bins zero
0040     for (auto j : uniform_elements(acc, Hist::totbins())) {
0041       hist.content[j] = 0;
0042     }
0043     alpaka::syncBlockThreads(acc);
0044 
0045     // count
0046     for (auto j : uniform_elements(acc, N)) {
0047       hist.count(acc, v[j]);
0048     }
0049     alpaka::syncBlockThreads(acc);
0050 
0051     ALPAKA_ASSERT_ACC(0 == hist.size());
0052     alpaka::syncBlockThreads(acc);
0053 
0054     // finalize
0055     hist.finalize(acc, ws);
0056     alpaka::syncBlockThreads(acc);
0057 
0058     ALPAKA_ASSERT_ACC(N == hist.size());
0059 
0060     // verify
0061     for ([[maybe_unused]] auto j : uniform_elements(acc, Hist::nbins())) {
0062       ALPAKA_ASSERT_ACC(hist.off[j] <= hist.off[j + 1]);
0063     }
0064     alpaka::syncBlockThreads(acc);
0065 
0066     for (auto j : uniform_elements(acc, 32)) {
0067       ws[j] = 0;  // used by prefix scan...
0068     }
0069     alpaka::syncBlockThreads(acc);
0070 
0071     // fill
0072     for (auto j : uniform_elements(acc, N)) {
0073       hist.fill(acc, v[j], j);
0074     }
0075     alpaka::syncBlockThreads(acc);
0076 
0077     ALPAKA_ASSERT_ACC(0 == hist.off[0]);
0078     ALPAKA_ASSERT_ACC(N == hist.size());
0079 
0080     // bin
0081 #ifndef NDEBUG
0082     for (auto j : uniform_elements(acc, hist.size() - 1)) {
0083       auto p = hist.begin() + j;
0084       ALPAKA_ASSERT_ACC((*p) < N);
0085       [[maybe_unused]] auto k1 = Hist::bin(v[*p]);
0086       [[maybe_unused]] auto k2 = Hist::bin(v[*(p + 1)]);
0087       ALPAKA_ASSERT_ACC(k2 >= k1);
0088     }
0089 #endif
0090 
0091     // forEachInWindow
0092     for (auto i : uniform_elements(acc, hist.size())) {
0093       auto p = hist.begin() + i;
0094       auto j = *p;
0095 #ifndef NDEBUG
0096       auto b0 = Hist::bin(v[j]);
0097 #endif
0098       [[maybe_unused]] int tot = 0;
0099       auto ftest = [&](unsigned int k) {
0100         ALPAKA_ASSERT_ACC(k < N);
0101         ++tot;
0102       };
0103       forEachInWindow(hist, v[j], v[j], ftest);
0104 #ifndef NDEBUG
0105       [[maybe_unused]] int rtot = hist.size(b0);
0106       ALPAKA_ASSERT_ACC(tot == rtot);
0107 #endif
0108       tot = 0;
0109       auto vm = int(v[j]) - DELTA;
0110       auto vp = int(v[j]) + DELTA;
0111       constexpr int vmax = NBINS != 128 ? NBINS * 2 - 1 : std::numeric_limits<T>::max();
0112       vm = std::max(vm, 0);
0113       vm = std::min(vm, vmax);
0114       vp = std::min(vp, vmax);
0115       vp = std::max(vp, 0);
0116       ALPAKA_ASSERT_ACC(vp >= vm);
0117       forEachInWindow(hist, vm, vp, ftest);
0118 #ifndef NDEBUG
0119       int bp = Hist::bin(vp);
0120       int bm = Hist::bin(vm);
0121       rtot = hist.end(bp) - hist.begin(bm);
0122       ALPAKA_ASSERT_ACC(tot == rtot);
0123 #endif
0124     }
0125   }
0126 };
0127 
0128 template <typename T, int NBINS = 128, int S = 8 * sizeof(T), int DELTA = 1000>
0129 void go(const DevHost& host, const Device& device, Queue& queue) {
0130   std::mt19937 eng;
0131 
0132   int rmin = std::numeric_limits<T>::min();
0133   int rmax = std::numeric_limits<T>::max();
0134   if (NBINS != 128) {
0135     rmin = 0;
0136     rmax = NBINS * 2 - 1;
0137   }
0138 
0139   std::uniform_int_distribution<T> rgen(rmin, rmax);
0140   constexpr unsigned int N = 12000;
0141 
0142   using Hist = HistoContainer<T, NBINS, N, S>;
0143   std::cout << "HistoContainer " << Hist::nbits() << ' ' << Hist::nbins() << ' ' << Hist{}.capacity() << ' '
0144             << (rmax - rmin) / Hist::nbins() << std::endl;
0145   std::cout << "bins " << int(Hist::bin(0)) << ' ' << int(Hist::bin(rmin)) << ' ' << int(Hist::bin(rmax)) << std::endl;
0146 
0147   auto v = make_host_buffer<T[]>(queue, N);
0148   auto v_d = make_device_buffer<T[]>(queue, N);
0149 
0150   for (int it = 0; it < 5; ++it) {
0151     for (long long j = 0; j < N; j++)
0152       v[j] = rgen(eng);
0153     if (it == 2)
0154       for (long long j = N / 2; j < N / 2 + N / 4; j++)
0155         v[j] = 4;
0156 
0157     alpaka::memcpy(queue, v_d, v);
0158 
0159     const auto threadsPerBlockOrElementsPerThread = 256u;
0160     const auto blocksPerGrid = 1u;
0161     const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0162     alpaka::enqueue(queue, alpaka::createTaskKernel<Acc1D>(workDiv, mykernel<NBINS, S, DELTA>(), v_d.data(), N));
0163   }
0164   alpaka::wait(queue);
0165 }
0166 
0167 int main() {
0168   auto const& host = cms::alpakatools::host();
0169 
0170   // get the list of devices on the current platform
0171   auto const& devices = cms::alpakatools::devices<Platform>();
0172   if (devices.empty()) {
0173     std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0174       "the test will be skipped.\n";
0175     exit(EXIT_FAILURE);
0176   }
0177 
0178   // run the test on each device
0179   for (auto const& device : devices) {
0180     std::cout << "Test One Histo Container on " << alpaka::getName(device) << '\n';
0181 
0182     auto queue = Queue(device);
0183 
0184     go<int16_t>(host, device, queue);
0185     go<uint8_t, 128, 8, 4>(host, device, queue);
0186     go<uint16_t, 313 / 2, 9, 4>(host, device, queue);
0187   }
0188 
0189   return 0;
0190 }