Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:41

0001 #include <algorithm>
0002 #include <iostream>
0003 #include <limits>
0004 #include <random>
0005 
0006 #include <alpaka/alpaka.hpp>
0007 
0008 #include "FWCore/Utilities/interface/stringize.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0013 
0014 using namespace cms::alpakatools;
0015 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0016 
0017 template <int NBINS, int S, int DELTA>
0018 struct mykernel {
0019   template <typename TAcc, typename T>
0020   ALPAKA_FN_ACC void operator()(const TAcc& acc, T const* __restrict__ v, uint32_t N) const {
0021     ALPAKA_ASSERT_ACC(v);
0022     ALPAKA_ASSERT_ACC(N == 12000);
0023 
0024     const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
0025     if (threadIdxLocal == 0) {
0026       printf("start kernel for %d data\n", N);
0027     }
0028 
0029     using Hist = HistoContainer<T, NBINS, 12000, S, uint16_t>;
0030 
0031     auto& hist = alpaka::declareSharedVar<Hist, __COUNTER__>(acc);
0032     auto& ws = alpaka::declareSharedVar<typename Hist::Counter[32], __COUNTER__>(acc);
0033 
0034     // set off zero
0035     for (auto j : uniform_elements(acc, Hist::totbins())) {
0036       hist.off[j] = 0;
0037     }
0038     alpaka::syncBlockThreads(acc);
0039 
0040     // set bins zero
0041     for (auto j : uniform_elements(acc, Hist::totbins())) {
0042       hist.content[j] = 0;
0043     }
0044     alpaka::syncBlockThreads(acc);
0045 
0046     // count
0047     for (auto j : uniform_elements(acc, N)) {
0048       hist.count(acc, v[j]);
0049     }
0050     alpaka::syncBlockThreads(acc);
0051 
0052     ALPAKA_ASSERT_ACC(0 == hist.size());
0053     alpaka::syncBlockThreads(acc);
0054 
0055     // finalize
0056     hist.finalize(acc, ws);
0057     alpaka::syncBlockThreads(acc);
0058 
0059     ALPAKA_ASSERT_ACC(N == hist.size());
0060 
0061     // verify
0062     for ([[maybe_unused]] auto j : uniform_elements(acc, Hist::nbins())) {
0063       ALPAKA_ASSERT_ACC(hist.off[j] <= hist.off[j + 1]);
0064     }
0065     alpaka::syncBlockThreads(acc);
0066 
0067     for (auto j : uniform_elements(acc, 32)) {
0068       ws[j] = 0;  // used by prefix scan...
0069     }
0070     alpaka::syncBlockThreads(acc);
0071 
0072     // fill
0073     for (auto j : uniform_elements(acc, N)) {
0074       hist.fill(acc, v[j], j);
0075     }
0076     alpaka::syncBlockThreads(acc);
0077 
0078     ALPAKA_ASSERT_ACC(0 == hist.off[0]);
0079     ALPAKA_ASSERT_ACC(N == hist.size());
0080 
0081     // bin
0082 #ifndef NDEBUG
0083     for (auto j : uniform_elements(acc, hist.size() - 1)) {
0084       auto p = hist.begin() + j;
0085       ALPAKA_ASSERT_ACC((*p) < N);
0086       [[maybe_unused]] auto k1 = Hist::bin(v[*p]);
0087       [[maybe_unused]] auto k2 = Hist::bin(v[*(p + 1)]);
0088       ALPAKA_ASSERT_ACC(k2 >= k1);
0089     }
0090 #endif
0091 
0092     // forEachInWindow
0093     for (auto i : uniform_elements(acc, hist.size())) {
0094       auto p = hist.begin() + i;
0095       auto j = *p;
0096 #ifndef NDEBUG
0097       auto b0 = Hist::bin(v[j]);
0098 #endif
0099       [[maybe_unused]] int tot = 0;
0100       auto ftest = [&](unsigned int k) {
0101         ALPAKA_ASSERT_ACC(k < N);
0102         ++tot;
0103       };
0104       forEachInWindow(hist, v[j], v[j], ftest);
0105 #ifndef NDEBUG
0106       [[maybe_unused]] int rtot = hist.size(b0);
0107       ALPAKA_ASSERT_ACC(tot == rtot);
0108 #endif
0109       tot = 0;
0110       auto vm = int(v[j]) - DELTA;
0111       auto vp = int(v[j]) + DELTA;
0112       constexpr int vmax = NBINS != 128 ? NBINS * 2 - 1 : std::numeric_limits<T>::max();
0113       vm = std::max(vm, 0);
0114       vm = std::min(vm, vmax);
0115       vp = std::min(vp, vmax);
0116       vp = std::max(vp, 0);
0117       ALPAKA_ASSERT_ACC(vp >= vm);
0118       forEachInWindow(hist, vm, vp, ftest);
0119 #ifndef NDEBUG
0120       int bp = Hist::bin(vp);
0121       int bm = Hist::bin(vm);
0122       rtot = hist.end(bp) - hist.begin(bm);
0123       ALPAKA_ASSERT_ACC(tot == rtot);
0124 #endif
0125     }
0126   }
0127 };
0128 
0129 template <typename T, int NBINS = 128, int S = 8 * sizeof(T), int DELTA = 1000>
0130 void go(const DevHost& host, const Device& device, Queue& queue) {
0131   std::mt19937 eng;
0132 
0133   int rmin = std::numeric_limits<T>::min();
0134   int rmax = std::numeric_limits<T>::max();
0135   if (NBINS != 128) {
0136     rmin = 0;
0137     rmax = NBINS * 2 - 1;
0138   }
0139 
0140   std::uniform_int_distribution<T> rgen(rmin, rmax);
0141   constexpr unsigned int N = 12000;
0142 
0143   using Hist = HistoContainer<T, NBINS, N, S>;
0144   std::cout << "HistoContainer " << Hist::nbits() << ' ' << Hist::nbins() << ' ' << Hist{}.capacity() << ' '
0145             << (rmax - rmin) / Hist::nbins() << std::endl;
0146   std::cout << "bins " << int(Hist::bin(0)) << ' ' << int(Hist::bin(rmin)) << ' ' << int(Hist::bin(rmax)) << std::endl;
0147 
0148   auto v = make_host_buffer<T[]>(queue, N);
0149   auto v_d = make_device_buffer<T[]>(queue, N);
0150 
0151   for (int it = 0; it < 5; ++it) {
0152     for (long long j = 0; j < N; j++)
0153       v[j] = rgen(eng);
0154     if (it == 2)
0155       for (long long j = N / 2; j < N / 2 + N / 4; j++)
0156         v[j] = 4;
0157 
0158     alpaka::memcpy(queue, v_d, v);
0159 
0160     const auto threadsPerBlockOrElementsPerThread = 256u;
0161     const auto blocksPerGrid = 1u;
0162     const auto workDiv = make_workdiv<Acc1D>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0163     alpaka::enqueue(queue, alpaka::createTaskKernel<Acc1D>(workDiv, mykernel<NBINS, S, DELTA>(), v_d.data(), N));
0164   }
0165   alpaka::wait(queue);
0166 }
0167 
0168 int main() {
0169   auto const& host = cms::alpakatools::host();
0170 
0171   // get the list of devices on the current platform
0172   auto const& devices = cms::alpakatools::devices<Platform>();
0173   if (devices.empty()) {
0174     std::cerr << "No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0175       "the test will be skipped.\n";
0176     exit(EXIT_FAILURE);
0177   }
0178 
0179   // run the test on each device
0180   for (auto const& device : devices) {
0181     std::cout << "Test One Histo Container on " << alpaka::getName(device) << '\n';
0182 
0183     auto queue = Queue(device);
0184 
0185     go<int16_t>(host, device, queue);
0186     go<uint8_t, 128, 8, 4>(host, device, queue);
0187     go<uint16_t, 313 / 2, 9, 4>(host, device, queue);
0188   }
0189 
0190   return 0;
0191 }