Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:41

0001 #include <algorithm>
0002 #include <cmath>
0003 #include <iostream>
0004 #include <limits>
0005 #include <random>
0006 
0007 #define CATCH_CONFIG_MAIN
0008 #include <catch.hpp>
0009 
0010 #include <alpaka/alpaka.hpp>
0011 
0012 #include "FWCore/Utilities/interface/stringize.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0015 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0016 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
0017 
0018 using namespace cms::alpakatools;
0019 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0020 
0021 static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestHistoContainer) "]";
0022 
0023 template <typename T, typename Hist, typename VERIFY, typename INCR>
0024 void checkContents(Hist* h,
0025                    unsigned int N,
0026                    VERIFY& verify,
0027                    INCR& incr,
0028                    T window,
0029                    uint32_t nParts,
0030                    const T* v,
0031                    const uint32_t* offsets) {
0032   for (uint32_t j = 0; j < nParts; ++j) {
0033     auto off = Hist::histOff(j);
0034     for (uint32_t i = 0; i < Hist::nbins(); ++i) {
0035       auto ii = i + off;
0036       if (0 == h->size(ii))
0037         continue;
0038       auto k = *h->begin(ii);
0039       if (j % 2)
0040         k = *(h->begin(ii) + (h->end(ii) - h->begin(ii)) / 2);
0041 #ifndef NDEBUG
0042       [[maybe_unused]] auto bk = h->bin(v[k]);
0043 #endif
0044       ALPAKA_ASSERT_ACC(bk == i);
0045       ALPAKA_ASSERT_ACC(k < offsets[j + 1]);
0046       auto kl = h->bin(v[k] - window);
0047       auto kh = h->bin(v[k] + window);
0048       ALPAKA_ASSERT_ACC(kl != i);
0049       ALPAKA_ASSERT_ACC(kh != i);
0050       // std::cout << kl << ' ' << kh << std::endl;
0051 
0052       auto me = v[k];
0053       auto tot = 0;
0054       auto nm = 0;
0055       bool l = true;
0056       auto khh = kh;
0057       incr(khh);
0058       for (auto kk = kl; kk != khh; incr(kk)) {
0059         if (kk != kl && kk != kh)
0060           nm += h->size(kk + off);
0061         for (auto p = h->begin(kk + off); p < h->end(kk + off); ++p) {
0062           if (std::min(std::abs(T(v[*p] - me)), std::abs(T(me - v[*p]))) > window) {
0063           } else {
0064             ++tot;
0065           }
0066         }
0067         if (kk == i) {
0068           l = false;
0069           continue;
0070         }
0071         if (l)
0072           for (auto p = h->begin(kk + off); p < h->end(kk + off); ++p)
0073             verify(i, k, k, (*p));
0074         else
0075           for (auto p = h->begin(kk + off); p < h->end(kk + off); ++p)
0076             verify(i, k, (*p), k);
0077       }
0078       if (!(tot >= nm)) {
0079         std::cout << "too bad " << j << ' ' << i << ' ' << int(me) << '/' << (int)T(me - window) << '/'
0080                   << (int)T(me + window) << ": " << kl << '/' << kh << ' ' << khh << ' ' << tot << '/' << nm
0081                   << std::endl;
0082       }
0083       if (l)
0084         std::cout << "what? " << j << ' ' << i << ' ' << int(me) << '/' << (int)T(me - window) << '/'
0085                   << (int)T(me + window) << ": " << kl << '/' << kh << ' ' << khh << ' ' << tot << '/' << nm
0086                   << std::endl;
0087       ALPAKA_ASSERT_ACC(!l);
0088     }
0089   }
0090   int status;
0091   auto* demangled = abi::__cxa_demangle(typeid(Hist).name(), NULL, NULL, &status);
0092   status || printf("Check contents OK with %s\n", demangled);
0093   std::free(demangled);
0094 }
0095 
0096 template <typename T>
0097 int go(const DevHost& host, const Device& device, Queue& queue) {
0098   std::mt19937 eng(2708);
0099   std::uniform_int_distribution<T> rgen(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
0100 
0101   constexpr unsigned int N = 12000;
0102   auto v = make_host_buffer<T[]>(queue, N);
0103   auto v_d = make_device_buffer<T[]>(queue, N);
0104   alpaka::memcpy(queue, v_d, v);
0105 
0106   constexpr uint32_t nParts = 10;
0107   constexpr uint32_t partSize = N / nParts;
0108 
0109   using Hist = HistoContainer<T, 128, N, 8 * sizeof(T), uint32_t, nParts>;
0110   using HistR = HistoContainer<T, 128, -1, 8 * sizeof(T), uint32_t, nParts>;
0111   std::cout << "HistoContainer " << (int)(offsetof(Hist, off)) << ' ' << Hist::nbins() << ' ' << Hist::totbins() << ' '
0112             << Hist{}.capacity() << ' ' << offsetof(Hist, content) - offsetof(Hist, off) << ' '
0113             << (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) / Hist::nbins() << std::endl;
0114   std::cout << "HistoContainer Runtime sized " << (int)(offsetof(HistR, off)) << ' ' << HistR::nbins() << ' '
0115             << HistR::totbins() << ' ' << HistR{}.capacity() << ' ' << offsetof(HistR, content) - offsetof(HistR, off)
0116             << ' ' << (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) / HistR::nbins() << std::endl;
0117 
0118   // Offsets for each histogram.
0119   auto offsets = make_host_buffer<uint32_t[]>(queue, nParts + 1);
0120   auto offsets_d = make_device_buffer<uint32_t[]>(queue, nParts + 1);
0121 
0122   // Compile sized histogram (self contained)
0123   auto h = make_host_buffer<Hist>(queue);
0124   auto h_d = make_device_buffer<Hist>(queue);
0125 
0126   // Run time sized histogram
0127   auto hr = make_host_buffer<HistR>(queue);
0128   // Data storage for histogram content (host)
0129   auto hd = make_host_buffer<typename HistR::index_type[]>(queue, N);
0130   auto hr_d = make_device_buffer<HistR>(queue);
0131   // Data storage for histogram content (device)
0132   auto hd_d = make_device_buffer<typename HistR::index_type[]>(queue, N);
0133 
0134   // We iterate the test 5 times.
0135   for (int it = 0; it < 5; ++it) {
0136     offsets[0] = 0;
0137     for (uint32_t j = 1; j < nParts + 1; ++j) {
0138       offsets[j] = offsets[j - 1] + partSize - 3 * j;
0139       ALPAKA_ASSERT_ACC(offsets[j] <= N);
0140     }
0141 
0142     if (it == 1) {  // special cases...
0143       offsets[0] = 0;
0144       offsets[1] = 0;
0145       offsets[2] = 19;
0146       offsets[3] = 32 + offsets[2];
0147       offsets[4] = 123 + offsets[3];
0148       offsets[5] = 256 + offsets[4];
0149       offsets[6] = 311 + offsets[5];
0150       offsets[7] = 2111 + offsets[6];
0151       offsets[8] = 256 * 11 + offsets[7];
0152       offsets[9] = 44 + offsets[8];
0153       offsets[10] = 3297 + offsets[9];
0154     }
0155 
0156     alpaka::memcpy(queue, offsets_d, offsets);
0157 
0158     for (long long j = 0; j < N; j++)
0159       v[j] = rgen(eng);
0160 
0161     if (it == 2) {  // big bin
0162       for (long long j = 1000; j < 2000; j++)
0163         v[j] = sizeof(T) == 1 ? 22 : 3456;
0164     }
0165 
0166     // for(unsigned int i=0;i<N;i++)
0167     // {
0168     //   std::cout << "values>" << v[i] << std::endl;
0169     // }
0170     alpaka::memcpy(queue, v_d, v);
0171 
0172     alpaka::memset(queue, h_d, 0);
0173     alpaka::memset(queue, hr_d, 0);
0174     alpaka::memset(queue, hd_d, 0);
0175 
0176     alpaka::wait(queue);
0177 
0178     std::cout << "Calling fillManyFromVector - " << h->size() << std::endl;
0179     fillManyFromVector<Acc1D>(h_d.data(), nParts, v_d.data(), offsets_d.data(), offsets[10], 256, queue);
0180 
0181     std::cout << "Calling fillManyFromVector(runtime sized) - " << h->size() << std::endl;
0182     typename HistR::View hrv_d;
0183     hrv_d.assoc = hr_d.data();
0184     hrv_d.offSize = -1;
0185     hrv_d.offStorage = nullptr;
0186     hrv_d.contentSize = N;
0187     hrv_d.contentStorage = hd_d.data();
0188     fillManyFromVector<Acc1D>(hr_d.data(), hrv_d, nParts, v_d.data(), offsets_d.data(), offsets[10], 256, queue);
0189 
0190     alpaka::memcpy(queue, h, h_d);
0191     // For the runtime sized version:
0192     // We need the histogram for non external data (here, the offsets)
0193     // .. and external data storage (here, the contents)
0194     // .. and plug the data storage address into the histo container
0195     alpaka::memcpy(queue, hr, hr_d);
0196     alpaka::memcpy(queue, hd, hd_d);
0197 
0198     // std::cout << "Calling fillManyFromVector - " <<  h->size() << std::endl;
0199     alpaka::wait(queue);
0200 
0201     // We cannot update the contents address of the histo container before the copy from device happened
0202     typename HistR::View hrv;
0203     hrv.assoc = hr.data();
0204     hrv.offSize = -1;
0205     hrv.offStorage = nullptr;
0206     hrv.contentSize = N;
0207     hrv.contentStorage = hd.data();
0208     hr->initStorage(hrv);
0209 
0210     std::cout << "Copied results" << std::endl;
0211     // for(int i =0;i<=10;i++)
0212     // {
0213     //   std::cout << offsets[i] <<" - "<< h->size() << std::endl;
0214     // }
0215 
0216     ALPAKA_ASSERT_ACC(0 == h->off[0]);
0217     ALPAKA_ASSERT_ACC(offsets[10] == h->size());
0218     ALPAKA_ASSERT_ACC(0 == hr->off[0]);
0219     ALPAKA_ASSERT_ACC(offsets[10] == hr->size());
0220 
0221     auto verify = [&](uint32_t i, uint32_t k, uint32_t t1, uint32_t t2) {
0222       ALPAKA_ASSERT_ACC(t1 < N);
0223       ALPAKA_ASSERT_ACC(t2 < N);
0224       if (T(v[t1] - v[t2]) <= 0)
0225         std::cout << "for " << i << ':' << v[k] << " failed " << v[t1] << ' ' << v[t2] << std::endl;
0226     };
0227 
0228     auto incr = [](auto& k) { return k = (k + 1) % Hist::nbins(); };
0229 
0230     // make sure it spans 3 bins...
0231     auto window = T(1300);
0232     checkContents<T>(h.data(), N, verify, incr, window, nParts, v.data(), offsets.data());
0233     checkContents<T>(hr.data(), N, verify, incr, window, nParts, v.data(), offsets.data());
0234   }
0235 
0236   return 0;
0237 }
0238 
0239 TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestHistoContainer), s_tag) {
0240   SECTION("HistoContainerKernel") {
0241     auto const& host = cms::alpakatools::host();
0242 
0243     // get the list of devices on the current platform
0244     auto const& devices = cms::alpakatools::devices<Platform>();
0245     if (devices.empty()) {
0246       FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, "
0247            "the test will be skipped.\n");
0248     }
0249 
0250     // run the test on each device
0251     for (auto const& device : devices) {
0252       std::cout << "Test Histo Container on " << alpaka::getName(device) << '\n';
0253       auto queue = Queue(device);
0254 
0255       REQUIRE(go<int16_t>(host, device, queue) == 0);
0256       REQUIRE(go<int8_t>(host, device, queue) == 0);
0257     }
0258   }
0259 }