Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:05:16

0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004 
0005 #include <hip/hip_runtime.h>
0006 
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009 
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "DataFormats/SoATemplate/interface/SoAView.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0013 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0014 
0015 // Test SoA stores and view.
0016 // Use cases
0017 // Multiple stores in a buffer
0018 // Scalars, Columns of scalars and of Eigen vectors
0019 // View to each of them, from one and multiple stores.
0020 
0021 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0022                     /*SoAHostDeviceViewTemplate,*/
0023                     // predefined static scalars
0024                     // size_t size;
0025                     // size_t alignment;
0026 
0027                     // columns: one value per element
0028                     SOA_COLUMN(double, x),
0029                     SOA_COLUMN(double, y),
0030                     SOA_COLUMN(double, z),
0031                     SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0032                     SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0033                     SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0034                     // scalars: one value for the whole structure
0035                     SOA_SCALAR(const char*, description),
0036                     SOA_SCALAR(uint32_t, someNumber))
0037 
0038 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0039 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0040 using SoAHostDeviceRangeCheckingView =
0041     SoAHostDeviceLayout::ViewTemplate<cms::soa::RestrictQualify::enabled, cms::soa::RangeChecking::enabled>;
0042 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0043 
0044 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0045                     /*SoADeviceOnlyViewTemplate,*/
0046                     SOA_COLUMN(uint16_t, color),
0047                     SOA_COLUMN(double, value),
0048                     SOA_COLUMN(double*, py),
0049                     SOA_COLUMN(uint32_t, count),
0050                     SOA_COLUMN(uint32_t, anotherCount))
0051 
0052 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0053 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0054 
0055 // A 1 to 1 view of the store (except for unsupported types).
0056 GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate,
0057                   SoAFullDeviceViewTemplate,
0058                   SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD),
0059                                        SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)),
0060                   SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x),
0061                                       SOA_VIEW_VALUE(soaHD, y),
0062                                       SOA_VIEW_VALUE(soaHD, z),
0063                                       SOA_VIEW_VALUE(soaDO, color),
0064                                       SOA_VIEW_VALUE(soaDO, value),
0065                                       SOA_VIEW_VALUE(soaDO, py),
0066                                       SOA_VIEW_VALUE(soaDO, count),
0067                                       SOA_VIEW_VALUE(soaDO, anotherCount),
0068                                       SOA_VIEW_VALUE(soaHD, description),
0069                                       SOA_VIEW_VALUE(soaHD, someNumber)))
0070 
0071 using SoAFullDeviceView =
0072     SoAFullDeviceViewTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0073 
0074 // These SoAs validate that the generating macros do not get confused in the special case where there are
0075 // no columns and only scalar elements in the SoA.
0076 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0077 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0078 
0079 // Eigen cross product kernel (on store)
0080 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0081   int i = blockIdx.x * blockDim.x + threadIdx.x;
0082   if (i >= numElements)
0083     return;
0084   auto si = soa[i];
0085   si.r() = si.a().cross(si.b());
0086 }
0087 
0088 // Device-only producer kernel
0089 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0090   int i = blockIdx.x * blockDim.x + threadIdx.x;
0091   if (i >= numElements)
0092     return;
0093   auto si = soa[i];
0094   si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0095   si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0096 }
0097 
0098 // Device-only consumer with result in host-device area
0099 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0100   int i = blockIdx.x * blockDim.x + threadIdx.x;
0101   if (i >= numElements)
0102     return;
0103   auto si = soa[i];
0104   si.x() = si.color() * si.value();
0105 }
0106 
0107 // Get a view like the default, except for range checking
0108 using RangeCheckingHostDeviceView =
0109     SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0110 
0111 // We expect to just run one thread.
0112 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0113   printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0114   [[maybe_unused]] auto si = soa[soa.metadata().size()];
0115   printf("Fail: range-check failure should have stopped the kernel.\n");
0116 }
0117 
0118 int main(void) {
0119   cms::rocmtest::requireDevices();
0120 
0121   hipStream_t stream;
0122   hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0123 
0124   // Non-aligned number of elements to check alignment features.
0125   constexpr unsigned int numElements = 65537;
0126 
0127   // Allocate buffer and store on host
0128   size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0129   std::byte* h_buf = nullptr;
0130   hipCheck(hipHostMalloc((void**)&h_buf, hostDeviceSize));
0131   SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0132   SoAHostDeviceView h_soahd(h_soahdLayout);
0133 
0134   // Validation of range checking variants initialization
0135   SoAHostDeviceRangeCheckingView h_soahdrc(h_soahdLayout);
0136   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc2 = h_soahdLayout;
0137   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc3{h_soahd};
0138   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc4 = h_soahd;
0139   SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0140 
0141   // Alocate buffer, stores and views on the device (single, shared buffer).
0142   size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0143   std::byte* d_buf = nullptr;
0144   hipCheck(hipHostMalloc((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0145   SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0146   SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0147   SoAHostDeviceView d_soahdView(d_soahdLayout);
0148   SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout);
0149 
0150   // Assert column alignments
0151   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0152   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0153   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0154   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0155   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0156   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0157   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0158   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0159 
0160   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0161   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0162   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0163   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0164   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0165   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0166   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0167                   decltype(d_soahdLayout)::alignment);
0168   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0169                   decltype(d_soahdLayout)::alignment);
0170 
0171   assert(0 ==
0172          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0173   assert(0 ==
0174          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0175   assert(0 ==
0176          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0177   assert(0 ==
0178          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0179   assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0180                   decltype(d_soadoLayout)::alignment);
0181 
0182   // Views should get the same alignment as the stores they refer to
0183   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0184   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0185   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0186   // Limitation of views: we have to get scalar member addresses via metadata.
0187   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0188                   decltype(d_soaFullView)::alignment);
0189   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0190                   decltype(d_soaFullView)::alignment);
0191   assert(0 ==
0192          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0193   assert(0 ==
0194          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0195   assert(0 ==
0196          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0197   assert(0 ==
0198          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0199   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0200                   decltype(d_soaFullView)::alignment);
0201 
0202   // Initialize and fill the host buffer
0203   std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0204   for (size_t i = 0; i < numElements; ++i) {
0205     auto si = h_soahd[i];
0206     // Tuple assignment...
0207     // elements are: x, y, z, a, b, r
0208     auto v1 = 1.0 * i + 1.0;
0209     auto v2 = 2.0 * i;
0210     auto v3 = 3.0 * i - 1.0;
0211     if (i % 2) {
0212       si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0213     } else {
0214       si.x() = si.a()(0) = si.b()(2) = v1;
0215       si.y() = si.a()(1) = si.b()(1) = v2;
0216       si.z() = si.a()(2) = si.b()(0) = v3;
0217     }
0218   }
0219   auto& sn = h_soahd.someNumber();
0220   sn = numElements + 2;
0221 
0222   // Push to device
0223   hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0224 
0225   // Process on device
0226   crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0227 
0228   // Paint the device only with 0xFF initially
0229   hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0230 
0231   // Produce to the device only area
0232   producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0233 
0234   // Consume the device only area and generate a result on the host-device area
0235   consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0236 
0237   // Get result back
0238   hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0239 
0240   // Wait and validate.
0241   hipCheck(hipStreamSynchronize(stream));
0242   for (size_t i = 0; i < numElements; ++i) {
0243     auto si = h_soahd_c[i];
0244     assert(si.r() == si.a().cross(si.b()));
0245     double initialX = 1.0 * i + 1.0;
0246     double initialY = 2.0 * i;
0247     double initialZ = 3.0 * i - 1.0;
0248     uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0249     double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0250     if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0251       std::cout << "X failed: for i=" << i << std::endl
0252                 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0253                 << "expectedX=" << expectedX << std::endl
0254                 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0255                 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0256                 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0257       assert(false);
0258     }
0259   }
0260 
0261   {
0262     // Get a view like the default, except for range checking (direct initialization from layout)
0263     SoAHostDeviceRangeCheckingView soa1viewRangeChecking(h_soahdLayout);
0264     try {
0265       [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0266       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0267                 << std::endl;
0268       assert(false);
0269     } catch (const std::out_of_range&) {
0270     }
0271     try {
0272       [[maybe_unused]] auto si = soa1viewRangeChecking[-1];
0273       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0274                 << std::endl;
0275       assert(false);
0276     } catch (const std::out_of_range&) {
0277     }
0278     [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size() - 1];
0279     [[maybe_unused]] auto si2 = soa1viewRangeChecking[0];
0280     std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0281                  "(layout initialization)."
0282               << std::endl;
0283   }
0284 
0285   {
0286     // Validation of view initialized range checking view initialization
0287     try {
0288       [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size()];
0289       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0290                 << std::endl;
0291       assert(false);
0292     } catch (const std::out_of_range&) {
0293     }
0294     try {
0295       [[maybe_unused]] auto si = h_soahdrc3[-1];
0296       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0297                 << std::endl;
0298       assert(false);
0299     } catch (const std::out_of_range&) {
0300     }
0301     [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size() - 1];
0302     [[maybe_unused]] auto si2 = h_soahdrc3[0];
0303     std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0304                  "(view initialization)."
0305               << std::endl;
0306   }
0307 
0308   // Validation of range checking in a kernel
0309   // Get a view like the default one, except for range checking
0310   RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0311 
0312   // This should throw an exception in the kernel
0313   rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0314 
0315   // Wait and confirm that the ROCm kernel failed
0316   try {
0317     hipCheck(hipStreamSynchronize(stream));
0318     std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0319     assert(false);
0320   } catch (const std::runtime_error&) {
0321     std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0322   }
0323 
0324   std::cout << "OK" << std::endl;
0325 }