Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-06-26 23:26:40

0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004 
0005 #include <hip/hip_runtime.h>
0006 
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009 
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0013 
0014 // Test SoA stores and view.
0015 // Use cases
0016 // Multiple stores in a buffer
0017 // Scalars, Columns of scalars and of Eigen vectors
0018 // View to each of them, from one and multiple stores.
0019 
0020 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0021                     /*SoAHostDeviceViewTemplate,*/
0022                     // predefined static scalars
0023                     // size_t size;
0024                     // size_t alignment;
0025 
0026                     // columns: one value per element
0027                     SOA_COLUMN(double, x),
0028                     SOA_COLUMN(double, y),
0029                     SOA_COLUMN(double, z),
0030                     SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0031                     SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0032                     SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0033                     // scalars: one value for the whole structure
0034                     SOA_SCALAR(const char*, description),
0035                     SOA_SCALAR(uint32_t, someNumber))
0036 
0037 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0038 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0039 using SoAHostDeviceRangeCheckingView =
0040     SoAHostDeviceLayout::ViewTemplate<cms::soa::RestrictQualify::enabled, cms::soa::RangeChecking::enabled>;
0041 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0042 
0043 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0044                     /*SoADeviceOnlyViewTemplate,*/
0045                     SOA_COLUMN(uint16_t, color),
0046                     SOA_COLUMN(double, value),
0047                     SOA_COLUMN(double*, py),
0048                     SOA_COLUMN(uint32_t, count),
0049                     SOA_COLUMN(uint32_t, anotherCount))
0050 
0051 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0052 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0053 
0054 GENERATE_SOA_LAYOUT(SoAFullDeviceLayoutTemplate,
0055                     SOA_COLUMN(double, x),
0056                     SOA_COLUMN(double, y),
0057                     SOA_COLUMN(double, z),
0058                     SOA_COLUMN(uint16_t, color),
0059                     SOA_COLUMN(double, value),
0060                     SOA_COLUMN(double*, py),
0061                     SOA_COLUMN(uint32_t, count),
0062                     SOA_COLUMN(uint32_t, anotherCount),
0063                     SOA_SCALAR(const char*, description),
0064                     SOA_SCALAR(uint32_t, someNumber))
0065 
0066 using SoAFullDeviceLayout =
0067     SoAFullDeviceLayoutTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0068 using SoAFullDeviceView = SoAFullDeviceLayout::View;
0069 using SoAFullDeviceConstView = SoAFullDeviceLayout::ConstView;
0070 
0071 // These SoAs validate that the generating macros do not get confused in the special case where there are
0072 // no columns and only scalar elements in the SoA.
0073 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0074 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0075 
0076 // Eigen cross product kernel (on store)
0077 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0078   int i = blockIdx.x * blockDim.x + threadIdx.x;
0079   if (i >= numElements)
0080     return;
0081   auto si = soa[i];
0082   si.r() = si.a().cross(si.b());
0083 }
0084 
0085 // Device-only producer kernel
0086 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0087   int i = blockIdx.x * blockDim.x + threadIdx.x;
0088   if (i >= numElements)
0089     return;
0090   auto si = soa[i];
0091   si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0092   si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0093 }
0094 
0095 // Device-only consumer with result in host-device area
0096 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0097   int i = blockIdx.x * blockDim.x + threadIdx.x;
0098   if (i >= numElements)
0099     return;
0100   auto si = soa[i];
0101   si.x() = si.color() * si.value();
0102 }
0103 
0104 // Get a view like the default, except for range checking
0105 using RangeCheckingHostDeviceView =
0106     SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0107 
0108 // We expect to just run one thread.
0109 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0110   printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0111   [[maybe_unused]] auto si = soa[soa.metadata().size()];
0112   printf("Fail: range-check failure should have stopped the kernel.\n");
0113 }
0114 
0115 int main(void) {
0116   cms::rocmtest::requireDevices();
0117 
0118   hipStream_t stream;
0119   hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0120 
0121   // Non-aligned number of elements to check alignment features.
0122   constexpr unsigned int numElements = 65537;
0123 
0124   // Allocate buffer and store on host
0125   size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0126   std::byte* h_buf = nullptr;
0127   hipCheck(hipHostMalloc((void**)&h_buf, hostDeviceSize));
0128   SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0129   SoAHostDeviceView h_soahd(h_soahdLayout);
0130 
0131   // Validation of range checking variants initialization
0132   SoAHostDeviceRangeCheckingView h_soahdrc(h_soahdLayout);
0133   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc2 = h_soahdLayout;
0134   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc3{h_soahd};
0135   [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc4 = h_soahd;
0136   SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0137 
0138   // Alocate buffer, stores and views on the device (single, shared buffer).
0139   size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0140   std::byte* d_buf = nullptr;
0141   hipCheck(hipHostMalloc((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0142   SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0143   SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0144   SoAHostDeviceView d_soahdView(d_soahdLayout);
0145   SoADeviceOnlyView d_soadoView(d_soadoLayout);
0146   const auto d_soahdRecords = d_soahdView.records();
0147   const auto d_soadoRecords = d_soadoView.records();
0148   SoAFullDeviceView d_soaFullView(d_soahdRecords.x(),
0149                                   d_soahdRecords.y(),
0150                                   d_soahdRecords.z(),
0151                                   d_soadoRecords.color(),
0152                                   d_soadoRecords.value(),
0153                                   d_soadoRecords.py(),
0154                                   d_soadoRecords.count(),
0155                                   d_soadoRecords.anotherCount(),
0156                                   d_soahdRecords.description(),
0157                                   d_soahdRecords.someNumber());
0158 
0159   // Assert column alignments
0160   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0161   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0162   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0163   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0164   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0165   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0166   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0167   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0168 
0169   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0170   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0171   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0172   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0173   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0174   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0175   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0176                   decltype(d_soahdLayout)::alignment);
0177   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0178                   decltype(d_soahdLayout)::alignment);
0179 
0180   assert(0 ==
0181          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0182   assert(0 ==
0183          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0184   assert(0 ==
0185          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0186   assert(0 ==
0187          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0188   assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0189                   decltype(d_soadoLayout)::alignment);
0190 
0191   // Views should get the same alignment as the stores they refer to
0192   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0193   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0194   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0195   // Limitation of views: we have to get scalar member addresses via metadata.
0196   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0197                   decltype(d_soaFullView)::alignment);
0198   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0199                   decltype(d_soaFullView)::alignment);
0200   assert(0 ==
0201          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0202   assert(0 ==
0203          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0204   assert(0 ==
0205          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0206   assert(0 ==
0207          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0208   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0209                   decltype(d_soaFullView)::alignment);
0210 
0211   // Initialize and fill the host buffer
0212   std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0213   for (size_t i = 0; i < numElements; ++i) {
0214     auto si = h_soahd[i];
0215     // Tuple assignment...
0216     // elements are: x, y, z, a, b, r
0217     auto v1 = 1.0 * i + 1.0;
0218     auto v2 = 2.0 * i;
0219     auto v3 = 3.0 * i - 1.0;
0220     if (i % 2) {
0221       si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0222     } else {
0223       si.x() = si.a()(0) = si.b()(2) = v1;
0224       si.y() = si.a()(1) = si.b()(1) = v2;
0225       si.z() = si.a()(2) = si.b()(0) = v3;
0226     }
0227   }
0228   auto& sn = h_soahd.someNumber();
0229   sn = numElements + 2;
0230 
0231   // Push to device
0232   hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0233 
0234   // Process on device
0235   crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0236 
0237   // Paint the device only with 0xFF initially
0238   hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0239 
0240   // Produce to the device only area
0241   producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0242 
0243   // Consume the device only area and generate a result on the host-device area
0244   consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0245 
0246   // Get result back
0247   hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0248 
0249   // Wait and validate.
0250   hipCheck(hipStreamSynchronize(stream));
0251   for (size_t i = 0; i < numElements; ++i) {
0252     auto si = h_soahd_c[i];
0253     assert(si.r() == si.a().cross(si.b()));
0254     double initialX = 1.0 * i + 1.0;
0255     double initialY = 2.0 * i;
0256     double initialZ = 3.0 * i - 1.0;
0257     uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0258     double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0259     if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0260       std::cout << "X failed: for i=" << i << std::endl
0261                 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0262                 << "expectedX=" << expectedX << std::endl
0263                 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0264                 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0265                 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0266       assert(false);
0267     }
0268   }
0269 
0270   {
0271     // Get a view like the default, except for range checking (direct initialization from layout)
0272     SoAHostDeviceRangeCheckingView soa1viewRangeChecking(h_soahdLayout);
0273     try {
0274       [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0275       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0276                 << std::endl;
0277       assert(false);
0278     } catch (const std::out_of_range&) {
0279     }
0280     try {
0281       [[maybe_unused]] auto si = soa1viewRangeChecking[-1];
0282       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0283                 << std::endl;
0284       assert(false);
0285     } catch (const std::out_of_range&) {
0286     }
0287     [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size() - 1];
0288     [[maybe_unused]] auto si2 = soa1viewRangeChecking[0];
0289     std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0290                  "(layout initialization)."
0291               << std::endl;
0292   }
0293 
0294   {
0295     // Validation of view initialized range checking view initialization
0296     try {
0297       [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size()];
0298       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0299                 << std::endl;
0300       assert(false);
0301     } catch (const std::out_of_range&) {
0302     }
0303     try {
0304       [[maybe_unused]] auto si = h_soahdrc3[-1];
0305       std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0306                 << std::endl;
0307       assert(false);
0308     } catch (const std::out_of_range&) {
0309     }
0310     [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size() - 1];
0311     [[maybe_unused]] auto si2 = h_soahdrc3[0];
0312     std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0313                  "(view initialization)."
0314               << std::endl;
0315   }
0316 
0317   // Validation of range checking in a kernel
0318   // Get a view like the default one, except for range checking
0319   RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0320 
0321   // This should throw an exception in the kernel
0322   rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0323 
0324   // Wait and confirm that the ROCm kernel failed
0325   try {
0326     hipCheck(hipStreamSynchronize(stream));
0327     std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0328     assert(false);
0329   } catch (const std::runtime_error&) {
0330     std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0331   }
0332 
0333   std::cout << "OK" << std::endl;
0334 }