Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 09:40:03

0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004 
0005 #include <hip/hip_runtime.h>
0006 
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009 
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "DataFormats/SoATemplate/interface/SoAView.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0013 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0014 
0015 // Test SoA stores and view.
0016 // Use cases
0017 // Multiple stores in a buffer
0018 // Scalars, Columns of scalars and of Eigen vectors
0019 // View to each of them, from one and multiple stores.
0020 
0021 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0022                     /*SoAHostDeviceViewTemplate,*/
0023                     // predefined static scalars
0024                     // size_t size;
0025                     // size_t alignment;
0026 
0027                     // columns: one value per element
0028                     SOA_COLUMN(double, x),
0029                     SOA_COLUMN(double, y),
0030                     SOA_COLUMN(double, z),
0031                     SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0032                     SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0033                     SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0034                     // scalars: one value for the whole structure
0035                     SOA_SCALAR(const char*, description),
0036                     SOA_SCALAR(uint32_t, someNumber))
0037 
0038 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0039 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0040 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0041 
0042 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0043                     /*SoADeviceOnlyViewTemplate,*/
0044                     SOA_COLUMN(uint16_t, color),
0045                     SOA_COLUMN(double, value),
0046                     SOA_COLUMN(double*, py),
0047                     SOA_COLUMN(uint32_t, count),
0048                     SOA_COLUMN(uint32_t, anotherCount))
0049 
0050 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0051 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0052 
0053 // A 1 to 1 view of the store (except for unsupported types).
0054 GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate,
0055                   SoAFullDeviceViewTemplate,
0056                   SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD),
0057                                        SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)),
0058                   SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x),
0059                                       SOA_VIEW_VALUE(soaHD, y),
0060                                       SOA_VIEW_VALUE(soaHD, z),
0061                                       SOA_VIEW_VALUE(soaDO, color),
0062                                       SOA_VIEW_VALUE(soaDO, value),
0063                                       SOA_VIEW_VALUE(soaDO, py),
0064                                       SOA_VIEW_VALUE(soaDO, count),
0065                                       SOA_VIEW_VALUE(soaDO, anotherCount),
0066                                       SOA_VIEW_VALUE(soaHD, description),
0067                                       SOA_VIEW_VALUE(soaHD, someNumber)))
0068 
0069 using SoAFullDeviceView =
0070     SoAFullDeviceViewTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0071 
0072 // These SoAs validate that the generating macros do not get confused in the special case where there are
0073 // no columns and only scalar elements in the SoA.
0074 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0075 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0076 
0077 // Eigen cross product kernel (on store)
0078 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0079   int i = blockIdx.x * blockDim.x + threadIdx.x;
0080   if (i >= numElements)
0081     return;
0082   auto si = soa[i];
0083   si.r() = si.a().cross(si.b());
0084 }
0085 
0086 // Device-only producer kernel
0087 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0088   int i = blockIdx.x * blockDim.x + threadIdx.x;
0089   if (i >= numElements)
0090     return;
0091   auto si = soa[i];
0092   si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0093   si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0094 }
0095 
0096 // Device-only consumer with result in host-device area
0097 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0098   int i = blockIdx.x * blockDim.x + threadIdx.x;
0099   if (i >= numElements)
0100     return;
0101   auto si = soa[i];
0102   si.x() = si.color() * si.value();
0103 }
0104 
0105 // Get a view like the default, except for range checking
0106 using RangeCheckingHostDeviceView =
0107     SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0108 
0109 // We expect to just run one thread.
0110 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0111   printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0112   [[maybe_unused]] auto si = soa[soa.metadata().size()];
0113   printf("Fail: range-check failure should have stopped the kernel.\n");
0114 }
0115 
0116 int main(void) {
0117   cms::rocmtest::requireDevices();
0118 
0119   hipStream_t stream;
0120   hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0121 
0122   // Non-aligned number of elements to check alignment features.
0123   constexpr unsigned int numElements = 65537;
0124 
0125   // Allocate buffer and store on host
0126   size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0127   std::byte* h_buf = nullptr;
0128   hipCheck(hipMallocHost((void**)&h_buf, hostDeviceSize));
0129   SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0130   SoAHostDeviceView h_soahd(h_soahdLayout);
0131   SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0132 
0133   // Alocate buffer, stores and views on the device (single, shared buffer).
0134   size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0135   std::byte* d_buf = nullptr;
0136   hipCheck(hipMallocHost((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0137   SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0138   SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0139   SoAHostDeviceView d_soahdView(d_soahdLayout);
0140   SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout);
0141 
0142   // Assert column alignments
0143   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0144   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0145   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0146   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0147   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0148   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0149   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0150   assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0151 
0152   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0153   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0154   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0155   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0156   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0157   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0158   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0159                   decltype(d_soahdLayout)::alignment);
0160   assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0161                   decltype(d_soahdLayout)::alignment);
0162 
0163   assert(0 ==
0164          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0165   assert(0 ==
0166          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0167   assert(0 ==
0168          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0169   assert(0 ==
0170          reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0171   assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0172                   decltype(d_soadoLayout)::alignment);
0173 
0174   // Views should get the same alignment as the stores they refer to
0175   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0176   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0177   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0178   // Limitation of views: we have to get scalar member addresses via metadata.
0179   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0180                   decltype(d_soaFullView)::alignment);
0181   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0182                   decltype(d_soaFullView)::alignment);
0183   assert(0 ==
0184          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0185   assert(0 ==
0186          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0187   assert(0 ==
0188          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0189   assert(0 ==
0190          reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0191   assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0192                   decltype(d_soaFullView)::alignment);
0193 
0194   // Initialize and fill the host buffer
0195   std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0196   for (size_t i = 0; i < numElements; ++i) {
0197     auto si = h_soahd[i];
0198     // Tuple assignment...
0199     // elements are: x, y, z, a, b, r
0200     auto v1 = 1.0 * i + 1.0;
0201     auto v2 = 2.0 * i;
0202     auto v3 = 3.0 * i - 1.0;
0203     if (i % 2) {
0204       si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0205     } else {
0206       si.x() = si.a()(0) = si.b()(2) = v1;
0207       si.y() = si.a()(1) = si.b()(1) = v2;
0208       si.z() = si.a()(2) = si.b()(0) = v3;
0209     }
0210   }
0211   auto& sn = h_soahd.someNumber();
0212   sn = numElements + 2;
0213 
0214   // Push to device
0215   hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0216 
0217   // Process on device
0218   crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0219 
0220   // Paint the device only with 0xFF initially
0221   hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0222 
0223   // Produce to the device only area
0224   producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0225 
0226   // Consume the device only area and generate a result on the host-device area
0227   consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0228 
0229   // Get result back
0230   hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0231 
0232   // Wait and validate.
0233   hipCheck(hipStreamSynchronize(stream));
0234   for (size_t i = 0; i < numElements; ++i) {
0235     auto si = h_soahd_c[i];
0236     assert(si.r() == si.a().cross(si.b()));
0237     double initialX = 1.0 * i + 1.0;
0238     double initialY = 2.0 * i;
0239     double initialZ = 3.0 * i - 1.0;
0240     uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0241     double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0242     if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0243       std::cout << "X failed: for i=" << i << std::endl
0244                 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0245                 << "expectedX=" << expectedX << std::endl
0246                 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0247                 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0248                 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0249       assert(false);
0250     }
0251   }
0252 
0253   // Validation of range checking
0254   try {
0255     // Get a view like the default, except for range checking
0256     SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>
0257         soa1viewRangeChecking(h_soahdLayout);
0258     // This should throw an exception
0259     [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0260     std::cout << "Fail: expected range-check exception (operator[]) not caught on the host." << std::endl;
0261     assert(false);
0262   } catch (const std::out_of_range&) {
0263     std::cout << "Pass: expected range-check exception (operator[]) successfully caught on the host." << std::endl;
0264   }
0265 
0266   try {
0267     // Get a view like the default, except for range checking
0268     SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>
0269         soa1viewRangeChecking(h_soahdLayout);
0270     // This should throw an exception
0271     [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0272     std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host." << std::endl;
0273     assert(false);
0274   } catch (const std::out_of_range&) {
0275     std::cout << "Pass: expected range-check exception (view-level index access) successfully caught on the host."
0276               << std::endl;
0277   }
0278 
0279   // Validation of range checking in a kernel
0280   // Get a view like the default one, except for range checking
0281   RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0282 
0283   // This should throw an exception in the kernel
0284   rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0285 
0286   // Wait and confirm that the ROCm kernel failed
0287   try {
0288     hipCheck(hipStreamSynchronize(stream));
0289     std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0290     assert(false);
0291   } catch (const std::runtime_error&) {
0292     std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0293   }
0294 
0295   std::cout << "OK" << std::endl;
0296 }