File indexing completed on 2025-06-26 23:26:40
0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004
0005 #include <hip/hip_runtime.h>
0006
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0013
0014
0015
0016
0017
0018
0019
0020 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0021
0022
0023
0024
0025
0026
0027 SOA_COLUMN(double, x),
0028 SOA_COLUMN(double, y),
0029 SOA_COLUMN(double, z),
0030 SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0031 SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0032 SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0033
0034 SOA_SCALAR(const char*, description),
0035 SOA_SCALAR(uint32_t, someNumber))
0036
0037 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0038 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0039 using SoAHostDeviceRangeCheckingView =
0040 SoAHostDeviceLayout::ViewTemplate<cms::soa::RestrictQualify::enabled, cms::soa::RangeChecking::enabled>;
0041 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0042
0043 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0044
0045 SOA_COLUMN(uint16_t, color),
0046 SOA_COLUMN(double, value),
0047 SOA_COLUMN(double*, py),
0048 SOA_COLUMN(uint32_t, count),
0049 SOA_COLUMN(uint32_t, anotherCount))
0050
0051 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0052 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0053
0054 GENERATE_SOA_LAYOUT(SoAFullDeviceLayoutTemplate,
0055 SOA_COLUMN(double, x),
0056 SOA_COLUMN(double, y),
0057 SOA_COLUMN(double, z),
0058 SOA_COLUMN(uint16_t, color),
0059 SOA_COLUMN(double, value),
0060 SOA_COLUMN(double*, py),
0061 SOA_COLUMN(uint32_t, count),
0062 SOA_COLUMN(uint32_t, anotherCount),
0063 SOA_SCALAR(const char*, description),
0064 SOA_SCALAR(uint32_t, someNumber))
0065
0066 using SoAFullDeviceLayout =
0067 SoAFullDeviceLayoutTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0068 using SoAFullDeviceView = SoAFullDeviceLayout::View;
0069 using SoAFullDeviceConstView = SoAFullDeviceLayout::ConstView;
0070
0071
0072
0073 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0074 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0075
0076
0077 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0078 int i = blockIdx.x * blockDim.x + threadIdx.x;
0079 if (i >= numElements)
0080 return;
0081 auto si = soa[i];
0082 si.r() = si.a().cross(si.b());
0083 }
0084
0085
0086 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0087 int i = blockIdx.x * blockDim.x + threadIdx.x;
0088 if (i >= numElements)
0089 return;
0090 auto si = soa[i];
0091 si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0092 si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0093 }
0094
0095
0096 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0097 int i = blockIdx.x * blockDim.x + threadIdx.x;
0098 if (i >= numElements)
0099 return;
0100 auto si = soa[i];
0101 si.x() = si.color() * si.value();
0102 }
0103
0104
0105 using RangeCheckingHostDeviceView =
0106 SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0107
0108
0109 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0110 printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0111 [[maybe_unused]] auto si = soa[soa.metadata().size()];
0112 printf("Fail: range-check failure should have stopped the kernel.\n");
0113 }
0114
0115 int main(void) {
0116 cms::rocmtest::requireDevices();
0117
0118 hipStream_t stream;
0119 hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0120
0121
0122 constexpr unsigned int numElements = 65537;
0123
0124
0125 size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0126 std::byte* h_buf = nullptr;
0127 hipCheck(hipHostMalloc((void**)&h_buf, hostDeviceSize));
0128 SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0129 SoAHostDeviceView h_soahd(h_soahdLayout);
0130
0131
0132 SoAHostDeviceRangeCheckingView h_soahdrc(h_soahdLayout);
0133 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc2 = h_soahdLayout;
0134 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc3{h_soahd};
0135 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc4 = h_soahd;
0136 SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0137
0138
0139 size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0140 std::byte* d_buf = nullptr;
0141 hipCheck(hipHostMalloc((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0142 SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0143 SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0144 SoAHostDeviceView d_soahdView(d_soahdLayout);
0145 SoADeviceOnlyView d_soadoView(d_soadoLayout);
0146 const auto d_soahdRecords = d_soahdView.records();
0147 const auto d_soadoRecords = d_soadoView.records();
0148 SoAFullDeviceView d_soaFullView(d_soahdRecords.x(),
0149 d_soahdRecords.y(),
0150 d_soahdRecords.z(),
0151 d_soadoRecords.color(),
0152 d_soadoRecords.value(),
0153 d_soadoRecords.py(),
0154 d_soadoRecords.count(),
0155 d_soadoRecords.anotherCount(),
0156 d_soahdRecords.description(),
0157 d_soahdRecords.someNumber());
0158
0159
0160 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0161 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0162 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0163 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0164 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0165 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0166 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0167 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0168
0169 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0170 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0171 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0172 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0173 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0174 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0175 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0176 decltype(d_soahdLayout)::alignment);
0177 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0178 decltype(d_soahdLayout)::alignment);
0179
0180 assert(0 ==
0181 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0182 assert(0 ==
0183 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0184 assert(0 ==
0185 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0186 assert(0 ==
0187 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0188 assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0189 decltype(d_soadoLayout)::alignment);
0190
0191
0192 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0193 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0194 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0195
0196 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0197 decltype(d_soaFullView)::alignment);
0198 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0199 decltype(d_soaFullView)::alignment);
0200 assert(0 ==
0201 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0202 assert(0 ==
0203 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0204 assert(0 ==
0205 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0206 assert(0 ==
0207 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0208 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0209 decltype(d_soaFullView)::alignment);
0210
0211
0212 std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0213 for (size_t i = 0; i < numElements; ++i) {
0214 auto si = h_soahd[i];
0215
0216
0217 auto v1 = 1.0 * i + 1.0;
0218 auto v2 = 2.0 * i;
0219 auto v3 = 3.0 * i - 1.0;
0220 if (i % 2) {
0221 si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0222 } else {
0223 si.x() = si.a()(0) = si.b()(2) = v1;
0224 si.y() = si.a()(1) = si.b()(1) = v2;
0225 si.z() = si.a()(2) = si.b()(0) = v3;
0226 }
0227 }
0228 auto& sn = h_soahd.someNumber();
0229 sn = numElements + 2;
0230
0231
0232 hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0233
0234
0235 crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0236
0237
0238 hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0239
0240
0241 producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0242
0243
0244 consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0245
0246
0247 hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0248
0249
0250 hipCheck(hipStreamSynchronize(stream));
0251 for (size_t i = 0; i < numElements; ++i) {
0252 auto si = h_soahd_c[i];
0253 assert(si.r() == si.a().cross(si.b()));
0254 double initialX = 1.0 * i + 1.0;
0255 double initialY = 2.0 * i;
0256 double initialZ = 3.0 * i - 1.0;
0257 uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0258 double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0259 if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0260 std::cout << "X failed: for i=" << i << std::endl
0261 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0262 << "expectedX=" << expectedX << std::endl
0263 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0264 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0265 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0266 assert(false);
0267 }
0268 }
0269
0270 {
0271
0272 SoAHostDeviceRangeCheckingView soa1viewRangeChecking(h_soahdLayout);
0273 try {
0274 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0275 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0276 << std::endl;
0277 assert(false);
0278 } catch (const std::out_of_range&) {
0279 }
0280 try {
0281 [[maybe_unused]] auto si = soa1viewRangeChecking[-1];
0282 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0283 << std::endl;
0284 assert(false);
0285 } catch (const std::out_of_range&) {
0286 }
0287 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size() - 1];
0288 [[maybe_unused]] auto si2 = soa1viewRangeChecking[0];
0289 std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0290 "(layout initialization)."
0291 << std::endl;
0292 }
0293
0294 {
0295
0296 try {
0297 [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size()];
0298 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0299 << std::endl;
0300 assert(false);
0301 } catch (const std::out_of_range&) {
0302 }
0303 try {
0304 [[maybe_unused]] auto si = h_soahdrc3[-1];
0305 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0306 << std::endl;
0307 assert(false);
0308 } catch (const std::out_of_range&) {
0309 }
0310 [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size() - 1];
0311 [[maybe_unused]] auto si2 = h_soahdrc3[0];
0312 std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0313 "(view initialization)."
0314 << std::endl;
0315 }
0316
0317
0318
0319 RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0320
0321
0322 rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0323
0324
0325 try {
0326 hipCheck(hipStreamSynchronize(stream));
0327 std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0328 assert(false);
0329 } catch (const std::runtime_error&) {
0330 std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0331 }
0332
0333 std::cout << "OK" << std::endl;
0334 }