File indexing completed on 2024-04-06 12:05:16
0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004
0005 #include <hip/hip_runtime.h>
0006
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "DataFormats/SoATemplate/interface/SoAView.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0013 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0014
0015
0016
0017
0018
0019
0020
0021 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0022
0023
0024
0025
0026
0027
0028 SOA_COLUMN(double, x),
0029 SOA_COLUMN(double, y),
0030 SOA_COLUMN(double, z),
0031 SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0032 SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0033 SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0034
0035 SOA_SCALAR(const char*, description),
0036 SOA_SCALAR(uint32_t, someNumber))
0037
0038 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0039 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0040 using SoAHostDeviceRangeCheckingView =
0041 SoAHostDeviceLayout::ViewTemplate<cms::soa::RestrictQualify::enabled, cms::soa::RangeChecking::enabled>;
0042 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0043
0044 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0045
0046 SOA_COLUMN(uint16_t, color),
0047 SOA_COLUMN(double, value),
0048 SOA_COLUMN(double*, py),
0049 SOA_COLUMN(uint32_t, count),
0050 SOA_COLUMN(uint32_t, anotherCount))
0051
0052 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0053 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0054
0055
0056 GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate,
0057 SoAFullDeviceViewTemplate,
0058 SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD),
0059 SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)),
0060 SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x),
0061 SOA_VIEW_VALUE(soaHD, y),
0062 SOA_VIEW_VALUE(soaHD, z),
0063 SOA_VIEW_VALUE(soaDO, color),
0064 SOA_VIEW_VALUE(soaDO, value),
0065 SOA_VIEW_VALUE(soaDO, py),
0066 SOA_VIEW_VALUE(soaDO, count),
0067 SOA_VIEW_VALUE(soaDO, anotherCount),
0068 SOA_VIEW_VALUE(soaHD, description),
0069 SOA_VIEW_VALUE(soaHD, someNumber)))
0070
0071 using SoAFullDeviceView =
0072 SoAFullDeviceViewTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0073
0074
0075
0076 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0077 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0078
0079
0080 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0081 int i = blockIdx.x * blockDim.x + threadIdx.x;
0082 if (i >= numElements)
0083 return;
0084 auto si = soa[i];
0085 si.r() = si.a().cross(si.b());
0086 }
0087
0088
0089 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0090 int i = blockIdx.x * blockDim.x + threadIdx.x;
0091 if (i >= numElements)
0092 return;
0093 auto si = soa[i];
0094 si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0095 si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0096 }
0097
0098
0099 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0100 int i = blockIdx.x * blockDim.x + threadIdx.x;
0101 if (i >= numElements)
0102 return;
0103 auto si = soa[i];
0104 si.x() = si.color() * si.value();
0105 }
0106
0107
0108 using RangeCheckingHostDeviceView =
0109 SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0110
0111
0112 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0113 printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0114 [[maybe_unused]] auto si = soa[soa.metadata().size()];
0115 printf("Fail: range-check failure should have stopped the kernel.\n");
0116 }
0117
0118 int main(void) {
0119 cms::rocmtest::requireDevices();
0120
0121 hipStream_t stream;
0122 hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0123
0124
0125 constexpr unsigned int numElements = 65537;
0126
0127
0128 size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0129 std::byte* h_buf = nullptr;
0130 hipCheck(hipHostMalloc((void**)&h_buf, hostDeviceSize));
0131 SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0132 SoAHostDeviceView h_soahd(h_soahdLayout);
0133
0134
0135 SoAHostDeviceRangeCheckingView h_soahdrc(h_soahdLayout);
0136 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc2 = h_soahdLayout;
0137 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc3{h_soahd};
0138 [[maybe_unused]] SoAHostDeviceRangeCheckingView h_soahdrc4 = h_soahd;
0139 SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0140
0141
0142 size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0143 std::byte* d_buf = nullptr;
0144 hipCheck(hipHostMalloc((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0145 SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0146 SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0147 SoAHostDeviceView d_soahdView(d_soahdLayout);
0148 SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout);
0149
0150
0151 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0152 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0153 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0154 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0155 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0156 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0157 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0158 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0159
0160 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0161 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0162 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0163 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0164 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0165 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0166 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0167 decltype(d_soahdLayout)::alignment);
0168 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0169 decltype(d_soahdLayout)::alignment);
0170
0171 assert(0 ==
0172 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0173 assert(0 ==
0174 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0175 assert(0 ==
0176 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0177 assert(0 ==
0178 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0179 assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0180 decltype(d_soadoLayout)::alignment);
0181
0182
0183 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0184 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0185 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0186
0187 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0188 decltype(d_soaFullView)::alignment);
0189 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0190 decltype(d_soaFullView)::alignment);
0191 assert(0 ==
0192 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0193 assert(0 ==
0194 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0195 assert(0 ==
0196 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0197 assert(0 ==
0198 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0199 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0200 decltype(d_soaFullView)::alignment);
0201
0202
0203 std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0204 for (size_t i = 0; i < numElements; ++i) {
0205 auto si = h_soahd[i];
0206
0207
0208 auto v1 = 1.0 * i + 1.0;
0209 auto v2 = 2.0 * i;
0210 auto v3 = 3.0 * i - 1.0;
0211 if (i % 2) {
0212 si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0213 } else {
0214 si.x() = si.a()(0) = si.b()(2) = v1;
0215 si.y() = si.a()(1) = si.b()(1) = v2;
0216 si.z() = si.a()(2) = si.b()(0) = v3;
0217 }
0218 }
0219 auto& sn = h_soahd.someNumber();
0220 sn = numElements + 2;
0221
0222
0223 hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0224
0225
0226 crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0227
0228
0229 hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0230
0231
0232 producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0233
0234
0235 consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0236
0237
0238 hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0239
0240
0241 hipCheck(hipStreamSynchronize(stream));
0242 for (size_t i = 0; i < numElements; ++i) {
0243 auto si = h_soahd_c[i];
0244 assert(si.r() == si.a().cross(si.b()));
0245 double initialX = 1.0 * i + 1.0;
0246 double initialY = 2.0 * i;
0247 double initialZ = 3.0 * i - 1.0;
0248 uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0249 double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0250 if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0251 std::cout << "X failed: for i=" << i << std::endl
0252 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0253 << "expectedX=" << expectedX << std::endl
0254 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0255 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0256 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0257 assert(false);
0258 }
0259 }
0260
0261 {
0262
0263 SoAHostDeviceRangeCheckingView soa1viewRangeChecking(h_soahdLayout);
0264 try {
0265 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0266 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0267 << std::endl;
0268 assert(false);
0269 } catch (const std::out_of_range&) {
0270 }
0271 try {
0272 [[maybe_unused]] auto si = soa1viewRangeChecking[-1];
0273 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0274 << std::endl;
0275 assert(false);
0276 } catch (const std::out_of_range&) {
0277 }
0278 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size() - 1];
0279 [[maybe_unused]] auto si2 = soa1viewRangeChecking[0];
0280 std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0281 "(layout initialization)."
0282 << std::endl;
0283 }
0284
0285 {
0286
0287 try {
0288 [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size()];
0289 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (overflow)."
0290 << std::endl;
0291 assert(false);
0292 } catch (const std::out_of_range&) {
0293 }
0294 try {
0295 [[maybe_unused]] auto si = h_soahdrc3[-1];
0296 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host (underflow)."
0297 << std::endl;
0298 assert(false);
0299 } catch (const std::out_of_range&) {
0300 }
0301 [[maybe_unused]] auto si = h_soahdrc3[h_soahdrc3.metadata().size() - 1];
0302 [[maybe_unused]] auto si2 = h_soahdrc3[0];
0303 std::cout << "Pass: expected range-check exceptions (view-level index access) successfully caught on the host "
0304 "(view initialization)."
0305 << std::endl;
0306 }
0307
0308
0309
0310 RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0311
0312
0313 rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0314
0315
0316 try {
0317 hipCheck(hipStreamSynchronize(stream));
0318 std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0319 assert(false);
0320 } catch (const std::runtime_error&) {
0321 std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0322 }
0323
0324 std::cout << "OK" << std::endl;
0325 }