File indexing completed on 2023-10-25 09:40:03
0001 #include <cassert>
0002 #include <cstdlib>
0003 #include <memory>
0004
0005 #include <hip/hip_runtime.h>
0006
0007 #include <Eigen/Core>
0008 #include <Eigen/Dense>
0009
0010 #include "DataFormats/SoATemplate/interface/SoALayout.h"
0011 #include "DataFormats/SoATemplate/interface/SoAView.h"
0012 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0013 #include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
0014
0015
0016
0017
0018
0019
0020
0021 GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate,
0022
0023
0024
0025
0026
0027
0028 SOA_COLUMN(double, x),
0029 SOA_COLUMN(double, y),
0030 SOA_COLUMN(double, z),
0031 SOA_EIGEN_COLUMN(Eigen::Vector3d, a),
0032 SOA_EIGEN_COLUMN(Eigen::Vector3d, b),
0033 SOA_EIGEN_COLUMN(Eigen::Vector3d, r),
0034
0035 SOA_SCALAR(const char*, description),
0036 SOA_SCALAR(uint32_t, someNumber))
0037
0038 using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>;
0039 using SoAHostDeviceView = SoAHostDeviceLayout::View;
0040 using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView;
0041
0042 GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate,
0043
0044 SOA_COLUMN(uint16_t, color),
0045 SOA_COLUMN(double, value),
0046 SOA_COLUMN(double*, py),
0047 SOA_COLUMN(uint32_t, count),
0048 SOA_COLUMN(uint32_t, anotherCount))
0049
0050 using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>;
0051 using SoADeviceOnlyView = SoADeviceOnlyLayout::View;
0052
0053
0054 GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate,
0055 SoAFullDeviceViewTemplate,
0056 SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD),
0057 SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)),
0058 SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x),
0059 SOA_VIEW_VALUE(soaHD, y),
0060 SOA_VIEW_VALUE(soaHD, z),
0061 SOA_VIEW_VALUE(soaDO, color),
0062 SOA_VIEW_VALUE(soaDO, value),
0063 SOA_VIEW_VALUE(soaDO, py),
0064 SOA_VIEW_VALUE(soaDO, count),
0065 SOA_VIEW_VALUE(soaDO, anotherCount),
0066 SOA_VIEW_VALUE(soaHD, description),
0067 SOA_VIEW_VALUE(soaHD, someNumber)))
0068
0069 using SoAFullDeviceView =
0070 SoAFullDeviceViewTemplate<cms::soa::CacheLineSize::NvidiaGPU, cms::soa::AlignmentEnforcement::enforced>;
0071
0072
0073
0074 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn, SOA_SCALAR(double, r))
0075 GENERATE_SOA_LAYOUT(TestSoALayoutNoColumn2, SOA_SCALAR(double, r), SOA_SCALAR(double, r2))
0076
0077
0078 __global__ void crossProduct(SoAHostDeviceView soa, const int numElements) {
0079 int i = blockIdx.x * blockDim.x + threadIdx.x;
0080 if (i >= numElements)
0081 return;
0082 auto si = soa[i];
0083 si.r() = si.a().cross(si.b());
0084 }
0085
0086
0087 __global__ void producerKernel(SoAFullDeviceView soa, const int numElements) {
0088 int i = blockIdx.x * blockDim.x + threadIdx.x;
0089 if (i >= numElements)
0090 return;
0091 auto si = soa[i];
0092 si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char));
0093 si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z());
0094 }
0095
0096
0097 __global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) {
0098 int i = blockIdx.x * blockDim.x + threadIdx.x;
0099 if (i >= numElements)
0100 return;
0101 auto si = soa[i];
0102 si.x() = si.color() * si.value();
0103 }
0104
0105
0106 using RangeCheckingHostDeviceView =
0107 SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>;
0108
0109
0110 __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) {
0111 printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x);
0112 [[maybe_unused]] auto si = soa[soa.metadata().size()];
0113 printf("Fail: range-check failure should have stopped the kernel.\n");
0114 }
0115
0116 int main(void) {
0117 cms::rocmtest::requireDevices();
0118
0119 hipStream_t stream;
0120 hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
0121
0122
0123 constexpr unsigned int numElements = 65537;
0124
0125
0126 size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements);
0127 std::byte* h_buf = nullptr;
0128 hipCheck(hipMallocHost((void**)&h_buf, hostDeviceSize));
0129 SoAHostDeviceLayout h_soahdLayout(h_buf, numElements);
0130 SoAHostDeviceView h_soahd(h_soahdLayout);
0131 SoAHostDeviceConstView h_soahd_c(h_soahdLayout);
0132
0133
0134 size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements);
0135 std::byte* d_buf = nullptr;
0136 hipCheck(hipMallocHost((void**)&d_buf, hostDeviceSize + deviceOnlySize));
0137 SoAHostDeviceLayout d_soahdLayout(d_buf, numElements);
0138 SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements);
0139 SoAHostDeviceView d_soahdView(d_soahdLayout);
0140 SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout);
0141
0142
0143 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment);
0144 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment);
0145 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment);
0146 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment);
0147 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment);
0148 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment);
0149 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment);
0150 assert(0 == reinterpret_cast<uintptr_t>(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment);
0151
0152 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment);
0153 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment);
0154 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment);
0155 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment);
0156 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment);
0157 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment);
0158 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_description()) %
0159 decltype(d_soahdLayout)::alignment);
0160 assert(0 == reinterpret_cast<uintptr_t>(d_soahdLayout.metadata().addressOf_someNumber()) %
0161 decltype(d_soahdLayout)::alignment);
0162
0163 assert(0 ==
0164 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment);
0165 assert(0 ==
0166 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment);
0167 assert(0 ==
0168 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment);
0169 assert(0 ==
0170 reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment);
0171 assert(0 == reinterpret_cast<uintptr_t>(d_soadoLayout.metadata().addressOf_anotherCount()) %
0172 decltype(d_soadoLayout)::alignment);
0173
0174
0175 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment);
0176 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment);
0177 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment);
0178
0179 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_description()) %
0180 decltype(d_soaFullView)::alignment);
0181 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_someNumber()) %
0182 decltype(d_soaFullView)::alignment);
0183 assert(0 ==
0184 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment);
0185 assert(0 ==
0186 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment);
0187 assert(0 ==
0188 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment);
0189 assert(0 ==
0190 reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment);
0191 assert(0 == reinterpret_cast<uintptr_t>(d_soaFullView.metadata().addressOf_anotherCount()) %
0192 decltype(d_soaFullView)::alignment);
0193
0194
0195 std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize);
0196 for (size_t i = 0; i < numElements; ++i) {
0197 auto si = h_soahd[i];
0198
0199
0200 auto v1 = 1.0 * i + 1.0;
0201 auto v2 = 2.0 * i;
0202 auto v3 = 3.0 * i - 1.0;
0203 if (i % 2) {
0204 si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}};
0205 } else {
0206 si.x() = si.a()(0) = si.b()(2) = v1;
0207 si.y() = si.a()(1) = si.b()(1) = v2;
0208 si.z() = si.a()(2) = si.b()(0) = v3;
0209 }
0210 }
0211 auto& sn = h_soahd.someNumber();
0212 sn = numElements + 2;
0213
0214
0215 hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream));
0216
0217
0218 crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements);
0219
0220
0221 hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream));
0222
0223
0224 producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0225
0226
0227 consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements);
0228
0229
0230 hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream));
0231
0232
0233 hipCheck(hipStreamSynchronize(stream));
0234 for (size_t i = 0; i < numElements; ++i) {
0235 auto si = h_soahd_c[i];
0236 assert(si.r() == si.a().cross(si.b()));
0237 double initialX = 1.0 * i + 1.0;
0238 double initialY = 2.0 * i;
0239 double initialZ = 3.0 * i - 1.0;
0240 uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char));
0241 double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ);
0242 if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits<double>::epsilon()) {
0243 std::cout << "X failed: for i=" << i << std::endl
0244 << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl
0245 << "expectedX=" << expectedX << std::endl
0246 << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl
0247 << "relativeDiff=" << abs(si.x() - expectedX) / expectedX
0248 << " epsilon=" << std::numeric_limits<double>::epsilon() << std::endl;
0249 assert(false);
0250 }
0251 }
0252
0253
0254 try {
0255
0256 SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>
0257 soa1viewRangeChecking(h_soahdLayout);
0258
0259 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0260 std::cout << "Fail: expected range-check exception (operator[]) not caught on the host." << std::endl;
0261 assert(false);
0262 } catch (const std::out_of_range&) {
0263 std::cout << "Pass: expected range-check exception (operator[]) successfully caught on the host." << std::endl;
0264 }
0265
0266 try {
0267
0268 SoAHostDeviceLayout::ViewTemplate<SoAHostDeviceView::restrictQualify, cms::soa::RangeChecking::enabled>
0269 soa1viewRangeChecking(h_soahdLayout);
0270
0271 [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()];
0272 std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host." << std::endl;
0273 assert(false);
0274 } catch (const std::out_of_range&) {
0275 std::cout << "Pass: expected range-check exception (view-level index access) successfully caught on the host."
0276 << std::endl;
0277 }
0278
0279
0280
0281 RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout);
0282
0283
0284 rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking);
0285
0286
0287 try {
0288 hipCheck(hipStreamSynchronize(stream));
0289 std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl;
0290 assert(false);
0291 } catch (const std::runtime_error&) {
0292 std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl;
0293 }
0294
0295 std::cout << "OK" << std::endl;
0296 }