File indexing completed on 2024-04-06 12:15:46
0001 #include "catch.hpp"
0002
0003 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0004 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0006 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0008
0009 TEST_CASE("copyAsync", "[cudaMemTools]") {
0010 if (not cms::cudatest::testDevices()) {
0011 return;
0012 }
0013
0014 cudaStream_t stream;
0015 cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
0016
0017 SECTION("Host to device") {
0018 SECTION("Single element") {
0019 auto host_orig = cms::cuda::make_host_unique<int>(stream);
0020 *host_orig = 42;
0021
0022 auto device = cms::cuda::make_device_unique<int>(stream);
0023 auto host = cms::cuda::make_host_unique<int>(stream);
0024
0025 cms::cuda::copyAsync(device, host_orig, stream);
0026 cudaCheck(cudaMemcpyAsync(host.get(), device.get(), sizeof(int), cudaMemcpyDeviceToHost, stream));
0027 cudaCheck(cudaStreamSynchronize(stream));
0028
0029 REQUIRE(*host == 42);
0030 }
0031
0032 SECTION("Multiple elements") {
0033 constexpr int N = 100;
0034
0035 auto host_orig = cms::cuda::make_host_unique<int[]>(N, stream);
0036 for (int i = 0; i < N; ++i) {
0037 host_orig[i] = i;
0038 }
0039
0040 auto device = cms::cuda::make_device_unique<int[]>(N, stream);
0041 auto host = cms::cuda::make_host_unique<int[]>(N, stream);
0042
0043 SECTION("Copy all") {
0044 cms::cuda::copyAsync(device, host_orig, N, stream);
0045 cudaCheck(cudaMemcpyAsync(host.get(), device.get(), N * sizeof(int), cudaMemcpyDeviceToHost, stream));
0046 cudaCheck(cudaStreamSynchronize(stream));
0047 for (int i = 0; i < N; ++i) {
0048 CHECK(host[i] == i);
0049 }
0050 }
0051
0052 for (int i = 0; i < N; ++i) {
0053 host_orig[i] = 200 + i;
0054 }
0055
0056 SECTION("Copy some") {
0057 cms::cuda::copyAsync(device, host_orig, 42, stream);
0058 cudaCheck(cudaMemcpyAsync(host.get(), device.get(), 42 * sizeof(int), cudaMemcpyDeviceToHost, stream));
0059 cudaCheck(cudaStreamSynchronize(stream));
0060 for (int i = 0; i < 42; ++i) {
0061 CHECK(host[i] == 200 + i);
0062 }
0063 }
0064 }
0065 }
0066
0067 SECTION("Device to host") {
0068 SECTION("Single element") {
0069 auto host_orig = cms::cuda::make_host_unique<int>(stream);
0070 *host_orig = 42;
0071
0072 auto device = cms::cuda::make_device_unique<int>(stream);
0073 auto host = cms::cuda::make_host_unique<int>(stream);
0074
0075 cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), sizeof(int), cudaMemcpyHostToDevice, stream));
0076 cms::cuda::copyAsync(host, device, stream);
0077 cudaCheck(cudaStreamSynchronize(stream));
0078
0079 REQUIRE(*host == 42);
0080 }
0081
0082 SECTION("Multiple elements") {
0083 constexpr int N = 100;
0084
0085 auto host_orig = cms::cuda::make_host_unique<int[]>(N, stream);
0086 for (int i = 0; i < N; ++i) {
0087 host_orig[i] = i;
0088 }
0089
0090 auto device = cms::cuda::make_device_unique<int[]>(N, stream);
0091 auto host = cms::cuda::make_host_unique<int[]>(N, stream);
0092
0093 SECTION("Copy all") {
0094 cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), N * sizeof(int), cudaMemcpyHostToDevice, stream));
0095 cms::cuda::copyAsync(host, device, N, stream);
0096 cudaCheck(cudaStreamSynchronize(stream));
0097 for (int i = 0; i < N; ++i) {
0098 CHECK(host[i] == i);
0099 }
0100 }
0101
0102 for (int i = 0; i < N; ++i) {
0103 host_orig[i] = 200 + i;
0104 }
0105
0106 SECTION("Copy some") {
0107 cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), 42 * sizeof(int), cudaMemcpyHostToDevice, stream));
0108 cms::cuda::copyAsync(host, device, 42, stream);
0109 cudaCheck(cudaStreamSynchronize(stream));
0110 for (int i = 0; i < 42; ++i) {
0111 CHECK(host[i] == 200 + i);
0112 }
0113 }
0114 }
0115 }
0116
0117 cudaCheck(cudaStreamDestroy(stream));
0118 }