CUDAUtilities/test/copyAsync_t.cpp

0001 #include "catch.hpp"
0002
0003 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0004 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0006 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0008
0009 TEST_CASE("copyAsync", "[cudaMemTools]") {
0010   if (not cms::cudatest::testDevices()) {
0011     return;
0012   }
0013
0014   cudaStream_t stream;
0015   cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
0016
0017   SECTION("Host to device") {
0018     SECTION("Single element") {
0019       auto host_orig = cms::cuda::make_host_unique<int>(stream);
0020       *host_orig = 42;
0021
0022       auto device = cms::cuda::make_device_unique<int>(stream);
0023       auto host = cms::cuda::make_host_unique<int>(stream);
0024
0025       cms::cuda::copyAsync(device, host_orig, stream);
0026       cudaCheck(cudaMemcpyAsync(host.get(), device.get(), sizeof(int), cudaMemcpyDeviceToHost, stream));
0027       cudaCheck(cudaStreamSynchronize(stream));
0028
0029       REQUIRE(*host == 42);
0030     }
0031
0032     SECTION("Multiple elements") {
0033       constexpr int N = 100;
0034
0035       auto host_orig = cms::cuda::make_host_unique<int[]>(N, stream);
0036       for (int i = 0; i < N; ++i) {
0037         host_orig[i] = i;
0038       }
0039
0040       auto device = cms::cuda::make_device_unique<int[]>(N, stream);
0041       auto host = cms::cuda::make_host_unique<int[]>(N, stream);
0042
0043       SECTION("Copy all") {
0044         cms::cuda::copyAsync(device, host_orig, N, stream);
0045         cudaCheck(cudaMemcpyAsync(host.get(), device.get(), N * sizeof(int), cudaMemcpyDeviceToHost, stream));
0046         cudaCheck(cudaStreamSynchronize(stream));
0047         for (int i = 0; i < N; ++i) {
0048           CHECK(host[i] == i);
0049         }
0050       }
0051
0052       for (int i = 0; i < N; ++i) {
0053         host_orig[i] = 200 + i;
0054       }
0055
0056       SECTION("Copy some") {
0057         cms::cuda::copyAsync(device, host_orig, 42, stream);
0058         cudaCheck(cudaMemcpyAsync(host.get(), device.get(), 42 * sizeof(int), cudaMemcpyDeviceToHost, stream));
0059         cudaCheck(cudaStreamSynchronize(stream));
0060         for (int i = 0; i < 42; ++i) {
0061           CHECK(host[i] == 200 + i);
0062         }
0063       }
0064     }
0065   }
0066
0067   SECTION("Device to host") {
0068     SECTION("Single element") {
0069       auto host_orig = cms::cuda::make_host_unique<int>(stream);
0070       *host_orig = 42;
0071
0072       auto device = cms::cuda::make_device_unique<int>(stream);
0073       auto host = cms::cuda::make_host_unique<int>(stream);
0074
0075       cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), sizeof(int), cudaMemcpyHostToDevice, stream));
0076       cms::cuda::copyAsync(host, device, stream);
0077       cudaCheck(cudaStreamSynchronize(stream));
0078
0079       REQUIRE(*host == 42);
0080     }
0081
0082     SECTION("Multiple elements") {
0083       constexpr int N = 100;
0084
0085       auto host_orig = cms::cuda::make_host_unique<int[]>(N, stream);
0086       for (int i = 0; i < N; ++i) {
0087         host_orig[i] = i;
0088       }
0089
0090       auto device = cms::cuda::make_device_unique<int[]>(N, stream);
0091       auto host = cms::cuda::make_host_unique<int[]>(N, stream);
0092
0093       SECTION("Copy all") {
0094         cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), N * sizeof(int), cudaMemcpyHostToDevice, stream));
0095         cms::cuda::copyAsync(host, device, N, stream);
0096         cudaCheck(cudaStreamSynchronize(stream));
0097         for (int i = 0; i < N; ++i) {
0098           CHECK(host[i] == i);
0099         }
0100       }
0101
0102       for (int i = 0; i < N; ++i) {
0103         host_orig[i] = 200 + i;
0104       }
0105
0106       SECTION("Copy some") {
0107         cudaCheck(cudaMemcpyAsync(device.get(), host_orig.get(), 42 * sizeof(int), cudaMemcpyHostToDevice, stream));
0108         cms::cuda::copyAsync(host, device, 42, stream);
0109         cudaCheck(cudaStreamSynchronize(stream));
0110         for (int i = 0; i < 42; ++i) {
0111           CHECK(host[i] == 200 + i);
0112         }
0113       }
0114     }
0115   }
0116
0117   cudaCheck(cudaStreamDestroy(stream));
0118 }