File indexing completed on 2021-03-09 04:50:20
0001 #include "catch.hpp"
0002
0003 #include "CUDADataFormats/Common/interface/Product.h"
0004 #include "FWCore/Concurrency/interface/WaitingTask.h"
0005 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0006 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0008 #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/eventWorkHasCompleted.h"
0011 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0012 #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
0013 #include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h"
0014 #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
0015 #include "HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h"
0016
0017 #include "test_ScopedContextKernels.h"
0018
0019 namespace cms::cudatest {
0020 class TestScopedContext {
0021 public:
0022 static cuda::ScopedContextProduce make(int dev, bool createEvent) {
0023 cms::cuda::SharedEventPtr event;
0024 if (createEvent) {
0025 event = cms::cuda::getEventCache().get();
0026 }
0027 return cuda::ScopedContextProduce(dev, cms::cuda::getStreamCache().get(), std::move(event));
0028 }
0029 };
0030 }
0031
0032 namespace {
0033 std::unique_ptr<cms::cuda::Product<int*>> produce(int device, int* d, int* h) {
0034 auto ctx = cms::cudatest::TestScopedContext::make(device, true);
0035 cudaCheck(cudaMemcpyAsync(d, h, sizeof(int), cudaMemcpyHostToDevice, ctx.stream()));
0036 cms::cudatest::testScopedContextKernels_single(d, ctx.stream());
0037 return ctx.wrap(d);
0038 }
0039 }
0040
0041 TEST_CASE("Use of cms::cuda::ScopedContext", "[CUDACore]") {
0042 if (not cms::cudatest::testDevices()) {
0043 return;
0044 }
0045
0046 constexpr int defaultDevice = 0;
0047 {
0048 auto ctx = cms::cudatest::TestScopedContext::make(defaultDevice, true);
0049
0050 SECTION("Construct from device ID") { REQUIRE(cms::cuda::currentDevice() == defaultDevice); }
0051
0052 SECTION("Wrap T to cms::cuda::Product<T>") {
0053 std::unique_ptr<cms::cuda::Product<int>> dataPtr = ctx.wrap(10);
0054 REQUIRE(dataPtr.get() != nullptr);
0055 REQUIRE(dataPtr->device() == ctx.device());
0056 REQUIRE(dataPtr->stream() == ctx.stream());
0057 }
0058
0059 SECTION("Construct from from cms::cuda::Product<T>") {
0060 std::unique_ptr<cms::cuda::Product<int>> dataPtr = ctx.wrap(10);
0061 const auto& data = *dataPtr;
0062
0063 cms::cuda::ScopedContextProduce ctx2{data};
0064 REQUIRE(cms::cuda::currentDevice() == data.device());
0065 REQUIRE(ctx2.stream() == data.stream());
0066
0067
0068 cms::cuda::ScopedContextProduce ctx3{data};
0069 REQUIRE(cms::cuda::currentDevice() == data.device());
0070 REQUIRE(ctx3.stream() != data.stream());
0071 }
0072
0073 SECTION("Storing state in cms::cuda::ContextState") {
0074 cms::cuda::ContextState ctxstate;
0075 {
0076 std::unique_ptr<cms::cuda::Product<int>> dataPtr = ctx.wrap(10);
0077 const auto& data = *dataPtr;
0078 tbb::task_group group;
0079 edm::WaitingTaskWithArenaHolder dummy{group, edm::make_waiting_task([](std::exception_ptr const* iPtr) {})};
0080 cms::cuda::ScopedContextAcquire ctx2{data, std::move(dummy), ctxstate};
0081 }
0082
0083 {
0084 cms::cuda::ScopedContextProduce ctx2{ctxstate};
0085 REQUIRE(cms::cuda::currentDevice() == ctx.device());
0086 REQUIRE(ctx2.stream() == ctx.stream());
0087 }
0088 }
0089
0090 SECTION("Joining multiple CUDA streams") {
0091 cms::cuda::ScopedSetDevice setDeviceForThisScope(defaultDevice);
0092
0093
0094 int h_a1 = 1;
0095 auto d_a1 = cms::cuda::make_device_unique<int>(nullptr);
0096 auto wprod1 = produce(defaultDevice, d_a1.get(), &h_a1);
0097
0098
0099 int h_a2 = 2;
0100 auto d_a2 = cms::cuda::make_device_unique<int>(nullptr);
0101 auto wprod2 = produce(defaultDevice, d_a2.get(), &h_a2);
0102
0103 REQUIRE(wprod1->stream() != wprod2->stream());
0104
0105
0106 cms::cuda::ScopedContextProduce ctx2{*wprod1};
0107
0108 auto prod1 = ctx2.get(*wprod1);
0109 auto prod2 = ctx2.get(*wprod2);
0110
0111 auto d_a3 = cms::cuda::make_device_unique<int>(nullptr);
0112 cms::cudatest::testScopedContextKernels_join(prod1, prod2, d_a3.get(), ctx2.stream());
0113 cudaCheck(cudaStreamSynchronize(ctx2.stream()));
0114 REQUIRE(wprod2->isAvailable());
0115 REQUIRE(cms::cuda::eventWorkHasCompleted(wprod2->event()));
0116
0117 h_a1 = 0;
0118 h_a2 = 0;
0119 int h_a3 = 0;
0120
0121 cudaCheck(cudaMemcpyAsync(&h_a1, d_a1.get(), sizeof(int), cudaMemcpyDeviceToHost, ctx.stream()));
0122 cudaCheck(cudaMemcpyAsync(&h_a2, d_a2.get(), sizeof(int), cudaMemcpyDeviceToHost, ctx.stream()));
0123 cudaCheck(cudaMemcpyAsync(&h_a3, d_a3.get(), sizeof(int), cudaMemcpyDeviceToHost, ctx.stream()));
0124
0125 REQUIRE(h_a1 == 2);
0126 REQUIRE(h_a2 == 4);
0127 REQUIRE(h_a3 == 6);
0128 }
0129 }
0130
0131 cudaCheck(cudaSetDevice(defaultDevice));
0132 cudaCheck(cudaDeviceSynchronize());
0133
0134 }