#ifndef HeterogeneousCore_CUDACore_ESProduct_h
#define HeterogeneousCore_CUDACore_ESProduct_h

#include <atomic>
#include <cassert>
#include <mutex>
#include <vector>

#include "FWCore/Utilities/interface/thread_safety_macros.h"
#include "HeterogeneousCore/CUDAServices/interface/numberOfDevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h"
#include "HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
#include "HeterogeneousCore/CUDAUtilities/interface/eventWorkHasCompleted.h"

namespace cms {
  namespace cuda {
    template <typename T>
    class ESProduct {
    public:
      ESProduct() : gpuDataPerDevice_(numberOfDevices()) {
        if (not gpuDataPerDevice_.empty()) {
          cms::cuda::ScopedSetDevice scopedDevice;
          for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) {
            scopedDevice.set(i);
            gpuDataPerDevice_[i].m_event = getEventCache().get();
          }
        }
      }

      ~ESProduct() = default;

      // transferAsync should be a function (or lambda) of signature
      // void(T& data, cudaStream_t stream) that enqueues the asynchronous
      // transfers (and possibly kernels) to the given CUDA stream. The
      // current device is the device the data is transferred to.
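      //
      // A minimal usage sketch (illustrative only: the GpuConstants struct,
      // its members and the host-side buffers used below are hypothetical,
      // not part of this interface):
      //
      //   cms::cuda::ESProduct<GpuConstants> product_;
      //   ...
      //   GpuConstants const& gpuData = product_.dataForCurrentDeviceAsync(
      //       stream, [this](GpuConstants& data, cudaStream_t stream) {
      //         // enqueue the host-to-device copy; it may still be in flight
      //         // when dataForCurrentDeviceAsync() returns
      //         cudaCheck(cudaMemcpyAsync(data.d_params, h_params_, sizeInBytes_,
      //                                   cudaMemcpyHostToDevice, stream));
      //       });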
      template <typename F>
      const T& dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const {
        int device = currentDevice();
        auto& data = gpuDataPerDevice_[device];

        // If the GPU data have already been filled, they can be returned immediately
        if (not data.m_filled.load()) {
          // Not filled yet, so take the per-device lock and fill them
          std::scoped_lock<std::mutex> lk{data.m_mutex};

          if (data.m_filled.load()) {
            // Another thread marked the data as filled while this one was
            // acquiring the mutex, so they can be returned as such
            return data.m_data;
          }

          if (data.m_fillingStream != nullptr) {
            // Another thread has already enqueued the transfer.
            // Check whether the event recorded after the transfer has occurred.
            if (eventWorkHasCompleted(data.m_event.get())) {
              // The transfer has completed, so the data are accessible from
              // all CUDA streams of this device. Mark them as filled for all
              // subsequent calls and return them.
              auto should_be_false = data.m_filled.exchange(true);
              assert(not should_be_false);
              data.m_fillingStream = nullptr;
            } else if (data.m_fillingStream != cudaStream) {
              // The transfer is still in flight on a different CUDA stream.
              // Make this stream wait on the recorded event, so that any work
              // queued on it afterwards runs only after the transfer has
              // completed.
              cudaCheck(cudaStreamWaitEvent(cudaStream, data.m_event.get(), 0),
                        "Failed to make a stream to wait for an event");
            }
            // Otherwise the transfer was enqueued on this same stream, so work
            // queued on it afterwards is already ordered after the transfer
            // and nothing more needs to be done.
          } else {
            // The data are not yet on this device and no other thread is
            // transferring them, so enqueue the transfer on this stream
            transferAsync(data.m_data, cudaStream);
            assert(data.m_fillingStream == nullptr);
            data.m_fillingStream = cudaStream;
            // Record an event on the stream so that other streams and later
            // calls can detect when the transfer has completed
            cudaCheck(cudaEventRecord(data.m_event.get(), cudaStream));
            // The data can be returned already now, because all subsequent
            // work must either be enqueued to this stream or synchronize
            // with it
          }
        }

        return data.m_data;
      }

    private:
      struct Item {
        mutable std::mutex m_mutex;
        CMS_THREAD_GUARD(m_mutex) mutable SharedEventPtr m_event;
        // stream on which the fill was enqueued; non-null until the fill is
        // observed to have completed
        CMS_THREAD_GUARD(m_mutex) mutable cudaStream_t m_fillingStream = nullptr;
        // cheap check, without taking the mutex, whether the data have been filled
        mutable std::atomic<bool> m_filled = false;
        CMS_THREAD_GUARD(m_mutex) mutable T m_data;
      };

      std::vector<Item> gpuDataPerDevice_;
    };
  }  // namespace cuda
}  // namespace cms

#endif