Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:03:45

0001 #ifndef CUDADataFormatsCommonHeterogeneousSoA_H
0002 #define CUDADataFormatsCommonHeterogeneousSoA_H
0003 
0004 #include <cassert>
0005 
0006 #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
0010 
0011 // a heterogeneous unique pointer...
0012 template <typename T>
0013 class HeterogeneousSoA {
0014 public:
0015   using Product = T;
0016 
0017   HeterogeneousSoA() = default;  // make root happy
0018   ~HeterogeneousSoA() = default;
0019   HeterogeneousSoA(HeterogeneousSoA &&) = default;
0020   HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;
0021 
0022   explicit HeterogeneousSoA(cms::cuda::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
0023   explicit HeterogeneousSoA(cms::cuda::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
0024   explicit HeterogeneousSoA(std::unique_ptr<T> &&p) : std_ptr(std::move(p)) {}
0025 
0026   auto const *get() const { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
0027 
0028   auto const &operator*() const { return *get(); }
0029 
0030   auto const *operator->() const { return get(); }
0031 
0032   auto *get() { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
0033 
0034   auto &operator*() { return *get(); }
0035 
0036   auto *operator->() { return get(); }
0037 
0038   // in reality valid only for GPU version...
0039   cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
0040     assert(dm_ptr);
0041     auto ret = cms::cuda::make_host_unique<T>(stream);
0042     cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
0043     return ret;
0044   }
0045 
0046 private:
0047   // a union wan't do it, a variant will not be more efficienct
0048   cms::cuda::device::unique_ptr<T> dm_ptr;  //!
0049   cms::cuda::host::unique_ptr<T> hm_ptr;    //!
0050   std::unique_ptr<T> std_ptr;               //!
0051 };
0052 
0053 namespace cms {
0054   namespace cudacompat {
0055 
0056     struct GPUTraits {
0057       template <typename T>
0058       using unique_ptr = cms::cuda::device::unique_ptr<T>;
0059 
0060       template <typename T>
0061       static auto make_unique(cudaStream_t stream) {
0062         return cms::cuda::make_device_unique<T>(stream);
0063       }
0064 
0065       template <typename T>
0066       static auto make_unique(size_t size, cudaStream_t stream) {
0067         return cms::cuda::make_device_unique<T>(size, stream);
0068       }
0069 
0070       template <typename T>
0071       static auto make_host_unique(cudaStream_t stream) {
0072         return cms::cuda::make_host_unique<T>(stream);
0073       }
0074 
0075       template <typename T>
0076       static auto make_device_unique(cudaStream_t stream) {
0077         return cms::cuda::make_device_unique<T>(stream);
0078       }
0079 
0080       template <typename T>
0081       static auto make_device_unique(size_t size, cudaStream_t stream) {
0082         return cms::cuda::make_device_unique<T>(size, stream);
0083       }
0084     };
0085 
0086     struct HostTraits {
0087       template <typename T>
0088       using unique_ptr = cms::cuda::host::unique_ptr<T>;
0089 
0090       template <typename T>
0091       static auto make_unique(cudaStream_t stream) {
0092         return cms::cuda::make_host_unique<T>(stream);
0093       }
0094 
0095       template <typename T>
0096       static auto make_unique(size_t size, cudaStream_t stream) {
0097         return cms::cuda::make_host_unique<T>(size, stream);
0098       }
0099 
0100       template <typename T>
0101       static auto make_host_unique(cudaStream_t stream) {
0102         return cms::cuda::make_host_unique<T>(stream);
0103       }
0104 
0105       template <typename T>
0106       static auto make_device_unique(cudaStream_t stream) {
0107         return cms::cuda::make_device_unique<T>(stream);
0108       }
0109 
0110       template <typename T>
0111       static auto make_device_unique(size_t size, cudaStream_t stream) {
0112         return cms::cuda::make_device_unique<T>(size, stream);
0113       }
0114     };
0115 
0116     struct CPUTraits {
0117       template <typename T>
0118       using unique_ptr = std::unique_ptr<T>;
0119 
0120       template <typename T>
0121       static auto make_unique(cudaStream_t) {
0122         return std::make_unique<T>();
0123       }
0124 
0125       template <typename T>
0126       static auto make_unique(size_t size, cudaStream_t) {
0127         return std::make_unique<T>(size);
0128       }
0129 
0130       template <typename T>
0131       static auto make_host_unique(cudaStream_t) {
0132         return std::make_unique<T>();
0133       }
0134 
0135       template <typename T>
0136       static auto make_device_unique(cudaStream_t) {
0137         return std::make_unique<T>();
0138       }
0139 
0140       template <typename T>
0141       static auto make_device_unique(size_t size, cudaStream_t) {
0142         return std::make_unique<T>(size);
0143       }
0144     };
0145 
0146   }  // namespace cudacompat
0147 }  // namespace cms
0148 
0149 // a heterogeneous unique pointer (of a different sort) ...
0150 template <typename T, typename Traits>
0151 class HeterogeneousSoAImpl {
0152 public:
0153   template <typename V>
0154   using unique_ptr = typename Traits::template unique_ptr<V>;
0155 
0156   HeterogeneousSoAImpl() = default;  // make root happy
0157   ~HeterogeneousSoAImpl() = default;
0158   HeterogeneousSoAImpl(HeterogeneousSoAImpl &&) = default;
0159   HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;
0160 
0161   explicit HeterogeneousSoAImpl(unique_ptr<T> &&p) : m_ptr(std::move(p)) {}
0162   explicit HeterogeneousSoAImpl(cudaStream_t stream);
0163 
0164   T const *get() const { return m_ptr.get(); }
0165 
0166   T *get() { return m_ptr.get(); }
0167 
0168   cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;
0169 
0170 private:
0171   unique_ptr<T> m_ptr;  //!
0172 };
0173 
0174 template <typename T, typename Traits>
0175 HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream) {
0176   m_ptr = Traits::template make_unique<T>(stream);
0177 }
0178 
0179 // in reality valid only for GPU version...
0180 template <typename T, typename Traits>
0181 cms::cuda::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
0182   auto ret = cms::cuda::make_host_unique<T>(stream);
0183   cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream));
0184   return ret;
0185 }
0186 
0187 template <typename T>
0188 using HeterogeneousSoAGPU = HeterogeneousSoAImpl<T, cms::cudacompat::GPUTraits>;
0189 template <typename T>
0190 using HeterogeneousSoACPU = HeterogeneousSoAImpl<T, cms::cudacompat::CPUTraits>;
0191 template <typename T>
0192 using HeterogeneousSoAHost = HeterogeneousSoAImpl<T, cms::cudacompat::HostTraits>;
0193 
0194 #endif