AlpakaCore/interface/EventCache.h

0001 #ifndef HeterogeneousCore_AlpakaCore_interface_EventCache_h
0002 #define HeterogeneousCore_AlpakaCore_interface_EventCache_h
0003
0004 #include <memory>
0005 #include <utility>
0006 #include <vector>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "FWCore/Utilities/interface/ReusableObjectHolder.h"
0011 #include "FWCore/Utilities/interface/thread_safety_macros.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"
0015
0016 namespace cms::alpakatools {
0017
0018   template <typename Event>
0019   class EventCache {
0020   public:
0021 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0022     friend class alpaka_cuda_async::AlpakaService;
0023 #endif
0024 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0025     friend class alpaka_rocm_async::AlpakaService;
0026 #endif
0027 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
0028     friend class alpaka_serial_sync::AlpakaService;
0029 #endif
0030 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
0031     friend class alpaka_tbb_async::AlpakaService;
0032 #endif
0033
0034     using Device = alpaka::Dev<Event>;
0035     using Platform = alpaka::Platform<Device>;
0036
0037     // EventCache should be constructed by the first call to
0038     // getEventCache() only if we have any devices present
0039     EventCache() : cache_(devices<Platform>().size()) {}
0040
0041     // Gets a (cached) event for the current device. The event
0042     // will be returned to the cache by the shared_ptr destructor. The
0043     // returned event is guaranteed to be in the state where all
0044     // captured work has completed.
0045     //
0046     // This function is thread safe
0047     std::shared_ptr<Event> get(Device dev) {
0048       auto event = makeOrGet(dev);
0049       // captured work has completed, or a just-created event
0050       if (alpaka::isComplete(*event)) {
0051         return event;
0052       }
0053
0054       // Got an event with incomplete captured work. Try again until we
0055       // get a completed (or a just-created) event. Need to keep all
0056       // incomplete events until a completed event is found in order to
0057       // avoid ping-pong with an incomplete event.
0058       std::vector<std::shared_ptr<Event>> ptrs{std::move(event)};
0059       bool completed;
0060       do {
0061         event = makeOrGet(dev);
0062         completed = alpaka::isComplete(*event);
0063         if (not completed) {
0064           ptrs.emplace_back(std::move(event));
0065         }
0066       } while (not completed);
0067       return event;
0068     }
0069
0070   private:
0071     std::shared_ptr<Event> makeOrGet(Device dev) {
0072       return cache_[alpaka::getNativeHandle(dev)].makeOrGet([dev]() {
0073         // We want non-busy waits
0074         bool constexpr busyWait = false;
0075         return std::make_unique<Event>(dev, busyWait);
0076       });
0077     }
0078
0079     // not thread safe, intended to be called only from AlpakaService
0080     void clear() {
0081       // Reset the contents of the caches, but leave an
0082       // edm::ReusableObjectHolder alive for each device. This is needed
0083       // mostly for the unit tests, where the function-static
0084       // EventCache lives through multiple tests (and go through
0085       // multiple shutdowns of the framework).
0086       cache_.clear();
0087       cache_.resize(devices<Platform>().size());
0088     }
0089
0090     std::vector<edm::ReusableObjectHolder<Event>> cache_;
0091   };
0092
0093   // Gets the global instance of a EventCache
0094   // This function is thread safe
0095   template <typename Event>
0096   EventCache<Event>& getEventCache() {
0097     // the public interface is thread safe
0098     CMS_THREAD_SAFE static EventCache<Event> cache;
0099     return cache;
0100   }
0101
0102 }  // namespace cms::alpakatools
0103
0104 #endif  // HeterogeneousCore_AlpakaCore_interface_EventCache_h