1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
#ifndef HeterogeneousCore_AlpakaCore_interface_EventCache_h
#define HeterogeneousCore_AlpakaCore_interface_EventCache_h
#include <memory>
#include <utility>
#include <vector>
#include <alpaka/alpaka.hpp>
#include "FWCore/Utilities/interface/ReusableObjectHolder.h"
#include "FWCore/Utilities/interface/thread_safety_macros.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
#include "HeterogeneousCore/AlpakaInterface/interface/AlpakaServiceFwd.h"
namespace cms::alpakatools {
template <typename Event>
class EventCache {
public:
  // The backend-specific AlpakaService classes are the only intended
  // callers of the private clear() member.
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
  friend class alpaka_cuda_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
  friend class alpaka_rocm_async::AlpakaService;
#endif
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
  friend class alpaka_serial_sync::AlpakaService;
#endif
#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
  friend class alpaka_tbb_async::AlpakaService;
#endif

  using Device = alpaka::Dev<Event>;
  using Platform = alpaka::Platform<Device>;

  // Allocates one (initially empty) holder per device of the Platform.
  // An EventCache is expected to be constructed by the first call to
  // getEventCache(), and only if there are any devices present.
  EventCache() : cache_(devices<Platform>().size()) {}

  // Returns an event for the device `dev`, reusing a cached one when
  // possible. The shared_ptr destructor hands the event back to the
  // cache. The returned event is guaranteed to be either freshly
  // created or in a state where all of its captured work has
  // completed.
  //
  // This function is thread safe
  std::shared_ptr<Event> get(Device dev) {
    // Events whose captured work is still pending are parked here until
    // a usable event is found; releasing them earlier would let the
    // cache hand the very same incomplete event back again (ping-pong).
    std::vector<std::shared_ptr<Event>> incomplete;
    for (;;) {
      auto candidate = makeOrGet(dev);
      if (alpaka::isComplete(*candidate)) {
        // Either the captured work has completed, or the event was just
        // created. The parked events go back to the cache when
        // `incomplete` is destroyed on return.
        return candidate;
      }
      incomplete.push_back(std::move(candidate));
    }
  }

private:
  // Fetches an event from the holder of device `dev`, creating a new
  // event if the holder has none available.
  std::shared_ptr<Event> makeOrGet(Device dev) {
    auto& holder = cache_[alpaka::getNativeHandle(dev)];
    return holder.makeOrGet([dev]() {
      // Request non-busy waits for newly created events
      constexpr bool busyWait = false;
      return std::make_unique<Event>(dev, busyWait);
    });
  }

  // Drops all cached events.
  //
  // Not thread safe, intended to be called only from AlpakaService
  void clear() {
    // Discard the contents of the caches, but keep one
    // edm::ReusableObjectHolder alive per device. This matters mostly
    // for the unit tests, where the function-static EventCache outlives
    // individual tests (and goes through multiple shutdowns of the
    // framework).
    cache_.clear();
    cache_.resize(devices<Platform>().size());
  }

  // One holder of reusable events per device, indexed by the value
  // returned by alpaka::getNativeHandle() for that device.
  std::vector<edm::ReusableObjectHolder<Event>> cache_;
};
// Returns the global EventCache instance for the given Event type.
// The instance is a function-local static, constructed on the first
// call.
//
// This function is thread safe
template <typename Event>
EventCache<Event>& getEventCache() {
  // Marked CMS_THREAD_SAFE because the public interface of EventCache
  // is thread safe
  CMS_THREAD_SAFE static EventCache<Event> instance;
  return instance;
}
} // namespace cms::alpakatools
#endif // HeterogeneousCore_AlpakaCore_interface_EventCache_h
|