File indexing completed on 2024-04-06 12:15:40
0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_OneToManyAssoc_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_OneToManyAssoc_h
0003
0004 #include <algorithm>
0005 #include <cstddef>
0006 #include <cstdint>
0007 #include <type_traits>
0008
0009 #include <alpaka/alpaka.hpp>
0010
0011 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/FlexiStorage.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0015
0016 namespace cms::alpakatools {
0017
0018 template <typename I,
0019 int32_t ONES,
0020 int32_t SIZE
0021 >
0022 class OneToManyAssocBase {
0023 public:
0024 using Counter = uint32_t;
0025
0026 using CountersOnly = OneToManyAssocBase<I, ONES, 0>;
0027
0028 using index_type = I;
0029
0030 struct View {
0031 OneToManyAssocBase *assoc = nullptr;
0032 Counter *offStorage = nullptr;
0033 index_type *contentStorage = nullptr;
0034 int32_t offSize = -1;
0035 int32_t contentSize = -1;
0036 };
0037
0038 static constexpr int32_t ctNOnes() { return ONES; }
0039 constexpr auto totOnes() const { return off.capacity(); }
0040 constexpr auto nOnes() const { return totOnes() - 1; }
0041 static constexpr int32_t ctCapacity() { return SIZE; }
0042 constexpr auto capacity() const { return content.capacity(); }
0043
0044 ALPAKA_FN_HOST_ACC void initStorage(View view) {
0045 ALPAKA_ASSERT_ACC(view.assoc == this);
0046 if constexpr (ctCapacity() < 0) {
0047 ALPAKA_ASSERT_ACC(view.contentStorage);
0048 ALPAKA_ASSERT_ACC(view.contentSize > 0);
0049 content.init(view.contentStorage, view.contentSize);
0050 }
0051 if constexpr (ctNOnes() < 0) {
0052 ALPAKA_ASSERT_ACC(view.offStorage);
0053 ALPAKA_ASSERT_ACC(view.offSize > 0);
0054 off.init(view.offStorage, view.offSize);
0055 }
0056 }
0057
0058 ALPAKA_FN_HOST_ACC void zero() {
0059 for (int32_t i = 0; i < totOnes(); ++i) {
0060 off[i] = 0;
0061 }
0062 }
0063
0064 template <typename TAcc>
0065 ALPAKA_FN_ACC ALPAKA_FN_INLINE void add(const TAcc &acc, CountersOnly const &co) {
0066 for (uint32_t i = 0; static_cast<int>(i) < totOnes(); ++i) {
0067 alpaka::atomicAdd(acc, off.data() + i, co.off[i], alpaka::hierarchy::Blocks{});
0068 }
0069 }
0070
0071 template <typename TAcc>
0072 ALPAKA_FN_ACC ALPAKA_FN_INLINE static uint32_t atomicIncrement(const TAcc &acc, Counter &x) {
0073 return alpaka::atomicAdd(acc, &x, 1u, alpaka::hierarchy::Blocks{});
0074 }
0075
0076 template <typename TAcc>
0077 ALPAKA_FN_ACC ALPAKA_FN_INLINE static uint32_t atomicDecrement(const TAcc &acc, Counter &x) {
0078 return alpaka::atomicSub(acc, &x, 1u, alpaka::hierarchy::Blocks{});
0079 }
0080
0081 template <typename TAcc>
0082 ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) {
0083 ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
0084 atomicIncrement(acc, off[b]);
0085 }
0086
0087 template <typename TAcc>
0088 ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) {
0089 ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
0090 auto w = atomicDecrement(acc, off[b]);
0091 ALPAKA_ASSERT_ACC(w > 0);
0092 content[w - 1] = j;
0093 }
0094
0095
0096 struct zeroAndInit {
0097 template <typename TAcc>
0098 ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const {
0099 ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
0100 ALPAKA_ASSERT_ACC((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
0101 auto h = view.assoc;
0102 if (cms::alpakatools::once_per_block(acc)) {
0103 h->psws = 0;
0104 h->initStorage(view);
0105 }
0106 alpaka::syncBlockThreads(acc);
0107 for (int i : cms::alpakatools::independent_group_elements(acc, h->totOnes())) {
0108 h->off[i] = 0;
0109 }
0110 }
0111 };
0112
0113 template <typename TAcc, typename TQueue>
0114 ALPAKA_FN_INLINE static void launchZero(OneToManyAssocBase *h, TQueue &queue) {
0115 View view = {h, nullptr, nullptr, -1, -1};
0116 launchZero<TAcc>(view, queue);
0117 }
0118
0119 template <typename TAcc, typename TQueue>
0120 ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) {
0121 if constexpr (ctCapacity() < 0) {
0122 ALPAKA_ASSERT_ACC(view.contentStorage);
0123 ALPAKA_ASSERT_ACC(view.contentSize > 0);
0124 }
0125 if constexpr (ctNOnes() < 0) {
0126 ALPAKA_ASSERT_ACC(view.offStorage);
0127 ALPAKA_ASSERT_ACC(view.offSize > 0);
0128 }
0129 if constexpr (!requires_single_thread_per_block_v<TAcc>) {
0130 auto nthreads = 1024;
0131 auto nblocks = 1;
0132 auto workDiv = cms::alpakatools::make_workdiv<TAcc>(nblocks, nthreads);
0133 alpaka::exec<TAcc>(queue, workDiv, zeroAndInit{}, view);
0134 } else {
0135 auto h = view.assoc;
0136 ALPAKA_ASSERT_ACC(h);
0137 h->initStorage(view);
0138 h->zero();
0139 h->psws = 0;
0140 }
0141 }
0142
0143 constexpr auto size() const { return uint32_t(off[totOnes() - 1]); }
0144 constexpr auto size(uint32_t b) const { return off[b + 1] - off[b]; }
0145
0146 constexpr index_type const *begin() const { return content.data(); }
0147 constexpr index_type const *end() const { return begin() + size(); }
0148
0149 constexpr index_type const *begin(uint32_t b) const { return content.data() + off[b]; }
0150 constexpr index_type const *end(uint32_t b) const { return content.data() + off[b + 1]; }
0151
0152 FlexiStorage<Counter, ONES> off;
0153 FlexiStorage<index_type, SIZE> content;
0154 int32_t psws;
0155 };
0156
0157 template <typename I,
0158 int32_t ONES,
0159 int32_t SIZE
0160 >
0161 class OneToManyAssocSequential : public OneToManyAssocBase<I, ONES, SIZE> {
0162 public:
0163 using index_type = typename OneToManyAssocBase<I, ONES, SIZE>::index_type;
0164
0165 template <typename TAcc>
0166 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int32_t
0167 bulkFill(const TAcc &acc, AtomicPairCounter &apc, index_type const *v, uint32_t n) {
0168 auto c = apc.inc_add(acc, n);
0169 if (int(c.first) >= this->nOnes())
0170 return -int32_t(c.first);
0171 this->off[c.first] = c.second;
0172 for (uint32_t j = 0; j < n; ++j)
0173 this->content[c.second + j] = v[j];
0174 return c.first;
0175 }
0176
0177 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalize(AtomicPairCounter const &apc) {
0178 this->off[apc.get().first] = apc.get().second;
0179 }
0180
0181 template <typename TAcc>
0182 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void bulkFinalizeFill(TAcc &acc, AtomicPairCounter const &apc) {
0183 int f = apc.get().first;
0184 auto s = apc.get().second;
0185 if (f >= this->nOnes()) {
0186 this->off[this->nOnes()] = uint32_t(this->off[this->nOnes() - 1]);
0187 return;
0188 }
0189 auto first = f + alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
0190 for (int i = first; i < this->totOnes(); i += alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[0]) {
0191 this->off[i] = s;
0192 }
0193 }
0194
0195 struct finalizeBulk {
0196 template <typename TAcc>
0197 ALPAKA_FN_ACC void operator()(const TAcc &acc,
0198 AtomicPairCounter const *apc,
0199 OneToManyAssocSequential *__restrict__ assoc) const {
0200 assoc->bulkFinalizeFill(acc, *apc);
0201 }
0202 };
0203 };
0204
0205 template <typename I,
0206 int32_t ONES,
0207 int32_t SIZE
0208 >
0209 class OneToManyAssocRandomAccess : public OneToManyAssocBase<I, ONES, SIZE> {
0210 public:
0211 using Counter = typename OneToManyAssocBase<I, ONES, SIZE>::Counter;
0212 using View = typename OneToManyAssocBase<I, ONES, SIZE>::View;
0213
0214 template <typename TAcc>
0215 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) {
0216 ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0);
0217 blockPrefixScan(acc, this->off.data(), this->totOnes(), ws);
0218 ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
0219 }
0220
0221 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() {
0222
0223 for (uint32_t i = 1; static_cast<int>(i) < this->totOnes(); ++i)
0224 this->off[i] += this->off[i - 1];
0225 }
0226
0227 template <typename TAcc, typename TQueue>
0228 ALPAKA_FN_INLINE static void launchFinalize(OneToManyAssocRandomAccess *h, TQueue &queue) {
0229 View view = {h, nullptr, nullptr, -1, -1};
0230 launchFinalize<TAcc>(view, queue);
0231 }
0232
0233 template <typename TAcc, typename TQueue>
0234 ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) {
0235
0236 auto h = static_cast<OneToManyAssocRandomAccess *>(view.assoc);
0237 ALPAKA_ASSERT_ACC(h);
0238 if constexpr (!requires_single_thread_per_block_v<TAcc>) {
0239 Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off));
0240 auto nOnes = OneToManyAssocRandomAccess::ctNOnes();
0241 if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) {
0242 ALPAKA_ASSERT_ACC(view.offStorage);
0243 ALPAKA_ASSERT_ACC(view.offSize > 0);
0244 nOnes = view.offSize;
0245 poff = view.offStorage;
0246 }
0247 ALPAKA_ASSERT_ACC(nOnes > 0);
0248 int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws));
0249 auto nthreads = 1024;
0250 auto nblocks = (nOnes + nthreads - 1) / nthreads;
0251 auto workDiv = cms::alpakatools::make_workdiv<TAcc>(nblocks, nthreads);
0252 alpaka::exec<TAcc>(queue,
0253 workDiv,
0254 multiBlockPrefixScan<Counter>(),
0255 poff,
0256 poff,
0257 nOnes,
0258 nblocks,
0259 ppsws,
0260 alpaka::getPreferredWarpSize(alpaka::getDev(queue)));
0261 } else {
0262 h->finalize();
0263 }
0264 }
0265 };
0266
0267 }
0268
0269 #endif