File indexing completed on 2024-04-06 12:15:40
0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
0003
0004 #include <algorithm>
0005 #include <cstddef>
0006 #include <cstdint>
0007 #include <type_traits>
0008
0009 #include <alpaka/alpaka.hpp>
0010
0011 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0015 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0016 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0017
0018 namespace cms::alpakatools {
0019
0020 struct countFromVector {
0021 template <typename TAcc, typename Histo, typename T>
0022 ALPAKA_FN_ACC void operator()(const TAcc &acc,
0023 Histo *__restrict__ h,
0024 uint32_t nh,
0025 T const *__restrict__ v,
0026 uint32_t const *__restrict__ offsets) const {
0027 const uint32_t nt = offsets[nh];
0028 for (uint32_t i : uniform_elements(acc, nt)) {
0029 auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
0030 ALPAKA_ASSERT_ACC((*off) > 0);
0031 int32_t ih = off - offsets - 1;
0032 ALPAKA_ASSERT_ACC(ih >= 0);
0033 ALPAKA_ASSERT_ACC(ih < int(nh));
0034 h->count(acc, v[i], ih);
0035 }
0036 }
0037 };
0038
0039 struct fillFromVector {
0040 template <typename TAcc, typename Histo, typename T>
0041 ALPAKA_FN_ACC void operator()(const TAcc &acc,
0042 Histo *__restrict__ h,
0043 uint32_t nh,
0044 T const *__restrict__ v,
0045 uint32_t const *__restrict__ offsets) const {
0046 const uint32_t nt = offsets[nh];
0047 for (uint32_t i : uniform_elements(acc, nt)) {
0048 auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
0049 ALPAKA_ASSERT_ACC((*off) > 0);
0050 int32_t ih = off - offsets - 1;
0051 ALPAKA_ASSERT_ACC(ih >= 0);
0052 ALPAKA_ASSERT_ACC(ih < int(nh));
0053 h->fill(acc, v[i], i, ih);
0054 }
0055 }
0056 };
0057
0058 template <typename TAcc, typename Histo, typename T, typename TQueue>
0059 ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
0060 uint32_t nh,
0061 T const *__restrict__ v,
0062 uint32_t const *__restrict__ offsets,
0063 uint32_t totSize,
0064 uint32_t nthreads,
0065 TQueue &queue) {
0066 Histo::template launchZero<TAcc>(h, queue);
0067
0068 const auto threadsPerBlockOrElementsPerThread = nthreads;
0069 const auto blocksPerGrid = divide_up_by(totSize, nthreads);
0070 const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0071
0072 alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
0073 Histo::template launchFinalize<TAcc>(h, queue);
0074
0075 alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
0076 }
0077
0078 template <typename TAcc, typename Histo, typename T, typename TQueue>
0079 ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
0080 typename Histo::View hv,
0081 uint32_t nh,
0082 T const *__restrict__ v,
0083 uint32_t const *__restrict__ offsets,
0084 uint32_t totSize,
0085 uint32_t nthreads,
0086 TQueue &queue) {
0087 Histo::template launchZero<TAcc>(hv, queue);
0088
0089 const auto threadsPerBlockOrElementsPerThread = nthreads;
0090 const auto blocksPerGrid = divide_up_by(totSize, nthreads);
0091 const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0092
0093 alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
0094 Histo::template launchFinalize<TAcc>(h, queue);
0095
0096 alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
0097 }
0098
0099
0100 template <typename Hist, typename V, typename Func>
0101 ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) {
0102 int bs = Hist::bin(value);
0103 int be = std::min(int(Hist::nbins() - 1), bs + n);
0104 bs = std::max(0, bs - n);
0105 ALPAKA_ASSERT_ACC(be >= bs);
0106 for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
0107 func(*pj);
0108 }
0109 }
0110
0111
0112 template <typename Hist, typename V, typename Func>
0113 ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
0114 auto bs = Hist::bin(wmin);
0115 auto be = Hist::bin(wmax);
0116 ALPAKA_ASSERT_ACC(be >= bs);
0117 for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
0118 func(*pj);
0119 }
0120 }
0121
0122 template <typename T,
0123 uint32_t NBINS,
0124 int32_t SIZE,
0125 uint32_t S = sizeof(T) * 8,
0126 typename I = uint32_t,
0127 uint32_t NHISTS = 1
0128 >
0129 class HistoContainer : public OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE> {
0130 public:
0131 using Base = OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE>;
0132 using View = typename Base::View;
0133 using Counter = typename Base::Counter;
0134 using index_type = typename Base::index_type;
0135 using UT = typename std::make_unsigned<T>::type;
0136
0137 static constexpr uint32_t ilog2(uint32_t v) {
0138 constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
0139 constexpr uint32_t s[] = {1, 2, 4, 8, 16};
0140
0141 uint32_t r = 0;
0142 for (auto i = 4; i >= 0; i--)
0143 if (v & b[i]) {
0144 v >>= s[i];
0145 r |= s[i];
0146 }
0147 return r;
0148 }
0149
0150 static constexpr uint32_t sizeT() { return S; }
0151 static constexpr int32_t nhists() { return NHISTS; }
0152 static constexpr uint32_t nbins() { return NBINS; }
0153 static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
0154 static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }
0155
0156 static constexpr auto histOff(uint32_t nh) { return NBINS * nh; }
0157
0158 static constexpr UT bin(T t) {
0159 constexpr uint32_t shift = sizeT() - nbits();
0160 constexpr uint32_t mask = (1 << nbits()) - 1;
0161 return (t >> shift) & mask;
0162 }
0163
0164 template <typename TAcc>
0165 ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
0166 uint32_t b = bin(t);
0167 ALPAKA_ASSERT_ACC(b < nbins());
0168 Base::atomicIncrement(acc, this->off[b]);
0169 }
0170
0171 template <typename TAcc>
0172 ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
0173 uint32_t b = bin(t);
0174 ALPAKA_ASSERT_ACC(b < nbins());
0175 auto w = Base::atomicDecrement(acc, this->off[b]);
0176 ALPAKA_ASSERT_ACC(w > 0);
0177 this->content[w - 1] = j;
0178 }
0179
0180 template <typename TAcc>
0181 ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
0182 uint32_t b = bin(t);
0183 ALPAKA_ASSERT_ACC(b < nbins());
0184 b += histOff(nh);
0185 ALPAKA_ASSERT_ACC(b < totbins());
0186 Base::atomicIncrement(acc, this->off[b]);
0187 }
0188
0189 template <typename TAcc>
0190 ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
0191 uint32_t b = bin(t);
0192 ALPAKA_ASSERT_ACC(b < nbins());
0193 b += histOff(nh);
0194 ALPAKA_ASSERT_ACC(b < totbins());
0195 auto w = Base::atomicDecrement(acc, this->off[b]);
0196 ALPAKA_ASSERT_ACC(w > 0);
0197 this->content[w - 1] = j;
0198 }
0199 };
0200 }
0201 #endif