Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:40

0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h
0003 
0004 #include <algorithm>
0005 #include <cstddef>
0006 #include <cstdint>
0007 #include <type_traits>
0008 
0009 #include <alpaka/alpaka.hpp>
0010 
0011 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"
0014 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0015 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
0016 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0017 
0018 namespace cms::alpakatools {
0019 
0020   struct countFromVector {
0021     template <typename TAcc, typename Histo, typename T>
0022     ALPAKA_FN_ACC void operator()(const TAcc &acc,
0023                                   Histo *__restrict__ h,
0024                                   uint32_t nh,
0025                                   T const *__restrict__ v,
0026                                   uint32_t const *__restrict__ offsets) const {
0027       const uint32_t nt = offsets[nh];
0028       for (uint32_t i : uniform_elements(acc, nt)) {
0029         auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
0030         ALPAKA_ASSERT_ACC((*off) > 0);
0031         int32_t ih = off - offsets - 1;
0032         ALPAKA_ASSERT_ACC(ih >= 0);
0033         ALPAKA_ASSERT_ACC(ih < int(nh));
0034         h->count(acc, v[i], ih);
0035       }
0036     }
0037   };
0038 
0039   struct fillFromVector {
0040     template <typename TAcc, typename Histo, typename T>
0041     ALPAKA_FN_ACC void operator()(const TAcc &acc,
0042                                   Histo *__restrict__ h,
0043                                   uint32_t nh,
0044                                   T const *__restrict__ v,
0045                                   uint32_t const *__restrict__ offsets) const {
0046       const uint32_t nt = offsets[nh];
0047       for (uint32_t i : uniform_elements(acc, nt)) {
0048         auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
0049         ALPAKA_ASSERT_ACC((*off) > 0);
0050         int32_t ih = off - offsets - 1;
0051         ALPAKA_ASSERT_ACC(ih >= 0);
0052         ALPAKA_ASSERT_ACC(ih < int(nh));
0053         h->fill(acc, v[i], i, ih);
0054       }
0055     }
0056   };
0057 
0058   template <typename TAcc, typename Histo, typename T, typename TQueue>
0059   ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
0060                                            uint32_t nh,
0061                                            T const *__restrict__ v,
0062                                            uint32_t const *__restrict__ offsets,
0063                                            uint32_t totSize,
0064                                            uint32_t nthreads,
0065                                            TQueue &queue) {
0066     Histo::template launchZero<TAcc>(h, queue);
0067 
0068     const auto threadsPerBlockOrElementsPerThread = nthreads;
0069     const auto blocksPerGrid = divide_up_by(totSize, nthreads);
0070     const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0071 
0072     alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
0073     Histo::template launchFinalize<TAcc>(h, queue);
0074 
0075     alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
0076   }
0077 
0078   template <typename TAcc, typename Histo, typename T, typename TQueue>
0079   ALPAKA_FN_INLINE void fillManyFromVector(Histo *__restrict__ h,
0080                                            typename Histo::View hv,
0081                                            uint32_t nh,
0082                                            T const *__restrict__ v,
0083                                            uint32_t const *__restrict__ offsets,
0084                                            uint32_t totSize,
0085                                            uint32_t nthreads,
0086                                            TQueue &queue) {
0087     Histo::template launchZero<TAcc>(hv, queue);
0088 
0089     const auto threadsPerBlockOrElementsPerThread = nthreads;
0090     const auto blocksPerGrid = divide_up_by(totSize, nthreads);
0091     const auto workDiv = make_workdiv<TAcc>(blocksPerGrid, threadsPerBlockOrElementsPerThread);
0092 
0093     alpaka::exec<TAcc>(queue, workDiv, countFromVector(), h, nh, v, offsets);
0094     Histo::template launchFinalize<TAcc>(h, queue);
0095 
0096     alpaka::exec<TAcc>(queue, workDiv, fillFromVector(), h, nh, v, offsets);
0097   }
0098 
0099   // iteratate over N bins left and right of the one containing "v"
0100   template <typename Hist, typename V, typename Func>
0101   ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInBins(Hist const &hist, V value, int n, Func func) {
0102     int bs = Hist::bin(value);
0103     int be = std::min(int(Hist::nbins() - 1), bs + n);
0104     bs = std::max(0, bs - n);
0105     ALPAKA_ASSERT_ACC(be >= bs);
0106     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
0107       func(*pj);
0108     }
0109   }
0110 
0111   // iteratate over bins containing all values in window wmin, wmax
0112   template <typename Hist, typename V, typename Func>
0113   ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
0114     auto bs = Hist::bin(wmin);
0115     auto be = Hist::bin(wmax);
0116     ALPAKA_ASSERT_ACC(be >= bs);
0117     for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
0118       func(*pj);
0119     }
0120   }
0121 
0122   template <typename T,      // the type of the discretized input values
0123             uint32_t NBINS,  // number of bins
0124             int32_t SIZE,    // max number of element. If -1 is initialized at runtime using external storage
0125             uint32_t S = sizeof(T) * 8,  // number of significant bits in T
0126             typename I = uint32_t,  // type stored in the container (usually an index in a vector of the input values)
0127             uint32_t NHISTS = 1     // number of histos stored
0128             >
0129   class HistoContainer : public OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE> {
0130   public:
0131     using Base = OneToManyAssocRandomAccess<I, NHISTS * NBINS + 1, SIZE>;
0132     using View = typename Base::View;
0133     using Counter = typename Base::Counter;
0134     using index_type = typename Base::index_type;
0135     using UT = typename std::make_unsigned<T>::type;
0136 
0137     static constexpr uint32_t ilog2(uint32_t v) {
0138       constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
0139       constexpr uint32_t s[] = {1, 2, 4, 8, 16};
0140 
0141       uint32_t r = 0;  // result of log2(v) will go here
0142       for (auto i = 4; i >= 0; i--)
0143         if (v & b[i]) {
0144           v >>= s[i];
0145           r |= s[i];
0146         }
0147       return r;
0148     }
0149 
0150     static constexpr uint32_t sizeT() { return S; }
0151     static constexpr int32_t nhists() { return NHISTS; }
0152     static constexpr uint32_t nbins() { return NBINS; }
0153     static constexpr uint32_t totbins() { return NHISTS * NBINS + 1; }
0154     static constexpr uint32_t nbits() { return ilog2(NBINS - 1) + 1; }
0155 
0156     static constexpr auto histOff(uint32_t nh) { return NBINS * nh; }
0157 
0158     static constexpr UT bin(T t) {
0159       constexpr uint32_t shift = sizeT() - nbits();
0160       constexpr uint32_t mask = (1 << nbits()) - 1;
0161       return (t >> shift) & mask;
0162     }
0163 
0164     template <typename TAcc>
0165     ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
0166       uint32_t b = bin(t);
0167       ALPAKA_ASSERT_ACC(b < nbins());
0168       Base::atomicIncrement(acc, this->off[b]);
0169     }
0170 
0171     template <typename TAcc>
0172     ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
0173       uint32_t b = bin(t);
0174       ALPAKA_ASSERT_ACC(b < nbins());
0175       auto w = Base::atomicDecrement(acc, this->off[b]);
0176       ALPAKA_ASSERT_ACC(w > 0);
0177       this->content[w - 1] = j;
0178     }
0179 
0180     template <typename TAcc>
0181     ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
0182       uint32_t b = bin(t);
0183       ALPAKA_ASSERT_ACC(b < nbins());
0184       b += histOff(nh);
0185       ALPAKA_ASSERT_ACC(b < totbins());
0186       Base::atomicIncrement(acc, this->off[b]);
0187     }
0188 
0189     template <typename TAcc>
0190     ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
0191       uint32_t b = bin(t);
0192       ALPAKA_ASSERT_ACC(b < nbins());
0193       b += histOff(nh);
0194       ALPAKA_ASSERT_ACC(b < totbins());
0195       auto w = Base::atomicDecrement(acc, this->off[b]);
0196       ALPAKA_ASSERT_ACC(w > 0);
0197       this->content[w - 1] = j;
0198     }
0199   };
0200 }  // namespace cms::alpakatools
0201 #endif  // HeterogeneousCore_AlpakaInterface_interface_HistoContainer_h