File indexing completed on 2024-04-20 02:31:58
0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_workdivision_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_workdivision_h
0003
0004 #include <algorithm>
0005 #include <cstddef>
0006 #include <type_traits>
0007
0008 #include <alpaka/alpaka.hpp>
0009
0010 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0011
0012 namespace cms::alpakatools {
0013
0014 using namespace alpaka_common;
0015
0016
// Round `value` up to a multiple of `divisor`.
// Computes (value + divisor - 1) / divisor * divisor using Idx arithmetic; for a non-negative `value` and a
// positive `divisor` this is the smallest multiple of `divisor` that is not less than `value`.
// `divisor` must not be zero, and neither the intermediate sum nor the final product is checked for overflow.
inline constexpr Idx round_up_by(Idx value, Idx divisor) { return (value + divisor - 1) / divisor * divisor; }
0018
0019
// Divide `value` by `divisor`, rounding the quotient up.
// Computes (value + divisor - 1) / divisor using Idx arithmetic; for a non-negative `value` and a positive
// `divisor` this is the number of chunks of size `divisor` needed to cover `value` items.
// `divisor` must not be zero, and the intermediate sum is not checked for overflow.
inline constexpr Idx divide_up_by(Idx value, Idx divisor) { return (value + divisor - 1) / divisor; }
0021
0022
0023 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0024 struct requires_single_thread_per_block : public std::true_type {};
0025
0026 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
0027 template <typename TDim>
0028 struct requires_single_thread_per_block<alpaka::AccGpuCudaRt<TDim, Idx>> : public std::false_type {};
0029 #endif
0030
0031 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
0032 template <typename TDim>
0033 struct requires_single_thread_per_block<alpaka::AccGpuHipRt<TDim, Idx>> : public std::false_type {};
0034 #endif
0035
0036 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
0037 template <typename TDim>
0038 struct requires_single_thread_per_block<alpaka::AccCpuThreads<TDim, Idx>> : public std::false_type {};
0039 #endif
0040
0041
// Convenience variable template: the boolean value of requires_single_thread_per_block<TAcc>.
// Like the trait itself, it can only be instantiated for alpaka accelerator types.
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
inline constexpr bool requires_single_thread_per_block_v = requires_single_thread_per_block<TAcc>::value;
0044
0045
0046 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
0047 inline WorkDiv<Dim1D> make_workdiv(Idx blocks, Idx elements) {
0048 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
0049
0050
0051
0052 return WorkDiv<Dim1D>(blocks, elements, Idx{1});
0053 } else {
0054
0055
0056
0057 return WorkDiv<Dim1D>(blocks, Idx{1}, elements);
0058 }
0059 }
0060
0061
0062 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0063 inline WorkDiv<alpaka::Dim<TAcc>> make_workdiv(const Vec<alpaka::Dim<TAcc>>& blocks,
0064 const Vec<alpaka::Dim<TAcc>>& elements) {
0065 using Dim = alpaka::Dim<TAcc>;
0066 if constexpr (not requires_single_thread_per_block_v<TAcc>) {
0067
0068
0069
0070 return WorkDiv<Dim>(blocks, elements, Vec<Dim>::ones());
0071 } else {
0072
0073
0074
0075 return WorkDiv<Dim>(blocks, Vec<Dim>::ones(), elements);
0076 }
0077 }
0078
0079
0080
0081
0082
0083
0084
// Pair of indices identifying one element, as produced by the UniformGroupElementsAlong iterator.
struct ElementIndex {
  Idx global;  // index of the element in the whole range (local index plus the offset of the group)
  Idx local;   // index of the element inside its group
};
0089
0090 namespace detail {
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152 template <typename TAcc,
0153 std::size_t Dim,
0154 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0155 class UniformElementsAlong {
0156 public:
0157 ALPAKA_FN_ACC inline UniformElementsAlong(TAcc const& acc)
0158 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
0159 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
0160 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
0161 extent_{stride_} {}
0162
0163 ALPAKA_FN_ACC inline UniformElementsAlong(TAcc const& acc, Idx extent)
0164 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
0165 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
0166 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
0167 extent_{extent} {}
0168
0169 ALPAKA_FN_ACC inline UniformElementsAlong(TAcc const& acc, Idx first, Idx extent)
0170 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
0171 first_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_ + first},
0172 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc)[Dim] * elements_},
0173 extent_{extent} {}
0174
0175 class const_iterator;
0176 using iterator = const_iterator;
0177
0178 ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(elements_, stride_, extent_, first_); }
0179
0180 ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(elements_, stride_, extent_, extent_); }
0181
0182 class const_iterator {
0183 friend class UniformElementsAlong;
0184
0185 ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
0186 : elements_{elements},
0187 stride_{stride},
0188 extent_{extent},
0189 first_{std::min(first, extent)},
0190 index_{first_},
0191 range_{std::min(first + elements, extent)} {}
0192
0193 public:
0194 ALPAKA_FN_ACC inline Idx operator*() const { return index_; }
0195
0196
0197 ALPAKA_FN_ACC inline const_iterator& operator++() {
0198 if constexpr (requires_single_thread_per_block_v<TAcc>) {
0199
0200 ++index_;
0201 if (index_ < range_)
0202 return *this;
0203 }
0204
0205
0206 first_ += stride_;
0207 index_ = first_;
0208 range_ = std::min(first_ + elements_, extent_);
0209 if (index_ < extent_)
0210 return *this;
0211
0212
0213 first_ = extent_;
0214 index_ = extent_;
0215 range_ = extent_;
0216 return *this;
0217 }
0218
0219
0220 ALPAKA_FN_ACC inline const_iterator operator++(int) {
0221 const_iterator old = *this;
0222 ++(*this);
0223 return old;
0224 }
0225
0226 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const {
0227 return (index_ == other.index_) and (first_ == other.first_);
0228 }
0229
0230 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }
0231
0232 private:
0233
0234 Idx elements_;
0235 Idx stride_;
0236 Idx extent_;
0237
0238 Idx first_;
0239 Idx index_;
0240 Idx range_;
0241 };
0242
0243 private:
0244 const Idx elements_;
0245 const Idx first_;
0246 const Idx stride_;
0247 const Idx extent_;
0248 };
0249
0250 }
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308 template <typename TAcc,
0309 typename... TArgs,
0310 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
0311 ALPAKA_FN_ACC inline auto uniform_elements(TAcc const& acc, TArgs... args) {
0312 return detail::UniformElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
0313 }
0314
0315
0316
0317
0318
0319
0320
0321 template <typename TAcc,
0322 std::size_t Dim,
0323 typename... TArgs,
0324 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0325 ALPAKA_FN_ACC inline auto uniform_elements_along(TAcc const& acc, TArgs... args) {
0326 return detail::UniformElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
0327 }
0328
0329
0330
0331
0332
0333
0334 template <typename TAcc,
0335 typename... TArgs,
0336 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
0337 ALPAKA_FN_ACC inline auto uniform_elements_x(TAcc const& acc, TArgs... args) {
0338 return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
0339 }
0340
0341 template <typename TAcc,
0342 typename... TArgs,
0343 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
0344 ALPAKA_FN_ACC inline auto uniform_elements_y(TAcc const& acc, TArgs... args) {
0345 return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
0346 }
0347
0348 template <typename TAcc,
0349 typename... TArgs,
0350 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
0351 ALPAKA_FN_ACC inline auto uniform_elements_z(TAcc const& acc, TArgs... args) {
0352 return detail::UniformElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
0353 }
0354
0355 namespace detail {
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
0397 class UniformElementsND {
0398 public:
0399 using Dim = alpaka::Dim<TAcc>;
0400 using Vec = alpaka::Vec<Dim, Idx>;
0401
0402 ALPAKA_FN_ACC inline UniformElementsND(TAcc const& acc)
0403 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
0404 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
0405 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
0406 extent_{stride_} {}
0407
0408 ALPAKA_FN_ACC inline UniformElementsND(TAcc const& acc, Vec extent)
0409 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)},
0410 thread_{alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) * elements_},
0411 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc) * elements_},
0412 extent_{extent} {}
0413
0414
0415 struct at_end_t {};
0416
0417 class const_iterator;
0418 using iterator = const_iterator;
0419
0420 ALPAKA_FN_ACC inline const_iterator begin() const {
0421
0422 if ((thread_ < extent_).all()) {
0423
0424 return const_iterator{this, thread_};
0425 } else {
0426
0427 return const_iterator{this, at_end_t{}};
0428 }
0429 }
0430
0431 ALPAKA_FN_ACC inline const_iterator end() const {
0432
0433 return const_iterator{this, at_end_t{}};
0434 }
0435
0436 class const_iterator {
0437 friend class UniformElementsND;
0438
0439 public:
0440 ALPAKA_FN_ACC inline Vec operator*() const { return index_; }
0441
0442
0443 ALPAKA_FN_ACC constexpr inline const_iterator operator++() {
0444 increment();
0445 return *this;
0446 }
0447
0448
0449 ALPAKA_FN_ACC constexpr inline const_iterator operator++(int) {
0450 const_iterator old = *this;
0451 increment();
0452 return old;
0453 }
0454
0455 ALPAKA_FN_ACC constexpr inline bool operator==(const_iterator const& other) const {
0456 return (index_ == other.index_);
0457 }
0458
0459 ALPAKA_FN_ACC constexpr inline bool operator!=(const_iterator const& other) const {
0460 return not(*this == other);
0461 }
0462
0463 private:
0464
0465 ALPAKA_FN_ACC inline const_iterator(UniformElementsND const* loop, Vec first)
0466 : loop_{loop},
0467 first_{alpaka::elementwise_min(first, loop->extent_)},
0468 range_{alpaka::elementwise_min(first + loop->elements_, loop->extent_)},
0469 index_{first_} {}
0470
0471
0472 ALPAKA_FN_ACC inline const_iterator(UniformElementsND const* loop, at_end_t const&)
0473 : loop_{loop}, first_{loop_->extent_}, range_{loop_->extent_}, index_{loop_->extent_} {}
0474
0475 template <size_t I>
0476 ALPAKA_FN_ACC inline constexpr bool nth_elements_loop() {
0477 bool overflow = false;
0478 ++index_[I];
0479 if (index_[I] >= range_[I]) {
0480 index_[I] = first_[I];
0481 overflow = true;
0482 }
0483 return overflow;
0484 }
0485
0486 template <size_t N>
0487 ALPAKA_FN_ACC inline constexpr bool do_elements_loops() {
0488 if constexpr (N == 0) {
0489
0490 return true;
0491 } else {
0492 if (not nth_elements_loop<N - 1>()) {
0493 return false;
0494 } else {
0495 return do_elements_loops<N - 1>();
0496 }
0497 }
0498 }
0499
0500 template <size_t I>
0501 ALPAKA_FN_ACC inline constexpr bool nth_strided_loop() {
0502 bool overflow = false;
0503 first_[I] += loop_->stride_[I];
0504 if (first_[I] >= loop_->extent_[I]) {
0505 first_[I] = loop_->thread_[I];
0506 overflow = true;
0507 }
0508 index_[I] = first_[I];
0509 range_[I] = std::min(first_[I] + loop_->elements_[I], loop_->extent_[I]);
0510 return overflow;
0511 }
0512
0513 template <size_t N>
0514 ALPAKA_FN_ACC inline constexpr bool do_strided_loops() {
0515 if constexpr (N == 0) {
0516
0517 return true;
0518 } else {
0519 if (not nth_strided_loop<N - 1>()) {
0520 return false;
0521 } else {
0522 return do_strided_loops<N - 1>();
0523 }
0524 }
0525 }
0526
0527
0528 ALPAKA_FN_ACC inline constexpr void increment() {
0529 if constexpr (requires_single_thread_per_block_v<TAcc>) {
0530
0531
0532 if (not do_elements_loops<Dim::value>()) {
0533
0534 return;
0535 }
0536 }
0537
0538
0539
0540 if (not do_strided_loops<Dim::value>()) {
0541
0542 return;
0543 }
0544
0545
0546 first_ = loop_->extent_;
0547 range_ = loop_->extent_;
0548 index_ = loop_->extent_;
0549 }
0550
0551
0552 const UniformElementsND* loop_;
0553
0554
0555 Vec first_;
0556 Vec range_;
0557 Vec index_;
0558 };
0559
0560 private:
0561 const Vec elements_;
0562 const Vec thread_;
0563 const Vec stride_;
0564 const Vec extent_;
0565 };
0566
0567 }
0568
0569
0570
0571
0572
0573
0574 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
0575 ALPAKA_FN_ACC inline auto uniform_elements_nd(TAcc const& acc) {
0576 return detail::UniformElementsND<TAcc>(acc);
0577 }
0578
0579 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
0580 ALPAKA_FN_ACC inline auto uniform_elements_nd(TAcc const& acc, alpaka::Vec<alpaka::Dim<TAcc>, Idx> extent) {
0581 return detail::UniformElementsND<TAcc>(acc, extent);
0582 }
0583
0584 namespace detail {
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616
0617
0618
0619
0620
0621
0622
0623
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633
0634
0635
0636
0637
0638 template <typename TAcc,
0639 std::size_t Dim,
0640 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0641 class UniformGroupsAlong {
0642 public:
0643 ALPAKA_FN_ACC inline UniformGroupsAlong(TAcc const& acc)
0644 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
0645 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
0646 extent_{stride_} {}
0647
0648
0649 ALPAKA_FN_ACC inline UniformGroupsAlong(TAcc const& acc, Idx extent)
0650 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
0651 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
0652 extent_{divide_up_by(extent, alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim])} {}
0653
0654 class const_iterator;
0655 using iterator = const_iterator;
0656
0657 ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(stride_, extent_, first_); }
0658
0659 ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(stride_, extent_, extent_); }
0660
0661 class const_iterator {
0662 friend class UniformGroupsAlong;
0663
0664 ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
0665 : stride_{stride}, extent_{extent}, first_{std::min(first, extent)} {}
0666
0667 public:
0668 ALPAKA_FN_ACC inline Idx operator*() const { return first_; }
0669
0670
0671 ALPAKA_FN_ACC inline const_iterator& operator++() {
0672
0673 first_ += stride_;
0674 if (first_ < extent_)
0675 return *this;
0676
0677
0678 first_ = extent_;
0679 return *this;
0680 }
0681
0682
0683 ALPAKA_FN_ACC inline const_iterator operator++(int) {
0684 const_iterator old = *this;
0685 ++(*this);
0686 return old;
0687 }
0688
0689 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { return (first_ == other.first_); }
0690
0691 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }
0692
0693 private:
0694
0695 Idx stride_;
0696 Idx extent_;
0697
0698 Idx first_;
0699 };
0700
0701 private:
0702 const Idx first_;
0703 const Idx stride_;
0704 const Idx extent_;
0705 };
0706
0707 }
0708
0709
0710
0711
0712
0713
0714
0715
0716
0717
0718
0719
0720
0721
0722
0723
0724
0725
0726
0727
0728
0729
0730
0731
0732
0733
0734
0735
0736
0737
0738
0739
0740
0741
0742
0743
0744
0745
0746
0747
0748
0749
0750
0751
0752
0753
0754
0755
0756 template <typename TAcc,
0757 typename... TArgs,
0758 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
0759 ALPAKA_FN_ACC inline auto uniform_groups(TAcc const& acc, TArgs... args) {
0760 return detail::UniformGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
0761 }
0762
0763
0764
0765
0766
0767
0768
0769 template <typename TAcc,
0770 std::size_t Dim,
0771 typename... TArgs,
0772 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0773 ALPAKA_FN_ACC inline auto uniform_groups_along(TAcc const& acc, TArgs... args) {
0774 return detail::UniformGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
0775 }
0776
0777
0778
0779
0780
0781
0782 template <typename TAcc,
0783 typename... TArgs,
0784 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
0785 ALPAKA_FN_ACC inline auto uniform_groups_x(TAcc const& acc, TArgs... args) {
0786 return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
0787 }
0788
0789 template <typename TAcc,
0790 typename... TArgs,
0791 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
0792 ALPAKA_FN_ACC inline auto uniform_groups_y(TAcc const& acc, TArgs... args) {
0793 return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
0794 }
0795
0796 template <typename TAcc,
0797 typename... TArgs,
0798 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
0799 ALPAKA_FN_ACC inline auto uniform_groups_z(TAcc const& acc, TArgs... args) {
0800 return detail::UniformGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
0801 }
0802
0803 namespace detail {
0804
0805
0806
0807
0808
0809
0810
0811
0812
0813
0814
0815
0816
0817
0818
0819
0820
0821
0822
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835
0836
0837
0838
0839
0840
0841
0842
0843
0844
0845
0846
0847
0848
0849
0850
0851
0852
0853
0854
0855
0856 template <typename TAcc,
0857 std::size_t Dim,
0858 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0859 class UniformGroupElementsAlong {
0860 public:
0861 ALPAKA_FN_ACC inline UniformGroupElementsAlong(TAcc const& acc, Idx block)
0862 : first_{block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
0863 local_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
0864 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
0865 range_{local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]} {}
0866
0867 ALPAKA_FN_ACC inline UniformGroupElementsAlong(TAcc const& acc, Idx block, Idx extent)
0868 : first_{block * alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[Dim]},
0869 local_{std::min(extent - first_,
0870 alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] *
0871 alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])},
0872 range_{std::min(extent - first_, local_ + alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim])} {}
0873
0874 class const_iterator;
0875 using iterator = const_iterator;
0876
0877 ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(local_, first_, range_); }
0878
0879 ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(range_, first_, range_); }
0880
0881 class const_iterator {
0882 friend class UniformGroupElementsAlong;
0883
0884 ALPAKA_FN_ACC inline const_iterator(Idx local, Idx first, Idx range)
0885 : index_{local}, first_{first}, range_{range} {}
0886
0887 public:
0888 ALPAKA_FN_ACC inline ElementIndex operator*() const { return ElementIndex{index_ + first_, index_}; }
0889
0890
0891 ALPAKA_FN_ACC inline const_iterator& operator++() {
0892 if constexpr (requires_single_thread_per_block_v<TAcc>) {
0893
0894 ++index_;
0895 if (index_ < range_)
0896 return *this;
0897 }
0898
0899
0900 index_ = range_;
0901 return *this;
0902 }
0903
0904
0905 ALPAKA_FN_ACC inline const_iterator operator++(int) {
0906 const_iterator old = *this;
0907 ++(*this);
0908 return old;
0909 }
0910
0911 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { return (index_ == other.index_); }
0912
0913 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }
0914
0915 private:
0916
0917 Idx index_;
0918
0919 Idx first_;
0920 Idx range_;
0921 };
0922
0923 private:
0924 const Idx first_;
0925 const Idx local_;
0926 const Idx range_;
0927 };
0928
0929 }
0930
0931
0932
0933
0934
0935
0936
0937
0938
0939
0940
0941
0942
0943
0944
0945
0946
0947
0948
0949
0950
0951
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970
0971
0972
0973
0974
0975 template <typename TAcc,
0976 typename... TArgs,
0977 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
0978 ALPAKA_FN_ACC inline auto uniform_group_elements(TAcc const& acc, TArgs... args) {
0979 return detail::UniformGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
0980 }
0981
0982
0983
0984
0985
0986
0987
0988 template <typename TAcc,
0989 std::size_t Dim,
0990 typename... TArgs,
0991 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
0992 ALPAKA_FN_ACC inline auto uniform_group_elements_along(TAcc const& acc, TArgs... args) {
0993 return detail::UniformGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
0994 }
0995
0996
0997
0998
0999
1000
1001
1002 template <typename TAcc,
1003 typename... TArgs,
1004 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
1005 ALPAKA_FN_ACC inline auto uniform_group_elements_x(TAcc const& acc, TArgs... args) {
1006 return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
1007 }
1008
1009 template <typename TAcc,
1010 typename... TArgs,
1011 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
1012 ALPAKA_FN_ACC inline auto uniform_group_elements_y(TAcc const& acc, TArgs... args) {
1013 return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
1014 }
1015
1016 template <typename TAcc,
1017 typename... TArgs,
1018 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
1019 ALPAKA_FN_ACC inline auto uniform_group_elements_z(TAcc const& acc, TArgs... args) {
1020 return detail::UniformGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
1021 }
1022
1023 namespace detail {
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063 template <typename TAcc,
1064 std::size_t Dim,
1065 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
1066 class IndependentGroupsAlong {
1067 public:
1068 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc)
1069 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1070 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1071 extent_{stride_} {}
1072
1073 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups)
1074 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1075 stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]},
1076 extent_{groups} {}
1077
1078 class const_iterator;
1079 using iterator = const_iterator;
1080
1081 ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(stride_, extent_, first_); }
1082
1083 ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(stride_, extent_, extent_); }
1084
1085 class const_iterator {
1086 friend class IndependentGroupsAlong;
1087
1088 ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
1089 : stride_{stride}, extent_{extent}, first_{std::min(first, extent)} {}
1090
1091 public:
1092 ALPAKA_FN_ACC inline Idx operator*() const { return first_; }
1093
1094
1095 ALPAKA_FN_ACC inline const_iterator& operator++() {
1096
1097 first_ += stride_;
1098 if (first_ < extent_)
1099 return *this;
1100
1101
1102 first_ = extent_;
1103 return *this;
1104 }
1105
1106
1107 ALPAKA_FN_ACC inline const_iterator operator++(int) {
1108 const_iterator old = *this;
1109 ++(*this);
1110 return old;
1111 }
1112
1113 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { return (first_ == other.first_); }
1114
1115 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }
1116
1117 private:
1118
1119 Idx stride_;
1120 Idx extent_;
1121
1122 Idx first_;
1123 };
1124
1125 private:
1126 const Idx first_;
1127 const Idx stride_;
1128 const Idx extent_;
1129 };
1130
1131 }
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167 template <typename TAcc,
1168 typename... TArgs,
1169 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
1170 ALPAKA_FN_ACC inline auto independent_groups(TAcc const& acc, TArgs... args) {
1171 return detail::IndependentGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
1172 }
1173
1174
1175
1176
1177
1178
1179
1180 template <typename TAcc,
1181 std::size_t Dim,
1182 typename... TArgs,
1183 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
1184 ALPAKA_FN_ACC inline auto independent_groups_along(TAcc const& acc, TArgs... args) {
1185 return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
1186 }
1187
1188
1189
1190
1191
1192
1193
1194 template <typename TAcc,
1195 typename... TArgs,
1196 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
1197 ALPAKA_FN_ACC inline auto independent_groups_x(TAcc const& acc, TArgs... args) {
1198 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
1199 }
1200
1201 template <typename TAcc,
1202 typename... TArgs,
1203 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
1204 ALPAKA_FN_ACC inline auto independent_groups_y(TAcc const& acc, TArgs... args) {
1205 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
1206 }
1207
1208 template <typename TAcc,
1209 typename... TArgs,
1210 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
1211 ALPAKA_FN_ACC inline auto independent_groups_z(TAcc const& acc, TArgs... args) {
1212 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
1213 }
1214
1215 namespace detail {
1216
1217
1218
1219
1220
1221
1222
1223 template <typename TAcc,
1224 std::size_t Dim,
1225 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
1226 class IndependentGroupElementsAlong {
1227 public:
1228 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc)
1229 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1230 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1231 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1232 extent_{stride_} {}
1233
1234 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent)
1235 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1236 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1237 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1238 extent_{extent} {}
1239
1240 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent)
1241 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]},
1242 thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first},
1243 stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_},
1244 extent_{extent} {}
1245
1246 class const_iterator;
1247 using iterator = const_iterator;
1248
1249 ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(elements_, stride_, extent_, thread_); }
1250
1251 ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(elements_, stride_, extent_, extent_); }
1252
1253 class const_iterator {
1254 friend class IndependentGroupElementsAlong;
1255
1256 ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
1257 : elements_{elements},
1258 stride_{stride},
1259 extent_{extent},
1260 first_{std::min(first, extent)},
1261 index_{first_},
1262 range_{std::min(first + elements, extent)} {}
1263
1264 public:
1265 ALPAKA_FN_ACC inline Idx operator*() const { return index_; }
1266
1267
1268 ALPAKA_FN_ACC inline const_iterator& operator++() {
1269 if constexpr (requires_single_thread_per_block_v<TAcc>) {
1270
1271 ++index_;
1272 if (index_ < range_)
1273 return *this;
1274 }
1275
1276
1277 first_ += stride_;
1278 index_ = first_;
1279 range_ = std::min(first_ + elements_, extent_);
1280 if (index_ < extent_)
1281 return *this;
1282
1283
1284 first_ = extent_;
1285 index_ = extent_;
1286 range_ = extent_;
1287 return *this;
1288 }
1289
1290
1291 ALPAKA_FN_ACC inline const_iterator operator++(int) {
1292 const_iterator old = *this;
1293 ++(*this);
1294 return old;
1295 }
1296
1297 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const {
1298 return (index_ == other.index_) and (first_ == other.first_);
1299 }
1300
1301 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); }
1302
1303 private:
1304
1305 Idx elements_;
1306 Idx stride_;
1307 Idx extent_;
1308
1309 Idx first_;
1310 Idx index_;
1311 Idx range_;
1312 };
1313
1314 private:
1315 const Idx elements_;
1316 const Idx thread_;
1317 const Idx stride_;
1318 const Idx extent_;
1319 };
1320
1321 }
1322
1323
1324
1325
1326 template <typename TAcc,
1327 typename... TArgs,
1328 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
1329 ALPAKA_FN_ACC inline auto independent_group_elements(TAcc const& acc, TArgs... args) {
1330 return detail::IndependentGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
1331 }
1332
1333
1334
1335
1336
1337
1338
1339 template <typename TAcc,
1340 std::size_t Dim,
1341 typename... TArgs,
1342 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
1343 ALPAKA_FN_ACC inline auto independent_group_elements_along(TAcc const& acc, TArgs... args) {
1344 return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
1345 }
1346
1347
1348
1349
1350
1351
1352
1353 template <typename TAcc,
1354 typename... TArgs,
1355 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
1356 ALPAKA_FN_ACC inline auto independent_group_elements_x(TAcc const& acc, TArgs... args) {
1357 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
1358 }
1359
1360 template <typename TAcc,
1361 typename... TArgs,
1362 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
1363 ALPAKA_FN_ACC inline auto independent_group_elements_y(TAcc const& acc, TArgs... args) {
1364 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
1365 }
1366
1367 template <typename TAcc,
1368 typename... TArgs,
1369 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
1370 ALPAKA_FN_ACC inline auto independent_group_elements_z(TAcc const& acc, TArgs... args) {
1371 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
1372 }
1373
1374
1375
1376
1377
1378
1379
1380
1381 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1382 ALPAKA_FN_ACC inline constexpr bool once_per_grid(TAcc const& acc) {
1383 return alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc) == Vec<alpaka::Dim<TAcc>>::zeros();
1384 }
1385
1386
1387
1388
1389
1390
1391
1392
1393 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1394 ALPAKA_FN_ACC inline constexpr bool once_per_block(TAcc const& acc) {
1395 return alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc) == Vec<alpaka::Dim<TAcc>>::zeros();
1396 }
1397
1398 }
1399
1400 #endif