File indexing completed on 2024-04-20 02:31:58
0001
0002 #ifdef ALPAKA_HOST_ONLY
0003 #error ALPAKA_HOST_ONLY defined in device compilation
0004 #endif
0005
0006 #include <alpaka/alpaka.hpp>
0007
0008 #include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
0009 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0010 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0011
0012 #include "TestAlgo.h"
0013
0014 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0015
0016 using namespace cms::alpakatools;
0017
0018 class TestAlgoKernel {
0019 public:
0020 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0021 ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
0022
0023 const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
0024 const portabletest::Array flags = {{6, 4, 2, 0}};
0025
0026
0027 if (once_per_grid(acc)) {
0028 view.r() = 1.;
0029 }
0030
0031
0032 for (int32_t i : uniform_elements(acc, view.metadata().size())) {
0033 view[i] = {xvalue, 0., 0., i, flags, matrix * i};
0034 }
0035 }
0036 };
0037
0038 class TestAlgoMultiKernel2 {
0039 public:
0040 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0041 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0042 portabletest::TestDeviceMultiCollection2::View<1> view,
0043 double xvalue) const {
0044
0045 const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0046 const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
0047
0048
0049 if (thread == 0) {
0050 view.r2() = 2.;
0051 }
0052
0053
0054 for (int32_t i : uniform_elements(acc, view.metadata().size())) {
0055 view[i] = {xvalue, 0., 0., i, matrix * i};
0056 }
0057 }
0058 };
0059
0060 class TestAlgoMultiKernel3 {
0061 public:
0062 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0063 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0064 portabletest::TestDeviceMultiCollection3::View<2> view,
0065 double xvalue) const {
0066
0067 const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
0068 const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
0069
0070
0071 if (thread == 0) {
0072 view.r3() = 3.;
0073 }
0074
0075
0076 for (int32_t i : uniform_elements(acc, view.metadata().size())) {
0077 view[i] = {xvalue, 0., 0., i, matrix * i};
0078 }
0079 }
0080 };
0081
0082 void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const {
0083
0084 uint32_t items = 64;
0085
0086
0087 uint32_t groups = divide_up_by(collection->metadata().size(), items);
0088
0089
0090
0091
0092 auto workDiv = make_workdiv<Acc1D>(groups, items);
0093
0094 alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue);
0095 }
0096
0097 void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const {
0098
0099 uint32_t items = 64;
0100
0101
0102 uint32_t groups = divide_up_by(collection->metadata().size(), items);
0103 uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items);
0104
0105
0106
0107
0108 auto workDiv = make_workdiv<Acc1D>(groups, items);
0109 auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
0110
0111 alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
0112 alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
0113 }
0114
0115 class TestAlgoStructKernel {
0116 public:
0117 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0118 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0119 portabletest::TestDeviceObject::Product* data,
0120 double x,
0121 double y,
0122 double z,
0123 int32_t id) const {
0124
0125 if (once_per_grid(acc)) {
0126 data->x = x;
0127 data->y = y;
0128 data->z = z;
0129 data->id = id;
0130 }
0131 }
0132 };
0133
0134 void TestAlgo::fillObject(
0135 Queue& queue, portabletest::TestDeviceObject& object, double x, double y, double z, int32_t id) const {
0136
0137 auto workDiv = make_workdiv<Acc1D>(1, 1);
0138
0139 alpaka::exec<Acc1D>(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id);
0140 }
0141
0142 void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const {
0143
0144 uint32_t items = 64;
0145
0146
0147 uint32_t groups = divide_up_by(collection.view<portabletest::TestSoA>().metadata().size(), items);
0148 uint32_t groups2 = divide_up_by(collection.view<portabletest::TestSoA2>().metadata().size(), items);
0149 uint32_t groups3 = divide_up_by(collection.view<portabletest::TestSoA3>().metadata().size(), items);
0150
0151
0152
0153
0154 auto workDiv = make_workdiv<Acc1D>(groups, items);
0155 auto workDiv2 = make_workdiv<Acc1D>(groups2, items);
0156 auto workDiv3 = make_workdiv<Acc1D>(groups3, items);
0157
0158 alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernel{}, collection.view<portabletest::TestSoA>(), xvalue);
0159 alpaka::exec<Acc1D>(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view<portabletest::TestSoA2>(), xvalue);
0160 alpaka::exec<Acc1D>(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view<portabletest::TestSoA3>(), xvalue);
0161 }
0162
0163 class TestAlgoKernelUpdate {
0164 public:
0165 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0166 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0167 portabletest::TestDeviceCollection::ConstView input,
0168 AlpakaESTestDataEDevice::ConstView esData,
0169 portabletest::TestDeviceCollection::View output) const {
0170
0171 if (once_per_grid(acc)) {
0172 output.r() = input.r();
0173 }
0174
0175
0176 for (int32_t i : uniform_elements(acc, output.metadata().size())) {
0177 double x = input[i].x();
0178 if (i < esData.size()) {
0179 x += esData.val(i) + esData.val2(i);
0180 }
0181 output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
0182 }
0183 }
0184 };
0185
0186 class TestAlgoKernelUpdateMulti2 {
0187 public:
0188 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0189 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0190 portabletest::TestSoA::ConstView input,
0191 portabletest::TestSoA2::ConstView input2,
0192 AlpakaESTestDataEDevice::ConstView esData,
0193 portabletest::TestSoA::View output,
0194 portabletest::TestSoA2::View output2) const {
0195
0196 if (once_per_grid(acc)) {
0197 output.r() = input.r();
0198 output2.r2() = input2.r2();
0199 }
0200
0201
0202 for (int32_t i : uniform_elements(acc, output.metadata().size())) {
0203 double x = input[i].x();
0204 if (i < esData.size()) {
0205 x += esData.val(i) + esData.val2(i);
0206 }
0207 output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
0208 }
0209 for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
0210 double x2 = input2[i].x2();
0211 if (i < esData.size()) {
0212 x2 += esData.val(i) + esData.val2(i);
0213 }
0214 output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
0215 }
0216 }
0217 };
0218
0219 class TestAlgoKernelUpdateMulti3 {
0220 public:
0221 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0222 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0223 portabletest::TestSoA::ConstView input,
0224 portabletest::TestSoA2::ConstView input2,
0225 portabletest::TestSoA3::ConstView input3,
0226 AlpakaESTestDataEDevice::ConstView esData,
0227 portabletest::TestSoA::View output,
0228 portabletest::TestSoA2::View output2,
0229 portabletest::TestSoA3::View output3) const {
0230
0231 if (once_per_grid(acc)) {
0232 output.r() = input.r();
0233 output2.r2() = input2.r2();
0234 output3.r3() = input3.r3();
0235 }
0236
0237
0238 for (int32_t i : uniform_elements(acc, output.metadata().size())) {
0239 double x = input[i].x();
0240 if (i < esData.size()) {
0241 x += esData.val(i) + esData.val2(i);
0242 if (0 == i)
0243 printf("Setting x[0] to %f\n", x);
0244 }
0245 output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()};
0246 }
0247 for (int32_t i : uniform_elements(acc, output2.metadata().size())) {
0248 double x2 = input2[i].x2();
0249 if (i < esData.size()) {
0250 x2 += esData.val(i) + esData.val2(i);
0251 }
0252 output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()};
0253 }
0254 for (int32_t i : uniform_elements(acc, output3.metadata().size())) {
0255 double x3 = input3[i].x3();
0256 if (i < esData.size()) {
0257 x3 += esData.val(i) + esData.val2(i);
0258 }
0259 output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()};
0260 }
0261 }
0262 };
0263
0264 portabletest::TestDeviceCollection TestAlgo::update(Queue& queue,
0265 portabletest::TestDeviceCollection const& input,
0266 AlpakaESTestDataEDevice const& esData) const {
0267 portabletest::TestDeviceCollection collection{input->metadata().size(), queue};
0268
0269
0270 uint32_t items = 64;
0271
0272
0273 uint32_t groups = divide_up_by(collection->metadata().size(), items);
0274
0275
0276
0277
0278 auto workDiv = make_workdiv<Acc1D>(groups, items);
0279
0280 alpaka::exec<Acc1D>(queue, workDiv, TestAlgoKernelUpdate{}, input.view(), esData.view(), collection.view());
0281
0282 return collection;
0283 }
0284
0285 portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue,
0286 portabletest::TestDeviceMultiCollection2 const& input,
0287 AlpakaESTestDataEDevice const& esData) const {
0288 portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue};
0289
0290
0291 uint32_t items = 64;
0292
0293
0294 auto sizes = collection.sizes();
0295 uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);
0296
0297
0298
0299
0300 auto workDiv = make_workdiv<Acc1D>(groups, items);
0301
0302 alpaka::exec<Acc1D>(queue,
0303 workDiv,
0304 TestAlgoKernelUpdateMulti2{},
0305 input.view<portabletest::TestSoA>(),
0306 input.view<portabletest::TestSoA2>(),
0307 esData.view(),
0308 collection.view<portabletest::TestSoA>(),
0309 collection.view<portabletest::TestSoA2>());
0310
0311 return collection;
0312 }
0313
0314 portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue,
0315 portabletest::TestDeviceMultiCollection3 const& input,
0316 AlpakaESTestDataEDevice const& esData) const {
0317 portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue};
0318
0319
0320 uint32_t items = 64;
0321
0322
0323 auto sizes = collection.sizes();
0324 uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items);
0325
0326
0327
0328
0329 auto workDiv = make_workdiv<Acc1D>(groups, items);
0330
0331 alpaka::exec<Acc1D>(queue,
0332 workDiv,
0333 TestAlgoKernelUpdateMulti3{},
0334 input.view<portabletest::TestSoA>(),
0335 input.view<portabletest::TestSoA2>(),
0336 input.view<portabletest::TestSoA3>(),
0337 esData.view(),
0338 collection.view<portabletest::TestSoA>(),
0339 collection.view<portabletest::TestSoA2>(),
0340 collection.view<portabletest::TestSoA3>());
0341
0342 return collection;
0343 }
0344
0345 }