File indexing completed on 2024-07-03 04:18:12
0001 #include "RecoLocalCalo/HGCalRecProducers/interface/HGCalTilesConstants.h"
0002
0003 #include "HGCalLayerClustersSoAAlgoWrapper.h"
0004 #include "ConstantsForClusters.h"
0005
0006 #include "CLUEAlgoAlpaka.h"
0007
0008 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0009
0010 using namespace cms::alpakatools;
0011 using namespace hgcal::constants;
0012
0013
0014 class HGCalLayerClustersSoAAlgoKernelEnergy {
0015 public:
0016 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0017 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0018 const unsigned int numer_of_clusters,
0019 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0020 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0021 HGCalSoAClustersDeviceCollection::View outputs) const {
0022
0023 for (int32_t i : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0024
0025 if (input_clusters_soa[i].clusterIndex() == kInvalidCluster) {
0026 continue;
0027 }
0028 auto clIdx = input_clusters_soa[i].clusterIndex();
0029 alpaka::atomicAdd(acc, &outputs[clIdx].energy(), input_rechits_soa[i].weight());
0030 alpaka::atomicAdd(acc, &outputs[clIdx].cells(), 1);
0031 if (input_clusters_soa[i].isSeed() == 1) {
0032 outputs[clIdx].seed() = input_rechits_soa[i].detid();
0033 }
0034 }
0035 }
0036 };
0037
0038
0039 class HGCalLayerClustersSoAAlgoKernelPositionByHits {
0040 public:
0041 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0042 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0043 const unsigned int numer_of_clusters,
0044 float thresholdW0,
0045 float positionDeltaRho2,
0046 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0047 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0048 HGCalSoAClustersDeviceCollection::View outputs,
0049 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0050
0051 for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0052 const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0053
0054
0055 if (cluster_index == kInvalidCluster) {
0056 continue;
0057 }
0058
0059 alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight(), input_rechits_soa[hit_index].weight());
0060
0061 int clusterSeed = outputs_service[cluster_index].maxEnergyIndex();
0062 float clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0063
0064 while (input_rechits_soa[hit_index].weight() > clusterEnergy) {
0065
0066
0067
0068
0069 int seed = alpaka::atomicCas(acc, &outputs_service[cluster_index].maxEnergyIndex(), clusterSeed, hit_index);
0070 if (seed == hit_index) {
0071
0072 break;
0073 } else {
0074
0075 clusterSeed = seed;
0076 clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0077 }
0078 }
0079 }
0080 }
0081 };
0082
0083
0084 class HGCalLayerClustersSoAAlgoKernelPositionByHits2 {
0085 public:
0086 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0087 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0088 const unsigned int numer_of_clusters,
0089 float thresholdW0,
0090 float positionDeltaRho2,
0091 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0092 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0093 HGCalSoAClustersDeviceCollection::View outputs,
0094 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0095
0096 for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0097 const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0098
0099
0100 if (cluster_index == kInvalidCluster) {
0101 continue;
0102 }
0103 const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0104
0105
0106 const float d1 = input_rechits_soa[hit_index].dim1() - input_rechits_soa[max_energy_index].dim1();
0107 const float d2 = input_rechits_soa[hit_index].dim2() - input_rechits_soa[max_energy_index].dim2();
0108 if (std::fmaf(d1, d1, d2 * d2) > positionDeltaRho2) {
0109 continue;
0110 }
0111 float Wi = std::max(thresholdW0 + std::log(input_rechits_soa[hit_index].weight() /
0112 outputs_service[cluster_index].total_weight()),
0113 0.f);
0114 alpaka::atomicAdd(acc, &outputs[cluster_index].x(), input_rechits_soa[hit_index].dim1() * Wi);
0115 alpaka::atomicAdd(acc, &outputs[cluster_index].y(), input_rechits_soa[hit_index].dim2() * Wi);
0116 alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight_log(), Wi);
0117 }
0118 }
0119 };
0120
0121
0122 class HGCalLayerClustersSoAAlgoKernelPositionByHits3 {
0123 public:
0124 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0125 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0126 const unsigned int numer_of_clusters,
0127 float thresholdW0,
0128 float positionDeltaRho2,
0129 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0130 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0131 HGCalSoAClustersDeviceCollection::View outputs,
0132 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0133
0134 for (int32_t cluster_index : uniform_elements(acc, outputs.metadata().size())) {
0135 const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0136
0137 if (outputs_service[cluster_index].total_weight_log() > 0.f) {
0138 float inv_tot_weight = 1.f / outputs_service[cluster_index].total_weight_log();
0139 outputs[cluster_index].x() *= inv_tot_weight;
0140 outputs[cluster_index].y() *= inv_tot_weight;
0141 } else {
0142 outputs[cluster_index].x() = input_rechits_soa[max_energy_index].dim1();
0143 outputs[cluster_index].y() = input_rechits_soa[max_energy_index].dim2();
0144 }
0145 outputs[cluster_index].z() = input_rechits_soa[max_energy_index].dim3();
0146 }
0147 }
0148 };
0149
0150 void HGCalLayerClustersSoAAlgoWrapper::run(Queue& queue,
0151 const unsigned int size,
0152 float thresholdW0,
0153 float positionDeltaRho2,
0154 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0155 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0156 HGCalSoAClustersDeviceCollection::View outputs,
0157 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0158 auto x = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.x(), size);
0159 alpaka::memset(queue, x, 0x0);
0160 auto y = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.y(), size);
0161 alpaka::memset(queue, y, 0x0);
0162 auto z = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.z(), size);
0163 alpaka::memset(queue, z, 0x0);
0164 auto seed = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.seed(), size);
0165 alpaka::memset(queue, seed, 0x0);
0166 auto energy = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.energy(), size);
0167 alpaka::memset(queue, energy, 0x0);
0168 auto cells = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.cells(), size);
0169 alpaka::memset(queue, cells, 0x0);
0170 auto total_weight =
0171 cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight(), size);
0172 alpaka::memset(queue, total_weight, 0x0);
0173 auto total_weight_log =
0174 cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight_log(), size);
0175 alpaka::memset(queue, total_weight_log, 0x0);
0176 auto maxEnergyValue =
0177 cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.maxEnergyValue(), size);
0178 alpaka::memset(queue, maxEnergyValue, 0x0);
0179 auto maxEnergyIndex =
0180 cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs_service.maxEnergyIndex(), size);
0181 alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
0182
0183
0184 uint32_t items = 64;
0185
0186
0187 uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
0188
0189
0190
0191
0192 auto workDiv = make_workdiv<Acc1D>(groups, items);
0193
0194 alpaka::exec<Acc1D>(
0195 queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
0196 alpaka::exec<Acc1D>(queue,
0197 workDiv,
0198 HGCalLayerClustersSoAAlgoKernelPositionByHits{},
0199 size,
0200 thresholdW0,
0201 positionDeltaRho2,
0202 input_rechits_soa,
0203 input_clusters_soa,
0204 outputs,
0205 outputs_service);
0206 alpaka::exec<Acc1D>(queue,
0207 workDiv,
0208 HGCalLayerClustersSoAAlgoKernelPositionByHits2{},
0209 size,
0210 thresholdW0,
0211 positionDeltaRho2,
0212 input_rechits_soa,
0213 input_clusters_soa,
0214 outputs,
0215 outputs_service);
0216 uint32_t group_clusters = divide_up_by(size, items);
0217 auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
0218 alpaka::exec<Acc1D>(queue,
0219 workDivClusters,
0220 HGCalLayerClustersSoAAlgoKernelPositionByHits3{},
0221 size,
0222 thresholdW0,
0223 positionDeltaRho2,
0224 input_rechits_soa,
0225 input_clusters_soa,
0226 outputs,
0227 outputs_service);
0228 }
0229 }