File indexing completed on 2025-01-22 07:34:23
0001 #include "RecoLocalCalo/HGCalRecProducers/interface/HGCalTilesConstants.h"
0002
0003 #include "HGCalLayerClustersSoAAlgoWrapper.h"
0004 #include "ConstantsForClusters.h"
0005
0006 #include "CLUEAlgoAlpaka.h"
0007
0008 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0009
0010 using namespace cms::alpakatools;
0011 using namespace hgcal::constants;
0012
0013
0014 class HGCalLayerClustersSoAAlgoKernelEnergy {
0015 public:
0016 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0017 const unsigned int numer_of_clusters,
0018 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0019 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0020 HGCalSoAClustersDeviceCollection::View outputs) const {
0021
0022 for (int32_t i : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0023
0024 if (input_clusters_soa[i].clusterIndex() == kInvalidCluster) {
0025 continue;
0026 }
0027 auto clIdx = input_clusters_soa[i].clusterIndex();
0028 alpaka::atomicAdd(acc, &outputs[clIdx].energy(), input_rechits_soa[i].weight());
0029 alpaka::atomicAdd(acc, &outputs[clIdx].cells(), 1);
0030 if (input_clusters_soa[i].isSeed() == 1) {
0031 outputs[clIdx].seed() = input_rechits_soa[i].detid();
0032 }
0033 }
0034 }
0035 };
0036
0037
0038 class HGCalLayerClustersSoAAlgoKernelPositionByHits {
0039 public:
0040 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0041 const unsigned int numer_of_clusters,
0042 float thresholdW0,
0043 float positionDeltaRho2,
0044 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0045 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0046 HGCalSoAClustersDeviceCollection::View outputs,
0047 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0048
0049 for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0050 const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0051
0052
0053 if (cluster_index == kInvalidCluster) {
0054 continue;
0055 }
0056
0057 alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight(), input_rechits_soa[hit_index].weight());
0058
0059 int clusterSeed = outputs_service[cluster_index].maxEnergyIndex();
0060 float clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0061
0062 while (input_rechits_soa[hit_index].weight() > clusterEnergy) {
0063
0064
0065
0066
0067 int seed = alpaka::atomicCas(acc, &outputs_service[cluster_index].maxEnergyIndex(), clusterSeed, hit_index);
0068 if (seed == hit_index) {
0069
0070 break;
0071 } else {
0072
0073 clusterSeed = seed;
0074 clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0075 }
0076 }
0077 }
0078 }
0079 };
0080
0081
0082 class HGCalLayerClustersSoAAlgoKernelPositionByHits2 {
0083 public:
0084 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0085 const unsigned int numer_of_clusters,
0086 float thresholdW0,
0087 float positionDeltaRho2,
0088 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0089 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0090 HGCalSoAClustersDeviceCollection::View outputs,
0091 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0092
0093 for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0094 const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0095
0096
0097 if (cluster_index == kInvalidCluster) {
0098 continue;
0099 }
0100 const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0101
0102
0103 const float d1 = input_rechits_soa[hit_index].dim1() - input_rechits_soa[max_energy_index].dim1();
0104 const float d2 = input_rechits_soa[hit_index].dim2() - input_rechits_soa[max_energy_index].dim2();
0105 if (std::fmaf(d1, d1, d2 * d2) > positionDeltaRho2) {
0106 continue;
0107 }
0108 float Wi = std::max(thresholdW0 + std::log(input_rechits_soa[hit_index].weight() /
0109 outputs_service[cluster_index].total_weight()),
0110 0.f);
0111 alpaka::atomicAdd(acc, &outputs[cluster_index].x(), input_rechits_soa[hit_index].dim1() * Wi);
0112 alpaka::atomicAdd(acc, &outputs[cluster_index].y(), input_rechits_soa[hit_index].dim2() * Wi);
0113 alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight_log(), Wi);
0114 }
0115 }
0116 };
0117
0118
0119 class HGCalLayerClustersSoAAlgoKernelPositionByHits3 {
0120 public:
0121 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0122 const unsigned int numer_of_clusters,
0123 float thresholdW0,
0124 float positionDeltaRho2,
0125 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0126 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0127 HGCalSoAClustersDeviceCollection::View outputs,
0128 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0129
0130 for (int32_t cluster_index : uniform_elements(acc, outputs.metadata().size())) {
0131 const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0132
0133 if (outputs_service[cluster_index].total_weight_log() > 0.f) {
0134 float inv_tot_weight = 1.f / outputs_service[cluster_index].total_weight_log();
0135 outputs[cluster_index].x() *= inv_tot_weight;
0136 outputs[cluster_index].y() *= inv_tot_weight;
0137 } else {
0138 outputs[cluster_index].x() = input_rechits_soa[max_energy_index].dim1();
0139 outputs[cluster_index].y() = input_rechits_soa[max_energy_index].dim2();
0140 }
0141 outputs[cluster_index].z() = input_rechits_soa[max_energy_index].dim3();
0142 }
0143 }
0144 };
0145
0146 void HGCalLayerClustersSoAAlgoWrapper::run(Queue& queue,
0147 const unsigned int size,
0148 float thresholdW0,
0149 float positionDeltaRho2,
0150 const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0151 const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0152 HGCalSoAClustersDeviceCollection::View outputs,
0153 HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0154 auto x = cms::alpakatools::make_device_view<float>(queue, outputs.x(), size);
0155 alpaka::memset(queue, x, 0x0);
0156 auto y = cms::alpakatools::make_device_view<float>(queue, outputs.y(), size);
0157 alpaka::memset(queue, y, 0x0);
0158 auto z = cms::alpakatools::make_device_view<float>(queue, outputs.z(), size);
0159 alpaka::memset(queue, z, 0x0);
0160 auto seed = cms::alpakatools::make_device_view<int>(queue, outputs.seed(), size);
0161 alpaka::memset(queue, seed, 0x0);
0162 auto energy = cms::alpakatools::make_device_view<float>(queue, outputs.energy(), size);
0163 alpaka::memset(queue, energy, 0x0);
0164 auto cells = cms::alpakatools::make_device_view<int>(queue, outputs.cells(), size);
0165 alpaka::memset(queue, cells, 0x0);
0166 auto total_weight = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight(), size);
0167 alpaka::memset(queue, total_weight, 0x0);
0168 auto total_weight_log = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight_log(), size);
0169 alpaka::memset(queue, total_weight_log, 0x0);
0170 auto maxEnergyValue = cms::alpakatools::make_device_view<float>(queue, outputs_service.maxEnergyValue(), size);
0171 alpaka::memset(queue, maxEnergyValue, 0x0);
0172 auto maxEnergyIndex = cms::alpakatools::make_device_view<int>(queue, outputs_service.maxEnergyIndex(), size);
0173 alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
0174
0175
0176 uint32_t items = 64;
0177
0178
0179 uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
0180
0181
0182
0183
0184 auto workDiv = make_workdiv<Acc1D>(groups, items);
0185
0186 alpaka::exec<Acc1D>(
0187 queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
0188 alpaka::exec<Acc1D>(queue,
0189 workDiv,
0190 HGCalLayerClustersSoAAlgoKernelPositionByHits{},
0191 size,
0192 thresholdW0,
0193 positionDeltaRho2,
0194 input_rechits_soa,
0195 input_clusters_soa,
0196 outputs,
0197 outputs_service);
0198 alpaka::exec<Acc1D>(queue,
0199 workDiv,
0200 HGCalLayerClustersSoAAlgoKernelPositionByHits2{},
0201 size,
0202 thresholdW0,
0203 positionDeltaRho2,
0204 input_rechits_soa,
0205 input_clusters_soa,
0206 outputs,
0207 outputs_service);
0208 uint32_t group_clusters = divide_up_by(size, items);
0209 auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
0210 alpaka::exec<Acc1D>(queue,
0211 workDivClusters,
0212 HGCalLayerClustersSoAAlgoKernelPositionByHits3{},
0213 size,
0214 thresholdW0,
0215 positionDeltaRho2,
0216 input_rechits_soa,
0217 input_clusters_soa,
0218 outputs,
0219 outputs_service);
0220 }
0221 }