Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-01-22 07:34:23

0001 #include "RecoLocalCalo/HGCalRecProducers/interface/HGCalTilesConstants.h"
0002 
0003 #include "HGCalLayerClustersSoAAlgoWrapper.h"
0004 #include "ConstantsForClusters.h"
0005 
0006 #include "CLUEAlgoAlpaka.h"
0007 
0008 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0009 
0010   using namespace cms::alpakatools;
0011   using namespace hgcal::constants;
0012 
0013   // Set energy and number of hits in each clusters
0014   class HGCalLayerClustersSoAAlgoKernelEnergy {
0015   public:
0016     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0017                                   const unsigned int numer_of_clusters,
0018                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0019                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0020                                   HGCalSoAClustersDeviceCollection::View outputs) const {
0021       // make a strided loop over the kernel grid, covering up to "size" elements
0022       for (int32_t i : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0023         // Skip unassigned rechits
0024         if (input_clusters_soa[i].clusterIndex() == kInvalidCluster) {
0025           continue;
0026         }
0027         auto clIdx = input_clusters_soa[i].clusterIndex();
0028         alpaka::atomicAdd(acc, &outputs[clIdx].energy(), input_rechits_soa[i].weight());
0029         alpaka::atomicAdd(acc, &outputs[clIdx].cells(), 1);
0030         if (input_clusters_soa[i].isSeed() == 1) {
0031           outputs[clIdx].seed() = input_rechits_soa[i].detid();
0032         }
0033       }
0034     }
0035   };
0036 
0037   // Kernel to find the max for every cluster
0038   class HGCalLayerClustersSoAAlgoKernelPositionByHits {
0039   public:
0040     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0041                                   const unsigned int numer_of_clusters,
0042                                   float thresholdW0,
0043                                   float positionDeltaRho2,
0044                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0045                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0046                                   HGCalSoAClustersDeviceCollection::View outputs,
0047                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0048       // make a strided loop over the kernel grid, covering up to "size" elements
0049       for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0050         const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0051 
0052         // Bail out if you are not part of any cluster
0053         if (cluster_index == kInvalidCluster) {
0054           continue;
0055         }
0056 
0057         alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight(), input_rechits_soa[hit_index].weight());
0058         // Read the current seed index, and the associated energy.
0059         int clusterSeed = outputs_service[cluster_index].maxEnergyIndex();
0060         float clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0061 
0062         while (input_rechits_soa[hit_index].weight() > clusterEnergy) {
0063           // If output_service[cluster_index].maxEnergyIndex() did not change,
0064           // store the new value and exit the loop.  Otherwise return the value
0065           // that has been updated, and decide again if the maximum needs to be
0066           // updated.
0067           int seed = alpaka::atomicCas(acc, &outputs_service[cluster_index].maxEnergyIndex(), clusterSeed, hit_index);
0068           if (seed == hit_index) {
0069             // atomicCas has stored the new value.
0070             break;
0071           } else {
0072             // Update the seed index and re-read the associated energy.
0073             clusterSeed = seed;
0074             clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0075           }
0076         }  // CAS
0077       }  // uniform_elements
0078     }  // operator()
0079   };
0080 
0081   // Real Kernel position
0082   class HGCalLayerClustersSoAAlgoKernelPositionByHits2 {
0083   public:
0084     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0085                                   const unsigned int numer_of_clusters,
0086                                   float thresholdW0,
0087                                   float positionDeltaRho2,
0088                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0089                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0090                                   HGCalSoAClustersDeviceCollection::View outputs,
0091                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0092       // make a strided loop over the kernel grid, covering up to "size" elements
0093       for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0094         const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0095 
0096         // Bail out if you are not part of any cluster
0097         if (cluster_index == kInvalidCluster) {
0098           continue;
0099         }
0100         const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0101 
0102         //for silicon only just use 1+6 cells = 1.3cm for all thicknesses
0103         const float d1 = input_rechits_soa[hit_index].dim1() - input_rechits_soa[max_energy_index].dim1();
0104         const float d2 = input_rechits_soa[hit_index].dim2() - input_rechits_soa[max_energy_index].dim2();
0105         if (std::fmaf(d1, d1, d2 * d2) > positionDeltaRho2) {
0106           continue;
0107         }
0108         float Wi = std::max(thresholdW0 + std::log(input_rechits_soa[hit_index].weight() /
0109                                                    outputs_service[cluster_index].total_weight()),
0110                             0.f);
0111         alpaka::atomicAdd(acc, &outputs[cluster_index].x(), input_rechits_soa[hit_index].dim1() * Wi);
0112         alpaka::atomicAdd(acc, &outputs[cluster_index].y(), input_rechits_soa[hit_index].dim2() * Wi);
0113         alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight_log(), Wi);
0114       }  // uniform_elements
0115     }  // operator()
0116   };
0117 
0118   // Besides the final position, add also the DetId of the seed of each cluster
0119   class HGCalLayerClustersSoAAlgoKernelPositionByHits3 {
0120   public:
0121     ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0122                                   const unsigned int numer_of_clusters,
0123                                   float thresholdW0,
0124                                   float positionDeltaRho2,
0125                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0126                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0127                                   HGCalSoAClustersDeviceCollection::View outputs,
0128                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0129       // make a strided loop over the kernel grid, covering up to "size" elements
0130       for (int32_t cluster_index : uniform_elements(acc, outputs.metadata().size())) {
0131         const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0132 
0133         if (outputs_service[cluster_index].total_weight_log() > 0.f) {
0134           float inv_tot_weight = 1.f / outputs_service[cluster_index].total_weight_log();
0135           outputs[cluster_index].x() *= inv_tot_weight;
0136           outputs[cluster_index].y() *= inv_tot_weight;
0137         } else {
0138           outputs[cluster_index].x() = input_rechits_soa[max_energy_index].dim1();
0139           outputs[cluster_index].y() = input_rechits_soa[max_energy_index].dim2();
0140         }
0141         outputs[cluster_index].z() = input_rechits_soa[max_energy_index].dim3();
0142       }  // uniform_elements
0143     }  // operator()
0144   };
0145 
0146   void HGCalLayerClustersSoAAlgoWrapper::run(Queue& queue,
0147                                              const unsigned int size,
0148                                              float thresholdW0,
0149                                              float positionDeltaRho2,
0150                                              const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0151                                              const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0152                                              HGCalSoAClustersDeviceCollection::View outputs,
0153                                              HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0154     auto x = cms::alpakatools::make_device_view<float>(queue, outputs.x(), size);
0155     alpaka::memset(queue, x, 0x0);
0156     auto y = cms::alpakatools::make_device_view<float>(queue, outputs.y(), size);
0157     alpaka::memset(queue, y, 0x0);
0158     auto z = cms::alpakatools::make_device_view<float>(queue, outputs.z(), size);
0159     alpaka::memset(queue, z, 0x0);
0160     auto seed = cms::alpakatools::make_device_view<int>(queue, outputs.seed(), size);
0161     alpaka::memset(queue, seed, 0x0);
0162     auto energy = cms::alpakatools::make_device_view<float>(queue, outputs.energy(), size);
0163     alpaka::memset(queue, energy, 0x0);
0164     auto cells = cms::alpakatools::make_device_view<int>(queue, outputs.cells(), size);
0165     alpaka::memset(queue, cells, 0x0);
0166     auto total_weight = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight(), size);
0167     alpaka::memset(queue, total_weight, 0x0);
0168     auto total_weight_log = cms::alpakatools::make_device_view<float>(queue, outputs_service.total_weight_log(), size);
0169     alpaka::memset(queue, total_weight_log, 0x0);
0170     auto maxEnergyValue = cms::alpakatools::make_device_view<float>(queue, outputs_service.maxEnergyValue(), size);
0171     alpaka::memset(queue, maxEnergyValue, 0x0);
0172     auto maxEnergyIndex = cms::alpakatools::make_device_view<int>(queue, outputs_service.maxEnergyIndex(), size);
0173     alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
0174 
0175     // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
0176     uint32_t items = 64;
0177 
0178     // use as many groups as needed to cover the whole problem
0179     uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
0180 
0181     // map items to
0182     //   - threads with a single element per thread on a GPU backend
0183     //   - elements within a single thread on a CPU backend
0184     auto workDiv = make_workdiv<Acc1D>(groups, items);
0185 
0186     alpaka::exec<Acc1D>(
0187         queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
0188     alpaka::exec<Acc1D>(queue,
0189                         workDiv,
0190                         HGCalLayerClustersSoAAlgoKernelPositionByHits{},
0191                         size,
0192                         thresholdW0,
0193                         positionDeltaRho2,
0194                         input_rechits_soa,
0195                         input_clusters_soa,
0196                         outputs,
0197                         outputs_service);
0198     alpaka::exec<Acc1D>(queue,
0199                         workDiv,
0200                         HGCalLayerClustersSoAAlgoKernelPositionByHits2{},
0201                         size,
0202                         thresholdW0,
0203                         positionDeltaRho2,
0204                         input_rechits_soa,
0205                         input_clusters_soa,
0206                         outputs,
0207                         outputs_service);
0208     uint32_t group_clusters = divide_up_by(size, items);
0209     auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
0210     alpaka::exec<Acc1D>(queue,
0211                         workDivClusters,
0212                         HGCalLayerClustersSoAAlgoKernelPositionByHits3{},
0213                         size,
0214                         thresholdW0,
0215                         positionDeltaRho2,
0216                         input_rechits_soa,
0217                         input_clusters_soa,
0218                         outputs,
0219                         outputs_service);
0220   }
0221 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE