Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-07-03 04:18:12

0001 #include "RecoLocalCalo/HGCalRecProducers/interface/HGCalTilesConstants.h"
0002 
0003 #include "HGCalLayerClustersSoAAlgoWrapper.h"
0004 #include "ConstantsForClusters.h"
0005 
0006 #include "CLUEAlgoAlpaka.h"
0007 
0008 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0009 
0010   using namespace cms::alpakatools;
0011   using namespace hgcal::constants;
0012 
0013   // Set energy and number of hits in each clusters
0014   class HGCalLayerClustersSoAAlgoKernelEnergy {
0015   public:
0016     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0017     ALPAKA_FN_ACC void operator()(TAcc const& acc,
0018                                   const unsigned int numer_of_clusters,
0019                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0020                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0021                                   HGCalSoAClustersDeviceCollection::View outputs) const {
0022       // make a strided loop over the kernel grid, covering up to "size" elements
0023       for (int32_t i : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0024         // Skip unassigned rechits
0025         if (input_clusters_soa[i].clusterIndex() == kInvalidCluster) {
0026           continue;
0027         }
0028         auto clIdx = input_clusters_soa[i].clusterIndex();
0029         alpaka::atomicAdd(acc, &outputs[clIdx].energy(), input_rechits_soa[i].weight());
0030         alpaka::atomicAdd(acc, &outputs[clIdx].cells(), 1);
0031         if (input_clusters_soa[i].isSeed() == 1) {
0032           outputs[clIdx].seed() = input_rechits_soa[i].detid();
0033         }
0034       }
0035     }
0036   };
0037 
0038   // Kernel to find the max for every cluster
0039   class HGCalLayerClustersSoAAlgoKernelPositionByHits {
0040   public:
0041     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0042     ALPAKA_FN_ACC void operator()(TAcc const& acc,
0043                                   const unsigned int numer_of_clusters,
0044                                   float thresholdW0,
0045                                   float positionDeltaRho2,
0046                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0047                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0048                                   HGCalSoAClustersDeviceCollection::View outputs,
0049                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0050       // make a strided loop over the kernel grid, covering up to "size" elements
0051       for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0052         const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0053 
0054         // Bail out if you are not part of any cluster
0055         if (cluster_index == kInvalidCluster) {
0056           continue;
0057         }
0058 
0059         alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight(), input_rechits_soa[hit_index].weight());
0060         // Read the current seed index, and the associated energy.
0061         int clusterSeed = outputs_service[cluster_index].maxEnergyIndex();
0062         float clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0063 
0064         while (input_rechits_soa[hit_index].weight() > clusterEnergy) {
0065           // If output_service[cluster_index].maxEnergyIndex() did not change,
0066           // store the new value and exit the loop.  Otherwise return the value
0067           // that has been updated, and decide again if the maximum needs to be
0068           // updated.
0069           int seed = alpaka::atomicCas(acc, &outputs_service[cluster_index].maxEnergyIndex(), clusterSeed, hit_index);
0070           if (seed == hit_index) {
0071             // atomicCas has stored the new value.
0072             break;
0073           } else {
0074             // Update the seed index and re-read the associated energy.
0075             clusterSeed = seed;
0076             clusterEnergy = (clusterSeed == kInvalidIndex) ? 0.f : input_rechits_soa[clusterSeed].weight();
0077           }
0078         }  // CAS
0079       }    // uniform_elements
0080     }      // operator()
0081   };
0082 
0083   // Real Kernel position
0084   class HGCalLayerClustersSoAAlgoKernelPositionByHits2 {
0085   public:
0086     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0087     ALPAKA_FN_ACC void operator()(TAcc const& acc,
0088                                   const unsigned int numer_of_clusters,
0089                                   float thresholdW0,
0090                                   float positionDeltaRho2,
0091                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0092                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0093                                   HGCalSoAClustersDeviceCollection::View outputs,
0094                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0095       // make a strided loop over the kernel grid, covering up to "size" elements
0096       for (int32_t hit_index : uniform_elements(acc, input_rechits_soa.metadata().size())) {
0097         const int cluster_index = input_clusters_soa[hit_index].clusterIndex();
0098 
0099         // Bail out if you are not part of any cluster
0100         if (cluster_index == kInvalidCluster) {
0101           continue;
0102         }
0103         const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0104 
0105         //for silicon only just use 1+6 cells = 1.3cm for all thicknesses
0106         const float d1 = input_rechits_soa[hit_index].dim1() - input_rechits_soa[max_energy_index].dim1();
0107         const float d2 = input_rechits_soa[hit_index].dim2() - input_rechits_soa[max_energy_index].dim2();
0108         if (std::fmaf(d1, d1, d2 * d2) > positionDeltaRho2) {
0109           continue;
0110         }
0111         float Wi = std::max(thresholdW0 + std::log(input_rechits_soa[hit_index].weight() /
0112                                                    outputs_service[cluster_index].total_weight()),
0113                             0.f);
0114         alpaka::atomicAdd(acc, &outputs[cluster_index].x(), input_rechits_soa[hit_index].dim1() * Wi);
0115         alpaka::atomicAdd(acc, &outputs[cluster_index].y(), input_rechits_soa[hit_index].dim2() * Wi);
0116         alpaka::atomicAdd(acc, &outputs_service[cluster_index].total_weight_log(), Wi);
0117       }  // uniform_elements
0118     }    // operator()
0119   };
0120 
0121   // Besides the final position, add also the DetId of the seed of each cluster
0122   class HGCalLayerClustersSoAAlgoKernelPositionByHits3 {
0123   public:
0124     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0125     ALPAKA_FN_ACC void operator()(TAcc const& acc,
0126                                   const unsigned int numer_of_clusters,
0127                                   float thresholdW0,
0128                                   float positionDeltaRho2,
0129                                   const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0130                                   const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0131                                   HGCalSoAClustersDeviceCollection::View outputs,
0132                                   HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0133       // make a strided loop over the kernel grid, covering up to "size" elements
0134       for (int32_t cluster_index : uniform_elements(acc, outputs.metadata().size())) {
0135         const int max_energy_index = outputs_service[cluster_index].maxEnergyIndex();
0136 
0137         if (outputs_service[cluster_index].total_weight_log() > 0.f) {
0138           float inv_tot_weight = 1.f / outputs_service[cluster_index].total_weight_log();
0139           outputs[cluster_index].x() *= inv_tot_weight;
0140           outputs[cluster_index].y() *= inv_tot_weight;
0141         } else {
0142           outputs[cluster_index].x() = input_rechits_soa[max_energy_index].dim1();
0143           outputs[cluster_index].y() = input_rechits_soa[max_energy_index].dim2();
0144         }
0145         outputs[cluster_index].z() = input_rechits_soa[max_energy_index].dim3();
0146       }  // uniform_elements
0147     }    // operator()
0148   };
0149 
0150   void HGCalLayerClustersSoAAlgoWrapper::run(Queue& queue,
0151                                              const unsigned int size,
0152                                              float thresholdW0,
0153                                              float positionDeltaRho2,
0154                                              const HGCalSoARecHitsDeviceCollection::ConstView input_rechits_soa,
0155                                              const HGCalSoARecHitsExtraDeviceCollection::ConstView input_clusters_soa,
0156                                              HGCalSoAClustersDeviceCollection::View outputs,
0157                                              HGCalSoAClustersExtraDeviceCollection::View outputs_service) const {
0158     auto x = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.x(), size);
0159     alpaka::memset(queue, x, 0x0);
0160     auto y = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.y(), size);
0161     alpaka::memset(queue, y, 0x0);
0162     auto z = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.z(), size);
0163     alpaka::memset(queue, z, 0x0);
0164     auto seed = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.seed(), size);
0165     alpaka::memset(queue, seed, 0x0);
0166     auto energy = cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs.energy(), size);
0167     alpaka::memset(queue, energy, 0x0);
0168     auto cells = cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs.cells(), size);
0169     alpaka::memset(queue, cells, 0x0);
0170     auto total_weight =
0171         cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight(), size);
0172     alpaka::memset(queue, total_weight, 0x0);
0173     auto total_weight_log =
0174         cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.total_weight_log(), size);
0175     alpaka::memset(queue, total_weight_log, 0x0);
0176     auto maxEnergyValue =
0177         cms::alpakatools::make_device_view<float>(alpaka::getDev(queue), outputs_service.maxEnergyValue(), size);
0178     alpaka::memset(queue, maxEnergyValue, 0x0);
0179     auto maxEnergyIndex =
0180         cms::alpakatools::make_device_view<int>(alpaka::getDev(queue), outputs_service.maxEnergyIndex(), size);
0181     alpaka::memset(queue, maxEnergyIndex, kInvalidIndexByte);
0182 
0183     // use 64 items per group (this value is arbitrary, but it's a reasonable starting point)
0184     uint32_t items = 64;
0185 
0186     // use as many groups as needed to cover the whole problem
0187     uint32_t groups = divide_up_by(input_rechits_soa.metadata().size(), items);
0188 
0189     // map items to
0190     //   - threads with a single element per thread on a GPU backend
0191     //   - elements within a single thread on a CPU backend
0192     auto workDiv = make_workdiv<Acc1D>(groups, items);
0193 
0194     alpaka::exec<Acc1D>(
0195         queue, workDiv, HGCalLayerClustersSoAAlgoKernelEnergy{}, size, input_rechits_soa, input_clusters_soa, outputs);
0196     alpaka::exec<Acc1D>(queue,
0197                         workDiv,
0198                         HGCalLayerClustersSoAAlgoKernelPositionByHits{},
0199                         size,
0200                         thresholdW0,
0201                         positionDeltaRho2,
0202                         input_rechits_soa,
0203                         input_clusters_soa,
0204                         outputs,
0205                         outputs_service);
0206     alpaka::exec<Acc1D>(queue,
0207                         workDiv,
0208                         HGCalLayerClustersSoAAlgoKernelPositionByHits2{},
0209                         size,
0210                         thresholdW0,
0211                         positionDeltaRho2,
0212                         input_rechits_soa,
0213                         input_clusters_soa,
0214                         outputs,
0215                         outputs_service);
0216     uint32_t group_clusters = divide_up_by(size, items);
0217     auto workDivClusters = make_workdiv<Acc1D>(group_clusters, items);
0218     alpaka::exec<Acc1D>(queue,
0219                         workDivClusters,
0220                         HGCalLayerClustersSoAAlgoKernelPositionByHits3{},
0221                         size,
0222                         thresholdW0,
0223                         positionDeltaRho2,
0224                         input_rechits_soa,
0225                         input_clusters_soa,
0226                         outputs,
0227                         outputs_service);
0228   }
0229 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE