Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-20 02:32:09

0001 #ifndef RecoParticleFlow_PFClusterProducer_plugins_alpaka_PFClusterECLCC_h
0002 #define RecoParticleFlow_PFClusterProducer_plugins_alpaka_PFClusterECLCC_h
0003 
0004 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006 #include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringVarsDeviceCollection.h"
0007 #include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusteringEdgeVarsDeviceCollection.h"
0008 
0009 // The following comment block is required in using the ECL-CC algorithm for topological clustering
0010 
0011 /*
0012   ECL-CC code: ECL-CC is a connected components graph algorithm. The CUDA
0013   implementation thereof is quite fast. It operates on graphs stored in
0014   binary CSR format.
0015 
0016   Copyright (c) 2017-2020, Texas State University. All rights reserved.
0017 
0018   Redistribution and use in source and binary forms, with or without
0019   modification, are permitted provided that the following conditions are met:
0020 
0021      * Redistributions of source code must retain the above copyright
0022        notice, this list of conditions and the following disclaimer.
0023      * Redistributions in binary form must reproduce the above copyright
0024        notice, this list of conditions and the following disclaimer in the
0025        documentation and/or other materials provided with the distribution.
0026      * Neither the name of Texas State University nor the names of its
0027        contributors may be used to endorse or promote products derived from
0028        this software without specific prior written permission.
0029 
0030   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0031   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0032   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0033   DISCLAIMED. IN NO EVENT SHALL TEXAS STATE UNIVERSITY BE LIABLE FOR ANY
0034   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0035   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0036   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0037   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0038   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0039   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0040 
0041   Authors: Jayadharini Jaiganesh and Martin Burtscher
0042 
0043   URL: The latest version of this code is available at
0044   https://userweb.cs.txstate.edu/~burtscher/research/ECL-CC/.
0045 
0046   Publication: This work is described in detail in the following paper.
0047   Jayadharini Jaiganesh and Martin Burtscher. A High-Performance Connected
0048   Components Implementation for GPUs. Proceedings of the 2018 ACM International
0049   Symposium on High-Performance Parallel and Distributed Computing, pp. 92-104.
0050   June 2018.
0051 */
0052 
0053 /*
0054  The code is modified for the specific use-case of generating topological clusters
0055  for PFClustering. It is adapted to work with the Alpaka portability library. The
0056  kernels for processing vertices at warp and block level granularity have been
0057  removed since the degree of vertices in our inputs is only ever 8; the number of
0058  neighbors.
0059 */
0060 
0061 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0062 
0063   /* intermediate pointer jumping */
0064 
0065   ALPAKA_FN_ACC inline int representative(const int idx,
0066                                           reco::PFClusteringVarsDeviceCollection::View pfClusteringVars) {
0067     int curr = pfClusteringVars[idx].pfrh_topoId();
0068     if (curr != idx) {
0069       int next, prev = idx;
0070       while (curr > (next = pfClusteringVars[curr].pfrh_topoId())) {
0071         pfClusteringVars[prev].pfrh_topoId() = next;
0072         prev = curr;
0073         curr = next;
0074       }
0075     }
0076     return curr;
0077   }
0078 
0079   // Initial step of ECL-CC. Uses ID of first neighbour in edgeList with a smaller ID
0080   class ECLCCInit {
0081   public:
0082     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0083     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0084                                   reco::PFRecHitHostCollection::ConstView pfRecHits,
0085                                   reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0086                                   reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const {
0087       const int nRH = pfRecHits.size();
0088       for (int v : cms::alpakatools::uniform_elements(acc, nRH)) {
0089         const int beg = pfClusteringEdgeVars[v].pfrh_edgeIdx();
0090         const int end = pfClusteringEdgeVars[v + 1].pfrh_edgeIdx();
0091         int m = v;
0092         int i = beg;
0093         while ((m == v) && (i < end)) {
0094           m = std::min(m, pfClusteringEdgeVars[i].pfrh_edgeList());
0095           i++;
0096         }
0097         pfClusteringVars[v].pfrh_topoId() = m;
0098       }
0099     }
0100   };
0101 
0102   // First edge processing kernel of ECL-CC
0103   // Processes vertices
0104   class ECLCCCompute1 {
0105   public:
0106     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0107     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0108                                   reco::PFRecHitHostCollection::ConstView pfRecHits,
0109                                   reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0110                                   reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const {
0111       const int nRH = pfRecHits.size();
0112 
0113       for (int v : cms::alpakatools::uniform_elements(acc, nRH)) {
0114         const int vstat = pfClusteringVars[v].pfrh_topoId();
0115         if (v != vstat) {
0116           const int beg = pfClusteringEdgeVars[v].pfrh_edgeIdx();
0117           const int end = pfClusteringEdgeVars[v + 1].pfrh_edgeIdx();
0118           int vstat = representative(v, pfClusteringVars);
0119           for (int i = beg; i < end; i++) {
0120             const int nli = pfClusteringEdgeVars[i].pfrh_edgeList();
0121             if (v > nli) {
0122               int ostat = representative(nli, pfClusteringVars);
0123               bool repeat;
0124               do {
0125                 repeat = false;
0126                 if (vstat != ostat) {
0127                   int ret;
0128                   if (vstat < ostat) {
0129                     if ((ret = alpaka::atomicCas(acc, &pfClusteringVars[ostat].pfrh_topoId(), ostat, vstat)) != ostat) {
0130                       ostat = ret;
0131                       repeat = true;
0132                     }
0133                   } else {
0134                     if ((ret = alpaka::atomicCas(acc, &pfClusteringVars[vstat].pfrh_topoId(), vstat, ostat)) != vstat) {
0135                       vstat = ret;
0136                       repeat = true;
0137                     }
0138                   }
0139                 }
0140               } while (repeat);
0141             }
0142           }
0143         }
0144       }
0145     }
0146   };
0147 
0148   /* link all vertices to sink */
0149   class ECLCCFlatten {
0150   public:
0151     template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0152     ALPAKA_FN_ACC void operator()(const TAcc& acc,
0153                                   reco::PFRecHitHostCollection::ConstView pfRecHits,
0154                                   reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0155                                   reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars) const {
0156       const int nRH = pfRecHits.size();
0157 
0158       for (int v : cms::alpakatools::uniform_elements(acc, nRH)) {
0159         int next, vstat = pfClusteringVars[v].pfrh_topoId();
0160         const int old = vstat;
0161         while (vstat > (next = pfClusteringVars[vstat].pfrh_topoId())) {
0162           vstat = next;
0163         }
0164         if (old != vstat)
0165           pfClusteringVars[v].pfrh_topoId() = vstat;
0166       }
0167     }
0168   };
0169 
0170   // ECL-CC ends
0171 
0172 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE
0173 
0174 #endif