File indexing completed on 2024-10-04 05:19:00
0001 #include <alpaka/alpaka.hpp>
0002
0003 #include "FWCore/Utilities/interface/bit_cast.h"
0004 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0005 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0006 #include "HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h"
0007
0008 #include "DataFormats/ParticleFlowReco/interface/PFLayer.h"
0009 #include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h"
0010 #include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h"
0011
0012 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0013
0014 using namespace cms::alpakatools;
0015
0016 using namespace reco::pfClustering;
0017
0018 static constexpr int threadsPerBlockForClustering = 512;
0019 static constexpr uint32_t blocksForExoticClusters = 4;
0020
0021
0022
0023
0024 static constexpr float cutoffDistance = 100.;
0025 static constexpr float cutoffFraction = 0.9999;
0026
0027 static constexpr uint32_t kHBHalf = 1296;
0028 static constexpr uint32_t maxTopoInput = 2 * kHBHalf;
0029
0030
0031 ALPAKA_FN_ACC ALPAKA_FN_INLINE static float dR2(Position4 pos1, Position4 pos2) {
0032 float mag1 = sqrtf(pos1.x * pos1.x + pos1.y * pos1.y + pos1.z * pos1.z);
0033 float cosTheta1 = mag1 > 0.0 ? pos1.z / mag1 : 1.0;
0034 float eta1 = 0.5f * logf((1.0f + cosTheta1) / (1.0f - cosTheta1));
0035 float phi1 = atan2f(pos1.y, pos1.x);
0036
0037 float mag2 = sqrtf(pos2.x * pos2.x + pos2.y * pos2.y + pos2.z * pos2.z);
0038 float cosTheta2 = mag2 > 0.0 ? pos2.z / mag2 : 1.0;
0039 float eta2 = 0.5f * logf((1.0f + cosTheta2) / (1.0f - cosTheta2));
0040 float phi2 = atan2f(pos2.y, pos2.x);
0041
0042 float deta = eta2 - eta1;
0043 constexpr const float fPI = M_PI;
0044 float dphi = std::abs(std::abs(phi2 - phi1) - fPI) - fPI;
0045 return (deta * deta + dphi * dphi);
0046 }
0047
0048
0049 ALPAKA_FN_ACC static auto getSeedRhIdx(int* seeds, int seedNum) { return seeds[seedNum]; }
0050
0051
0052 ALPAKA_FN_ACC static auto getRhFrac(reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0053 int topoSeedBegin,
0054 reco::PFRecHitFractionDeviceCollection::View fracView,
0055 int seedNum,
0056 int rhNum) {
0057 int seedIdx = pfClusteringVars[topoSeedBegin + seedNum].topoSeedList();
0058 return fracView[pfClusteringVars[seedIdx].seedFracOffsets() + rhNum].frac();
0059 }
0060
0061
0062 template <bool debug = false>
0063 ALPAKA_FN_ACC static void updateClusterPos(reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
0064 Position4& pos4,
0065 float frac,
0066 int rhInd,
0067 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0068 float rhENormInv) {
0069 Position4 rechitPos = Position4{pfRecHits[rhInd].x(), pfRecHits[rhInd].y(), pfRecHits[rhInd].z(), 1.0};
0070 const auto rh_energy = pfRecHits[rhInd].energy() * frac;
0071 const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : std::max(0.0f, logf(rh_energy * rhENormInv)));
0072 if constexpr (debug)
0073 printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n",
0074 rhInd,
0075 norm,
0076 frac,
0077 rh_energy,
0078 rechitPos.x,
0079 rechitPos.y,
0080 rechitPos.z);
0081
0082 pos4.x += rechitPos.x * norm;
0083 pos4.y += rechitPos.y * norm;
0084 pos4.z += rechitPos.z * norm;
0085 pos4.w += norm;
0086 }
0087
0088
0089
0090 template <bool debug = false, typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0091 ALPAKA_FN_ACC static void hcalFastCluster_singleSeed(
0092 const TAcc& acc,
0093 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
0094 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0095 int topoId,
0096 int nRHTopo,
0097 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0098 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0099 reco::PFClusterDeviceCollection::View clusterView,
0100 reco::PFRecHitFractionDeviceCollection::View fracView) {
0101 int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
0102
0103 int& i = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0104 int& nRHOther = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0105 unsigned int& iter = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0106 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0107 float& clusterEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0108 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0109 float& seedEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0110 Position4& clusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0111 Position4& prevClusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0112 Position4& seedPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0113 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0114 if (once_per_block(acc)) {
0115 i = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0116 nRHOther = nRHTopo - 1;
0117 seedPos = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.};
0118 clusterPos = seedPos;
0119 prevClusterPos = seedPos;
0120 seedEnergy = pfRecHits[i].energy();
0121 clusterEnergy = seedEnergy;
0122 tol = pfClusParams.stoppingTolerance();
0123
0124 if (topology.cutsFromDB()) {
0125 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0126 } else {
0127 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0128 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0129 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0130 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0131 else {
0132 rhENormInv = 0.;
0133 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0134 }
0135 }
0136
0137 iter = 0;
0138 notDone = true;
0139 }
0140 alpaka::syncBlockThreads(acc);
0141
0142 int j = -1;
0143 int rhFracOffset = -1;
0144 Position4 rhPos;
0145 float rhEnergy = -1., rhPosNorm = -1.;
0146
0147 if (tid < nRHOther) {
0148 rhFracOffset =
0149 pfClusteringVars[i].seedFracOffsets() + tid + 1;
0150 j = fracView[rhFracOffset].pfrhIdx();
0151 rhPos = Position4{pfRecHits[j].x(), pfRecHits[j].y(), pfRecHits[j].z(), 1.};
0152 rhEnergy = pfRecHits[j].energy();
0153 rhPosNorm = fmaxf(0., logf(rhEnergy * rhENormInv));
0154 }
0155 alpaka::syncBlockThreads(acc);
0156
0157 do {
0158 if constexpr (debug) {
0159 if (once_per_block(acc))
0160 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0161 }
0162 float dist2 = -1., d2 = -1., fraction = -1.;
0163 if (tid < nRHOther) {
0164
0165 dist2 = (clusterPos.x - rhPos.x) * (clusterPos.x - rhPos.x) +
0166 (clusterPos.y - rhPos.y) * (clusterPos.y - rhPos.y) +
0167 (clusterPos.z - rhPos.z) * (clusterPos.z - rhPos.z);
0168
0169 d2 = dist2 / pfClusParams.showerSigma2();
0170 fraction = clusterEnergy * rhENormInv * expf(-0.5f * d2);
0171
0172
0173 if (fraction > pfClusParams.minFracTot() && d2 < cutoffDistance)
0174 fraction = 1.;
0175 else
0176 fraction = -1.;
0177 fracView[rhFracOffset].frac() = fraction;
0178 }
0179 alpaka::syncBlockThreads(acc);
0180
0181 if constexpr (debug) {
0182 if (once_per_block(acc))
0183 printf("Computing cluster position for topoId %d\n", topoId);
0184 }
0185
0186 if (once_per_block(acc)) {
0187
0188 clusterPos = seedPos;
0189 clusterEnergy = seedEnergy;
0190 }
0191 alpaka::syncBlockThreads(acc);
0192
0193
0194 if (fraction > -0.5) {
0195 alpaka::atomicAdd(acc, &clusterEnergy, rhEnergy, alpaka::hierarchy::Threads{});
0196 alpaka::atomicAdd(acc, &clusterPos.x, rhPos.x * rhPosNorm, alpaka::hierarchy::Threads{});
0197 alpaka::atomicAdd(acc, &clusterPos.y, rhPos.y * rhPosNorm, alpaka::hierarchy::Threads{});
0198 alpaka::atomicAdd(acc, &clusterPos.z, rhPos.z * rhPosNorm, alpaka::hierarchy::Threads{});
0199 alpaka::atomicAdd(acc, &clusterPos.w, rhPosNorm, alpaka::hierarchy::Threads{});
0200 }
0201 alpaka::syncBlockThreads(acc);
0202
0203 if (once_per_block(acc)) {
0204
0205 if (clusterPos.w >= pfClusParams.minAllowedNormalization()) {
0206
0207 clusterPos.x /= clusterPos.w;
0208 clusterPos.y /= clusterPos.w;
0209 clusterPos.z /= clusterPos.w;
0210
0211 if constexpr (debug)
0212 printf("\tPF cluster (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0213 i,
0214 clusterEnergy,
0215 clusterPos.x,
0216 clusterPos.y,
0217 clusterPos.z);
0218 } else {
0219 if constexpr (debug)
0220 printf("\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n",
0221 i,
0222 clusterPos.w,
0223 pfClusParams.minAllowedNormalization());
0224 clusterPos.x = 0.;
0225 clusterPos.y = 0.;
0226 clusterPos.z = 0.;
0227 }
0228 float diff2 = dR2(prevClusterPos, clusterPos);
0229 if constexpr (debug)
0230 printf("\tPF cluster (seed %d) has diff2 = %f\n", i, diff2);
0231 prevClusterPos = clusterPos;
0232
0233 float tol2 = tol * tol;
0234 iter++;
0235 notDone = (diff2 > tol2) && (iter < pfClusParams.maxIterations());
0236 if constexpr (debug) {
0237 if (diff2 > tol2)
0238 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0239 else if constexpr (debug)
0240 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0241 }
0242 }
0243 alpaka::syncBlockThreads(acc);
0244 } while (notDone);
0245 if (once_per_block(acc)) {
0246 int rhIdx =
0247 pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0248 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0249 clusterView[seedIdx].energy() = clusterEnergy;
0250 clusterView[seedIdx].x() = clusterPos.x;
0251 clusterView[seedIdx].y() = clusterPos.y;
0252 clusterView[seedIdx].z() = clusterPos.z;
0253 }
0254 }
0255
0256
0257
0258 template <bool debug = false, typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0259 ALPAKA_FN_ACC static void hcalFastCluster_multiSeedParallel(
0260 const TAcc& acc,
0261 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
0262 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0263 int topoId,
0264 int nSeeds,
0265 int nRHTopo,
0266 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0267 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0268 reco::PFClusterDeviceCollection::View clusterView,
0269 reco::PFRecHitFractionDeviceCollection::View fracView) {
0270 int tid = alpaka::getIdx<alpaka::Block,
0271 alpaka::Threads>(
0272 acc)[0u];
0273
0274 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0275 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0276 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0277 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0278 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0279 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0280 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0281 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0282 auto& clusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
0283 auto& prevClusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
0284 auto& clusterEnergy = alpaka::declareSharedVar<float[100], __COUNTER__>(acc);
0285 auto& rhFracSum = alpaka::declareSharedVar<float[threadsPerBlockForClustering], __COUNTER__>(acc);
0286 auto& seeds = alpaka::declareSharedVar<int[100], __COUNTER__>(acc);
0287 auto& rechits = alpaka::declareSharedVar<int[threadsPerBlockForClustering], __COUNTER__>(acc);
0288
0289 if (once_per_block(acc)) {
0290 nRHNotSeed = nRHTopo - nSeeds + 1;
0291 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0292 tol = pfClusParams.stoppingTolerance() *
0293 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0294 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0295 iter = 0;
0296 notDone = true;
0297
0298 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0299
0300 if (topology.cutsFromDB()) {
0301 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0302 } else {
0303 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0304 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0305 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0306 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0307 else {
0308 rhENormInv = 0.;
0309 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0310 }
0311 }
0312 }
0313 alpaka::syncBlockThreads(acc);
0314
0315 if (tid < nSeeds)
0316 seeds[tid] = pfClusteringVars[topoSeedBegin + tid].topoSeedList();
0317 if (tid < nRHNotSeed - 1)
0318 rechits[tid] =
0319 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + tid + 1]
0320 .pfrhIdx();
0321
0322 alpaka::syncBlockThreads(acc);
0323
0324 if constexpr (debug) {
0325 if (once_per_block(acc)) {
0326 printf("\n===========================================================================================\n");
0327 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0328 for (int s = 0; s < nSeeds; s++) {
0329 if (s != 0)
0330 printf(", ");
0331 printf("%d", getSeedRhIdx(seeds, s));
0332 }
0333 if (nRHTopo == nSeeds) {
0334 printf(")\n\n");
0335 } else {
0336 printf(") and other rechits (");
0337 for (int r = 1; r < nRHNotSeed; r++) {
0338 if (r != 1)
0339 printf(", ");
0340 if (r <= 0) {
0341 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0342 }
0343 printf("%d", rechits[r - 1]);
0344 }
0345 printf(")\n\n");
0346 }
0347 }
0348 alpaka::syncBlockThreads(acc);
0349 }
0350
0351
0352 if (tid < nSeeds) {
0353 int i = getSeedRhIdx(seeds, tid);
0354 clusterPos[tid] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0355 prevClusterPos[tid] = clusterPos[tid];
0356 clusterEnergy[tid] = pfRecHits[i].energy();
0357 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0358 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0359 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0360 }
0361 }
0362 alpaka::syncBlockThreads(acc);
0363
0364 int rhThreadIdx = -1;
0365 Position4 rhThreadPos;
0366 if (tid < (nRHNotSeed - 1)) {
0367 rhThreadIdx = rechits[tid];
0368 rhThreadPos = Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0369 }
0370
0371
0372 int seedThreadIdx = -1;
0373 Neighbours4 seedNeighbors = Neighbours4{-9, -9, -9, -9};
0374 float seedEnergy = -1.;
0375 Position4 seedInitClusterPos = Position4{0., 0., 0., 0.};
0376 if (tid < nSeeds) {
0377 if constexpr (debug)
0378 printf("tid: %d\n", tid);
0379 seedThreadIdx = getSeedRhIdx(seeds, tid);
0380 seedNeighbors = Neighbours4{pfRecHits[seedThreadIdx].neighbours()(0),
0381 pfRecHits[seedThreadIdx].neighbours()(1),
0382 pfRecHits[seedThreadIdx].neighbours()(2),
0383 pfRecHits[seedThreadIdx].neighbours()(3)};
0384 seedEnergy = pfRecHits[seedThreadIdx].energy();
0385
0386
0387 updateClusterPos(pfClusParams, seedInitClusterPos, 1., seedThreadIdx, pfRecHits, rhENormInv);
0388 }
0389
0390 do {
0391 if constexpr (debug) {
0392 if (once_per_block(acc))
0393 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0394 }
0395
0396
0397 rhFracSum[tid] = 0.;
0398 if (once_per_block(acc))
0399 diff2 = -1;
0400
0401 if (tid < (nRHNotSeed - 1)) {
0402 for (int s = 0; s < nSeeds; s++) {
0403 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0404 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0405 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0406
0407 float d2 = dist2 / pfClusParams.showerSigma2();
0408 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0409
0410 rhFracSum[tid] += fraction;
0411 }
0412 }
0413 alpaka::syncBlockThreads(acc);
0414
0415 if (tid < (nRHNotSeed - 1)) {
0416 for (int s = 0; s < nSeeds; s++) {
0417 int i = seeds[s];
0418 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0419 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0420 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0421
0422 float d2 = dist2 / pfClusParams.showerSigma2();
0423 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0424
0425 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0426 float fracpct = fraction / rhFracSum[tid];
0427 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0428 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0429 } else {
0430 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0431 }
0432 } else {
0433 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0434 }
0435 }
0436 }
0437 alpaka::syncBlockThreads(acc);
0438
0439 if constexpr (debug) {
0440 if (once_per_block(acc))
0441 printf("Computing cluster position for topoId %d\n", topoId);
0442 }
0443
0444
0445 if (tid < nSeeds) {
0446 clusterPos[tid] = seedInitClusterPos;
0447 clusterEnergy[tid] = seedEnergy;
0448 if constexpr (debug) {
0449 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0450 tid,
0451 seeds[tid],
0452 clusterEnergy[tid],
0453 clusterPos[tid].x,
0454 clusterPos[tid].y,
0455 clusterPos[tid].z,
0456 clusterPos[tid].w);
0457 }
0458 }
0459 alpaka::syncBlockThreads(acc);
0460
0461
0462 if (tid < nSeeds) {
0463 for (int r = 0; r < nRHNotSeed - 1; r++) {
0464 int j = rechits[r];
0465 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, tid, r + 1);
0466
0467 if (frac > -0.5) {
0468 clusterEnergy[tid] += frac * pfRecHits[j].energy();
0469
0470 if (nSeeds == 1 || j == seedNeighbors.x || j == seedNeighbors.y || j == seedNeighbors.z ||
0471 j == seedNeighbors.w)
0472 updateClusterPos(pfClusParams, clusterPos[tid], frac, j, pfRecHits, rhENormInv);
0473 }
0474 }
0475 }
0476 alpaka::syncBlockThreads(acc);
0477
0478
0479 if (tid < nSeeds) {
0480 if (clusterPos[tid].w >= pfClusParams.minAllowedNormalization()) {
0481
0482 clusterPos[tid].x /= clusterPos[tid].w;
0483 clusterPos[tid].y /= clusterPos[tid].w;
0484 clusterPos[tid].z /= clusterPos[tid].w;
0485
0486 if constexpr (debug)
0487 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0488 tid,
0489 seedThreadIdx,
0490 clusterEnergy[tid],
0491 clusterPos[tid].x,
0492 clusterPos[tid].y,
0493 clusterPos[tid].z);
0494 } else {
0495 if constexpr (debug)
0496 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
0497 tid,
0498 seedThreadIdx,
0499 clusterPos[tid].w,
0500 pfClusParams.minAllowedNormalization());
0501 clusterPos[tid].x = 0.0;
0502 clusterPos[tid].y = 0.0;
0503 clusterPos[tid].z = 0.0;
0504 }
0505 }
0506 alpaka::syncBlockThreads(acc);
0507
0508 if (tid < nSeeds) {
0509 float delta2 = dR2(prevClusterPos[tid], clusterPos[tid]);
0510 if constexpr (debug)
0511 printf("\tCluster %d (seed %d) has delta2 = %f\n", tid, seeds[tid], delta2);
0512 atomicMaxF(acc, &diff2, delta2);
0513 prevClusterPos[tid] = clusterPos[tid];
0514 }
0515 alpaka::syncBlockThreads(acc);
0516
0517 if (once_per_block(acc)) {
0518 float tol2 = tol * tol;
0519 iter++;
0520 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
0521 if constexpr (debug) {
0522 if (diff2 > tol2)
0523 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0524 else if constexpr (debug)
0525 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0526 }
0527 }
0528 alpaka::syncBlockThreads(acc);
0529 } while (notDone);
0530 if (once_per_block(acc))
0531
0532 if (tid < nSeeds) {
0533 int rhIdx = pfClusteringVars[tid + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0534 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0535 clusterView[seedIdx].energy() = clusterEnergy[tid];
0536 clusterView[seedIdx].x() = clusterPos[tid].x;
0537 clusterView[seedIdx].y() = clusterPos[tid].y;
0538 clusterView[seedIdx].z() = clusterPos[tid].z;
0539 }
0540 }
0541
0542
0543
0544
0545 template <bool debug = false, typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0546 ALPAKA_FN_ACC static void hcalFastCluster_exotic(const TAcc& acc,
0547 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
0548 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0549 int topoId,
0550 int nSeeds,
0551 int nRHTopo,
0552 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0553 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0554 reco::PFClusterDeviceCollection::View clusterView,
0555 reco::PFRecHitFractionDeviceCollection::View fracView,
0556 Position4* __restrict__ globalClusterPos,
0557 Position4* __restrict__ globalPrevClusterPos,
0558 float* __restrict__ globalClusterEnergy,
0559 float* __restrict__ globalRhFracSum,
0560 int* __restrict__ globalSeeds,
0561 int* __restrict__ globalRechits) {
0562 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0563 int& blockIdx = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0564 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0565 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0566 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0567 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0568 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0569 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0570 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0571
0572 blockIdx = maxTopoInput * alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
0573 Position4* clusterPos = globalClusterPos + blockIdx;
0574 Position4* prevClusterPos = globalPrevClusterPos + blockIdx;
0575 float* clusterEnergy = globalClusterEnergy + blockIdx;
0576 float* rhFracSum = globalRhFracSum + blockIdx;
0577 int* seeds = globalSeeds + blockIdx;
0578 int* rechits = globalRechits + blockIdx;
0579
0580 if (once_per_block(acc)) {
0581 nRHNotSeed = nRHTopo - nSeeds + 1;
0582 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0583 tol = pfClusParams.stoppingTolerance() *
0584 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0585 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0586 iter = 0;
0587 notDone = true;
0588
0589 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0590
0591 if (topology.cutsFromDB()) {
0592 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0593 } else {
0594 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0595 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0596 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0597 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0598 else {
0599 rhENormInv = 0.;
0600 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0601 }
0602 }
0603 }
0604 alpaka::syncBlockThreads(acc);
0605
0606 for (int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; n < nRHTopo; n += stride) {
0607 if (n < nSeeds)
0608 seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList();
0609 if (n < nRHNotSeed - 1)
0610 rechits[n] =
0611 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1]
0612 .pfrhIdx();
0613 }
0614 alpaka::syncBlockThreads(acc);
0615
0616 if constexpr (debug) {
0617 if (once_per_block(acc)) {
0618 printf("\n===========================================================================================\n");
0619 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0620 for (int s = 0; s < nSeeds; s++) {
0621 if (s != 0)
0622 printf(", ");
0623 printf("%d", getSeedRhIdx(seeds, s));
0624 }
0625 if (nRHTopo == nSeeds) {
0626 printf(")\n\n");
0627 } else {
0628 printf(") and other rechits (");
0629 for (int r = 1; r < nRHNotSeed; r++) {
0630 if (r != 1)
0631 printf(", ");
0632 if (r <= 0) {
0633 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0634 }
0635 printf("%d", rechits[r - 1]);
0636 }
0637 printf(")\n\n");
0638 }
0639 }
0640 alpaka::syncBlockThreads(acc);
0641 }
0642
0643
0644 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0645 int i = seeds[s];
0646 clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0647 prevClusterPos[s] = clusterPos[s];
0648 clusterEnergy[s] = pfRecHits[i].energy();
0649 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0650 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0651 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0652 }
0653 }
0654 alpaka::syncBlockThreads(acc);
0655
0656 do {
0657 if constexpr (debug) {
0658 if (once_per_block(acc))
0659 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0660 }
0661
0662 if (once_per_block(acc))
0663 diff2 = -1;
0664
0665 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0666 rhFracSum[tid] = 0.;
0667 int rhThreadIdx = rechits[tid];
0668 Position4 rhThreadPos =
0669 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0670 for (int s = 0; s < nSeeds; s++) {
0671 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0672 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0673 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0674
0675 float d2 = dist2 / pfClusParams.showerSigma2();
0676 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0677
0678 rhFracSum[tid] += fraction;
0679 }
0680 }
0681 alpaka::syncBlockThreads(acc);
0682
0683 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0684 int rhThreadIdx = rechits[tid];
0685 Position4 rhThreadPos =
0686 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0687 for (int s = 0; s < nSeeds; s++) {
0688 int i = seeds[s];
0689 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0690 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0691 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0692
0693 float d2 = dist2 / pfClusParams.showerSigma2();
0694 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0695
0696 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0697 float fracpct = fraction / rhFracSum[tid];
0698 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0699 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0700 } else {
0701 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0702 }
0703 } else {
0704 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0705 }
0706 }
0707 }
0708 alpaka::syncBlockThreads(acc);
0709
0710 if constexpr (debug) {
0711 if (once_per_block(acc))
0712 printf("Computing cluster position for topoId %d\n", topoId);
0713 }
0714
0715
0716 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0717 int seedRhIdx = getSeedRhIdx(seeds, s);
0718 float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv);
0719 clusterPos[s] = Position4{
0720 pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm};
0721 clusterEnergy[s] = pfRecHits[seedRhIdx].energy();
0722 if constexpr (debug) {
0723 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0724 s,
0725 seeds[s],
0726 clusterEnergy[s],
0727 clusterPos[s].x,
0728 clusterPos[s].y,
0729 clusterPos[s].z,
0730 clusterPos[s].w);
0731 }
0732 }
0733 alpaka::syncBlockThreads(acc);
0734
0735
0736 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0737 int seedRhIdx = getSeedRhIdx(seeds, s);
0738 for (int r = 0; r < nRHNotSeed - 1; r++) {
0739 int j = rechits[r];
0740 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1);
0741
0742 if (frac > -0.5) {
0743 clusterEnergy[s] += frac * pfRecHits[j].energy();
0744
0745 if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) ||
0746 j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3))
0747 updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv);
0748 }
0749 }
0750 }
0751 alpaka::syncBlockThreads(acc);
0752
0753
0754 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0755 if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) {
0756
0757 clusterPos[s].x /= clusterPos[s].w;
0758 clusterPos[s].y /= clusterPos[s].w;
0759 clusterPos[s].z /= clusterPos[s].w;
0760
0761 if constexpr (debug)
0762 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0763 s,
0764 seeds[s],
0765 clusterEnergy[s],
0766 clusterPos[s].x,
0767 clusterPos[s].y,
0768 clusterPos[s].z);
0769 } else {
0770 if constexpr (debug)
0771 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
0772 s,
0773 seeds[s],
0774 clusterPos[s].w,
0775 pfClusParams.minAllowedNormalization());
0776 clusterPos[s].x = 0.0;
0777 clusterPos[s].y = 0.0;
0778 clusterPos[s].z = 0.0;
0779 }
0780 }
0781 alpaka::syncBlockThreads(acc);
0782
0783 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0784 float delta2 = dR2(prevClusterPos[s], clusterPos[s]);
0785 if constexpr (debug)
0786 printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2);
0787 atomicMaxF(acc, &diff2, delta2);
0788 prevClusterPos[s] = clusterPos[s];
0789 }
0790 alpaka::syncBlockThreads(acc);
0791
0792 if (once_per_block(acc)) {
0793 float tol2 = tol * tol;
0794 iter++;
0795 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
0796 if constexpr (debug) {
0797 if (diff2 > tol2)
0798 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0799 else if constexpr (debug)
0800 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0801 }
0802 }
0803 alpaka::syncBlockThreads(acc);
0804 } while (notDone);
0805 if (once_per_block(acc))
0806 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0807 int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0808 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0809 clusterView[seedIdx].energy() = pfRecHits[s].energy();
0810 clusterView[seedIdx].x() = pfRecHits[s].x();
0811 clusterView[seedIdx].y() = pfRecHits[s].y();
0812 clusterView[seedIdx].z() = pfRecHits[s].z();
0813 }
0814 alpaka::syncBlockThreads(acc);
0815 }
0816
0817
0818
0819 template <bool debug = false, typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0820 ALPAKA_FN_ACC static void hcalFastCluster_multiSeedIterative(
0821 const TAcc& acc,
0822 reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
0823 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0824 int topoId,
0825 int nSeeds,
0826 int nRHTopo,
0827 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0828 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0829 reco::PFClusterDeviceCollection::View clusterView,
0830 reco::PFRecHitFractionDeviceCollection::View fracView) {
0831 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0832 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0833 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0834 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0835 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0836 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0837 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0838 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0839
0840 auto& clusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
0841 auto& prevClusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
0842 auto& clusterEnergy = alpaka::declareSharedVar<float[400], __COUNTER__>(acc);
0843 auto& rhFracSum = alpaka::declareSharedVar<float[1500], __COUNTER__>(acc);
0844 auto& seeds = alpaka::declareSharedVar<int[400], __COUNTER__>(acc);
0845 auto& rechits = alpaka::declareSharedVar<int[1500], __COUNTER__>(acc);
0846
0847 if (once_per_block(acc)) {
0848 nRHNotSeed = nRHTopo - nSeeds + 1;
0849 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0850 tol = pfClusParams.stoppingTolerance() *
0851 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0852 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0853 iter = 0;
0854 notDone = true;
0855
0856 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0857
0858 if (topology.cutsFromDB()) {
0859 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0860 } else {
0861 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0862 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0863 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0864 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0865 else {
0866 rhENormInv = 0.;
0867 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0868 }
0869 }
0870 }
0871 alpaka::syncBlockThreads(acc);
0872
0873 for (int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; n < nRHTopo; n += stride) {
0874 if (n < nSeeds)
0875 seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList();
0876 if (n < nRHNotSeed - 1)
0877 rechits[n] =
0878 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1]
0879 .pfrhIdx();
0880 }
0881 alpaka::syncBlockThreads(acc);
0882
0883 if constexpr (debug) {
0884 if (once_per_block(acc)) {
0885 printf("\n===========================================================================================\n");
0886 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0887 for (int s = 0; s < nSeeds; s++) {
0888 if (s != 0)
0889 printf(", ");
0890 printf("%d", getSeedRhIdx(seeds, s));
0891 }
0892 if (nRHTopo == nSeeds) {
0893 printf(")\n\n");
0894 } else {
0895 printf(") and other rechits (");
0896 for (int r = 1; r < nRHNotSeed; r++) {
0897 if (r != 1)
0898 printf(", ");
0899 if (r <= 0) {
0900 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0901 }
0902 printf("%d", rechits[r - 1]);
0903 }
0904 printf(")\n\n");
0905 }
0906 }
0907 alpaka::syncBlockThreads(acc);
0908 }
0909
0910
0911 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0912 int i = seeds[s];
0913 clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0914 prevClusterPos[s] = clusterPos[s];
0915 clusterEnergy[s] = pfRecHits[i].energy();
0916 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0917 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0918 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0919 }
0920 }
0921 alpaka::syncBlockThreads(acc);
0922
0923 do {
0924 if constexpr (debug) {
0925 if (once_per_block(acc))
0926 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0927 }
0928
0929 if (once_per_block(acc))
0930 diff2 = -1;
0931
0932 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0933 rhFracSum[tid] = 0.;
0934 int rhThreadIdx = rechits[tid];
0935 Position4 rhThreadPos =
0936 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0937 for (int s = 0; s < nSeeds; s++) {
0938 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0939 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0940 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0941
0942 float d2 = dist2 / pfClusParams.showerSigma2();
0943 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0944
0945 rhFracSum[tid] += fraction;
0946 }
0947 }
0948 alpaka::syncBlockThreads(acc);
0949
0950 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0951 int rhThreadIdx = rechits[tid];
0952 Position4 rhThreadPos =
0953 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0954 for (int s = 0; s < nSeeds; s++) {
0955 int i = seeds[s];
0956 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0957 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0958 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0959
0960 float d2 = dist2 / pfClusParams.showerSigma2();
0961 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0962
0963 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0964 float fracpct = fraction / rhFracSum[tid];
0965 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0966 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0967 } else {
0968 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0969 }
0970 } else {
0971 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0972 }
0973 }
0974 }
0975 alpaka::syncBlockThreads(acc);
0976
0977 if constexpr (debug) {
0978 if (once_per_block(acc))
0979 printf("Computing cluster position for topoId %d\n", topoId);
0980 }
0981
0982
0983 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0984 int seedRhIdx = getSeedRhIdx(seeds, s);
0985 float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv);
0986 clusterPos[s] = Position4{
0987 pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm};
0988 clusterEnergy[s] = pfRecHits[seedRhIdx].energy();
0989 if constexpr (debug) {
0990 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0991 s,
0992 seeds[s],
0993 clusterEnergy[s],
0994 clusterPos[s].x,
0995 clusterPos[s].y,
0996 clusterPos[s].z,
0997 clusterPos[s].w);
0998 }
0999 }
1000 alpaka::syncBlockThreads(acc);
1001
1002
1003 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1004 int seedRhIdx = getSeedRhIdx(seeds, s);
1005 for (int r = 0; r < nRHNotSeed - 1; r++) {
1006 int j = rechits[r];
1007 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1);
1008
1009 if (frac > -0.5) {
1010 clusterEnergy[s] += frac * pfRecHits[j].energy();
1011
1012 if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) ||
1013 j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3))
1014 updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv);
1015 }
1016 }
1017 }
1018 alpaka::syncBlockThreads(acc);
1019
1020
1021 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1022 if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) {
1023
1024 clusterPos[s].x /= clusterPos[s].w;
1025 clusterPos[s].y /= clusterPos[s].w;
1026 clusterPos[s].z /= clusterPos[s].w;
1027
1028 if constexpr (debug)
1029 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
1030 s,
1031 seeds[s],
1032 clusterEnergy[s],
1033 clusterPos[s].x,
1034 clusterPos[s].y,
1035 clusterPos[s].z);
1036 } else {
1037 if constexpr (debug)
1038 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
1039 s,
1040 seeds[s],
1041 clusterPos[s].w,
1042 pfClusParams.minAllowedNormalization());
1043 clusterPos[s].x = 0.0;
1044 clusterPos[s].y = 0.0;
1045 clusterPos[s].z = 0.0;
1046 }
1047 }
1048 alpaka::syncBlockThreads(acc);
1049
1050 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1051 float delta2 = dR2(prevClusterPos[s], clusterPos[s]);
1052 if constexpr (debug)
1053 printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2);
1054 atomicMaxF(acc, &diff2, delta2);
1055 prevClusterPos[s] = clusterPos[s];
1056 }
1057 alpaka::syncBlockThreads(acc);
1058
1059 if (once_per_block(acc)) {
1060 float tol2 = tol * tol;
1061 iter++;
1062 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
1063 if constexpr (debug) {
1064 if (diff2 > tol2)
1065 printf("\tTopoId %d has diff2 = %f greater than tolerance %f (continuing)\n", topoId, diff2, tol2);
1066 else if constexpr (debug)
1067 printf("\tTopoId %d has diff2 = %f LESS than tolerance %f (terminating!)\n", topoId, diff2, tol2);
1068 }
1069 }
1070 alpaka::syncBlockThreads(acc);
1071 } while (notDone);
1072 if (once_per_block(acc))
1073 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1074 int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1075 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1076 clusterView[seedIdx].energy() = pfRecHits[s].energy();
1077 clusterView[seedIdx].x() = pfRecHits[s].x();
1078 clusterView[seedIdx].y() = pfRecHits[s].y();
1079 clusterView[seedIdx].z() = pfRecHits[s].z();
1080 }
1081 }
1082
1083
1084 class SeedingTopoThresh {
1085 public:
1086 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1087 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1088 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1089 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1090 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1091 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1092 reco::PFClusterDeviceCollection::View clusterView,
1093 uint32_t* __restrict__ nSeeds) const {
1094 const int nRH = pfRecHits.size();
1095
1096 if (once_per_grid(acc)) {
1097 clusterView.size() = nRH;
1098 }
1099
1100 for (auto i : uniform_elements(acc, nRH)) {
1101
1102 pfClusteringVars[i].pfrh_isSeed() = 0;
1103 pfClusteringVars[i].rhCount() = 0;
1104 pfClusteringVars[i].topoSeedCount() = 0;
1105 pfClusteringVars[i].topoRHCount() = 0;
1106 pfClusteringVars[i].seedFracOffsets() = -1;
1107 pfClusteringVars[i].topoSeedOffsets() = -1;
1108 pfClusteringVars[i].topoSeedList() = -1;
1109 clusterView[i].seedRHIdx() = -1;
1110
1111 int layer = pfRecHits[i].layer();
1112 int depthOffset = pfRecHits[i].depth() - 1;
1113 float energy = pfRecHits[i].energy();
1114 Position3 pos = Position3{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z()};
1115 float seedThreshold = 9999.;
1116 float topoThreshold = 9999.;
1117
1118 if (topology.cutsFromDB()) {
1119 seedThreshold = topology[pfRecHits[i].denseId()].seedThreshold();
1120 topoThreshold = topology[pfRecHits[i].denseId()].noiseThreshold();
1121 } else {
1122 if (layer == PFLayer::HCAL_BARREL1) {
1123 seedThreshold = pfClusParams.seedEThresholdHB_vec()[depthOffset];
1124 topoThreshold = pfClusParams.topoEThresholdHB_vec()[depthOffset];
1125 } else if (layer == PFLayer::HCAL_ENDCAP) {
1126 seedThreshold = pfClusParams.seedEThresholdHE_vec()[depthOffset];
1127 topoThreshold = pfClusParams.topoEThresholdHE_vec()[depthOffset];
1128 }
1129 }
1130
1131
1132 float pt2 = energy * energy * (pos.x * pos.x + pos.y * pos.y) / (pos.x * pos.x + pos.y * pos.y + pos.z * pos.z);
1133
1134
1135 if ((layer == PFLayer::HCAL_BARREL1 && energy > seedThreshold && pt2 > pfClusParams.seedPt2ThresholdHB()) ||
1136 (layer == PFLayer::HCAL_ENDCAP && energy > seedThreshold && pt2 > pfClusParams.seedPt2ThresholdHE())) {
1137 pfClusteringVars[i].pfrh_isSeed() = 1;
1138 for (int k = 0; k < 4; k++) {
1139 if (pfRecHits[i].neighbours()(k) < 0)
1140 continue;
1141 if (energy < pfRecHits[pfRecHits[i].neighbours()(k)].energy()) {
1142 pfClusteringVars[i].pfrh_isSeed() = 0;
1143 break;
1144 }
1145 }
1146 if (pfClusteringVars[i].pfrh_isSeed())
1147 alpaka::atomicAdd(acc, nSeeds, 1u);
1148 }
1149
1150
1151 if ((layer == PFLayer::HCAL_ENDCAP && energy > topoThreshold) ||
1152 (layer == PFLayer::HCAL_BARREL1 && energy > topoThreshold)) {
1153 pfClusteringVars[i].pfrh_passTopoThresh() = true;
1154 pfClusteringVars[i].pfrh_topoId() = i;
1155 } else {
1156 pfClusteringVars[i].pfrh_passTopoThresh() = false;
1157 pfClusteringVars[i].pfrh_topoId() = -1;
1158 }
1159 }
1160 }
1161 };
1162
1163
1164 class PrepareTopoInputs {
1165 public:
1166 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1167 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1168 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1169 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1170 reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars,
1171 uint32_t* __restrict__ nSeeds) const {
1172 const int nRH = pfRecHits.size();
1173
1174 if (once_per_grid(acc)) {
1175 pfClusteringVars.nEdges() = nRH * 8;
1176 pfClusteringEdgeVars[nRH].pfrh_edgeIdx() = nRH * 8;
1177 }
1178 for (uint32_t i : cms::alpakatools::uniform_elements(acc, nRH)) {
1179 pfClusteringEdgeVars[i].pfrh_edgeIdx() = i * 8;
1180 pfClusteringVars[i].pfrh_topoId() = 0;
1181 for (int j = 0; j < 8; j++) {
1182 if (pfRecHits[i].neighbours()(j) == -1)
1183 pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = i;
1184 else
1185 pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = pfRecHits[i].neighbours()(j);
1186 }
1187 }
1188
1189 return;
1190 }
1191 };
1192
1193
1194 class TopoClusterContraction {
1195 public:
1196 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1197 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1198 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1199 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1200 reco::PFClusterDeviceCollection::View clusterView,
1201 uint32_t* __restrict__ nSeeds,
1202 uint32_t* __restrict__ nRHF) const {
1203 const int nRH = pfRecHits.size();
1204 int& totalSeedOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1205 int& totalSeedFracOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1206
1207
1208 if (once_per_block(acc)) {
1209 pfClusteringVars.nTopos() = 0;
1210 pfClusteringVars.nRHFracs() = 0;
1211 totalSeedOffset = 0;
1212 totalSeedFracOffset = 0;
1213 pfClusteringVars.pcrhFracSize() = 0;
1214 }
1215
1216 alpaka::syncBlockThreads(acc);
1217
1218
1219
1220 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1221 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1222 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = -1;
1223 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1224 if (topoId > -1) {
1225
1226 alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoRHCount(), 1);
1227
1228 if (topoId == rhIdx) {
1229 int topoIdx = alpaka::atomicAdd(acc, &pfClusteringVars.nTopos(), 1);
1230 pfClusteringVars[topoIdx].topoIds() =
1231 topoId;
1232 }
1233
1234 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1235 alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoSeedCount(), 1);
1236 }
1237 }
1238 }
1239
1240 alpaka::syncBlockThreads(acc);
1241
1242
1243 for (int topoId = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; topoId < nRH;
1244 topoId += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1245 if (pfClusteringVars[topoId].topoSeedCount() > 0) {
1246
1247 int offset = alpaka::atomicAdd(acc, &totalSeedOffset, pfClusteringVars[topoId].topoSeedCount());
1248 pfClusteringVars[topoId].topoSeedOffsets() = offset;
1249 }
1250 }
1251 alpaka::syncBlockThreads(acc);
1252
1253
1254
1255 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1256 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1257 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1258 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1259
1260 int k = alpaka::atomicAdd(acc, &pfClusteringVars[topoId].rhCount(), 1);
1261 int seedIdx = pfClusteringVars[topoId].topoSeedOffsets() + k;
1262 if ((unsigned int)seedIdx >= *nSeeds)
1263 printf("Warning(contraction) %8d > %8d should not happen, check topoId: %d has %d rh\n",
1264 seedIdx,
1265 *nSeeds,
1266 topoId,
1267 k);
1268 pfClusteringVars[seedIdx].topoSeedList() = rhIdx;
1269 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = seedIdx;
1270 clusterView[seedIdx].topoId() = topoId;
1271 clusterView[seedIdx].seedRHIdx() = rhIdx;
1272 clusterView[seedIdx].depth() = pfRecHits[rhIdx].depth();
1273 }
1274 }
1275
1276 alpaka::syncBlockThreads(acc);
1277
1278
1279 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1280 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1281 pfClusteringVars[rhIdx].rhCount() = 1;
1282
1283 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1284 if (pfClusteringVars[rhIdx].pfrh_isSeed() && topoId > -1) {
1285
1286 int offset = alpaka::atomicAdd(acc, &totalSeedFracOffset, pfClusteringVars[topoId].topoRHCount());
1287
1288
1289 pfClusteringVars[rhIdx].seedFracOffsets() = offset;
1290
1291
1292 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracOffset() =
1293 pfClusteringVars[rhIdx].seedFracOffsets();
1294 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracSize() =
1295 pfClusteringVars[topoId].topoRHCount() - pfClusteringVars[topoId].topoSeedCount() + 1;
1296 }
1297 }
1298
1299 alpaka::syncBlockThreads(acc);
1300
1301 if (once_per_block(acc)) {
1302 pfClusteringVars.pcrhFracSize() = totalSeedFracOffset;
1303 pfClusteringVars.nRHFracs() = totalSeedFracOffset;
1304 clusterView.nRHFracs() = totalSeedFracOffset;
1305 *nRHF = totalSeedFracOffset;
1306 clusterView.nSeeds() = *nSeeds;
1307 clusterView.nTopos() = pfClusteringVars.nTopos();
1308
1309 if (pfClusteringVars.pcrhFracSize() > 200000)
1310 printf("At the end of topoClusterContraction, found large *pcrhFracSize = %d\n",
1311 pfClusteringVars.pcrhFracSize());
1312 }
1313 }
1314 };
1315
1316
1317
1318 class FillRhfIndex {
1319 public:
1320 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1321 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1322 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1323 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1324 reco::PFRecHitFractionDeviceCollection::View fracView) const {
1325 const int nRH = pfRecHits.size();
1326
1327 for (auto index : uniform_elements_nd(acc, {nRH, nRH})) {
1328 const int i = index[0u];
1329 const int j = index[1u];
1330 int topoId = pfClusteringVars[i].pfrh_topoId();
1331 if (topoId > -1 && pfClusteringVars[i].pfrh_isSeed() && topoId == pfClusteringVars[j].pfrh_topoId()) {
1332 if (!pfClusteringVars[j].pfrh_isSeed()) {
1333 int k = alpaka::atomicAdd(
1334 acc, &pfClusteringVars[i].rhCount(), 1);
1335 auto fraction = fracView[pfClusteringVars[i].seedFracOffsets() + k];
1336 fraction.pfrhIdx() = j;
1337 fraction.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx();
1338 } else if (i == j) {
1339 auto seed = fracView[pfClusteringVars[i].seedFracOffsets()];
1340 seed.pfrhIdx() = j;
1341 seed.frac() = 1;
1342 seed.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx();
1343 }
1344 }
1345 }
1346 }
1347 };
1348
1349 class FastCluster {
1350 public:
1351 template <bool debug = false, typename TAcc, typename = std::enable_if<!std::is_same_v<Device, alpaka::DevCpu>>>
1352 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1353 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1354 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1355 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1356 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1357 reco::PFClusterDeviceCollection::View clusterView,
1358 reco::PFRecHitFractionDeviceCollection::View fracView) const {
1359 const int nRH = pfRecHits.size();
1360 int& topoId = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1361 int& nRHTopo = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1362 int& nSeeds = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1363
1364 if (once_per_block(acc)) {
1365 topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
1366 nRHTopo = pfClusteringVars[topoId].topoRHCount();
1367 nSeeds = pfClusteringVars[topoId].topoSeedCount();
1368 }
1369
1370 alpaka::syncBlockThreads(acc);
1371
1372 if (topoId < nRH && nRHTopo > 0 && nSeeds > 0) {
1373 if (nRHTopo == nSeeds) {
1374
1375 if (once_per_block(acc)) {
1376
1377 int rhIdx = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1378 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1379 clusterView[seedIdx].energy() = pfRecHits[rhIdx].energy();
1380 clusterView[seedIdx].x() = pfRecHits[rhIdx].x();
1381 clusterView[seedIdx].y() = pfRecHits[rhIdx].y();
1382 clusterView[seedIdx].z() = pfRecHits[rhIdx].z();
1383 }
1384
1385 } else if ((not std::is_same_v<Device, alpaka::DevCpu>) && nSeeds == 1) {
1386
1387 hcalFastCluster_singleSeed(
1388 acc, pfClusParams, topology, topoId, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1389 } else if ((not std::is_same_v<Device, alpaka::DevCpu>) && nSeeds <= 100 &&
1390 nRHTopo - nSeeds < threadsPerBlockForClustering) {
1391 hcalFastCluster_multiSeedParallel(
1392 acc, pfClusParams, topology, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1393 } else if (nSeeds <= 400 && nRHTopo - nSeeds <= 1500) {
1394
1395 hcalFastCluster_multiSeedIterative(
1396 acc, pfClusParams, topology, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1397 } else {
1398 if constexpr (debug) {
1399 if (once_per_block(acc))
1400 printf("Topo cluster %d has %d seeds and %d rechits. Will be processed in next kernel.\n",
1401 topoId,
1402 nSeeds,
1403 nRHTopo);
1404 }
1405 }
1406 }
1407 }
1408 };
1409
1410
1411 class FastClusterExotic {
1412 public:
1413 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
1414 ALPAKA_FN_ACC void operator()(const TAcc& acc,
1415 const reco::PFRecHitHostCollection::ConstView pfRecHits,
1416 const reco::PFClusterParamsDeviceCollection::ConstView pfClusParams,
1417 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1418 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1419 reco::PFClusterDeviceCollection::View clusterView,
1420 reco::PFRecHitFractionDeviceCollection::View fracView,
1421 Position4* __restrict__ globalClusterPos,
1422 Position4* __restrict__ globalPrevClusterPos,
1423 float* __restrict__ globalClusterEnergy,
1424 float* __restrict__ globalRhFracSum,
1425 int* __restrict__ globalSeeds,
1426 int* __restrict__ globalRechits) const {
1427 const int nRH = pfRecHits.size();
1428 for (int topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]; topoId < nRH;
1429 topoId += blocksForExoticClusters) {
1430 int nRHTopo = pfClusteringVars[topoId].topoRHCount();
1431 int nSeeds = pfClusteringVars[topoId].topoSeedCount();
1432
1433
1434 if (nRHTopo > 0 && (nSeeds > 400 || nRHTopo - nSeeds > 1500)) {
1435 hcalFastCluster_exotic(acc,
1436 pfClusParams,
1437 topology,
1438 topoId,
1439 nSeeds,
1440 nRHTopo,
1441 pfRecHits,
1442 pfClusteringVars,
1443 clusterView,
1444 fracView,
1445 globalClusterPos,
1446 globalPrevClusterPos,
1447 globalClusterEnergy,
1448 globalRhFracSum,
1449 globalSeeds,
1450 globalRechits);
1451 }
1452 alpaka::syncBlockThreads(acc);
1453 }
1454 }
1455 };
1456
1457 PFClusterProducerKernel::PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits)
1458 : nSeeds(cms::alpakatools::make_device_buffer<uint32_t>(queue)),
1459 globalClusterPos(
1460 cms::alpakatools::make_device_buffer<Position4[]>(queue, blocksForExoticClusters * maxTopoInput)),
1461 globalPrevClusterPos(
1462 cms::alpakatools::make_device_buffer<Position4[]>(queue, blocksForExoticClusters * maxTopoInput)),
1463 globalClusterEnergy(
1464 cms::alpakatools::make_device_buffer<float[]>(queue, blocksForExoticClusters * maxTopoInput)),
1465 globalRhFracSum(cms::alpakatools::make_device_buffer<float[]>(queue, blocksForExoticClusters * maxTopoInput)),
1466 globalSeeds(cms::alpakatools::make_device_buffer<int[]>(queue, blocksForExoticClusters * maxTopoInput)),
1467 globalRechits(cms::alpakatools::make_device_buffer<int[]>(queue, blocksForExoticClusters * maxTopoInput)) {
1468 alpaka::memset(queue, nSeeds, 0x00);
1469 }
1470
1471 void PFClusterProducerKernel::seedTopoAndContract(Queue& queue,
1472 const reco::PFClusterParamsDeviceCollection& params,
1473 const reco::PFRecHitHCALTopologyDeviceCollection& topology,
1474 reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
1475 reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
1476 const reco::PFRecHitHostCollection& pfRecHits,
1477 reco::PFClusterDeviceCollection& pfClusters,
1478 uint32_t* __restrict__ nRHF) {
1479 const int nRH = pfRecHits->size();
1480 const int threadsPerBlock = 256;
1481 const int blocks = divide_up_by(nRH, threadsPerBlock);
1482
1483
1484 alpaka::exec<Acc1D>(queue,
1485 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1486 SeedingTopoThresh{},
1487 pfClusteringVars.view(),
1488 params.view(),
1489 topology.view(),
1490 pfRecHits.view(),
1491 pfClusters.view(),
1492 nSeeds.data());
1493
1494 alpaka::exec<Acc1D>(queue,
1495 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1496 PrepareTopoInputs{},
1497 pfRecHits.view(),
1498 pfClusteringVars.view(),
1499 pfClusteringEdgeVars.view(),
1500 nSeeds.data());
1501
1502 alpaka::exec<Acc1D>(queue,
1503 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1504 ECLCCInit{},
1505 pfRecHits.view(),
1506 pfClusteringVars.view(),
1507 pfClusteringEdgeVars.view());
1508 alpaka::exec<Acc1D>(queue,
1509 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1510 ECLCCCompute1{},
1511 pfRecHits.view(),
1512 pfClusteringVars.view(),
1513 pfClusteringEdgeVars.view());
1514 alpaka::exec<Acc1D>(queue,
1515 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1516 ECLCCFlatten{},
1517 pfRecHits.view(),
1518 pfClusteringVars.view(),
1519 pfClusteringEdgeVars.view());
1520
1521 alpaka::exec<Acc1D>(queue,
1522 make_workdiv<Acc1D>(1, threadsPerBlockForClustering),
1523 TopoClusterContraction{},
1524 pfRecHits.view(),
1525 pfClusteringVars.view(),
1526 pfClusters.view(),
1527 nSeeds.data(),
1528 nRHF);
1529 }
1530
1531 void PFClusterProducerKernel::cluster(Queue& queue,
1532 const reco::PFClusterParamsDeviceCollection& params,
1533 const reco::PFRecHitHCALTopologyDeviceCollection& topology,
1534 reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
1535 reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
1536 const reco::PFRecHitHostCollection& pfRecHits,
1537 reco::PFClusterDeviceCollection& pfClusters,
1538 reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
1539 const int nRH = pfRecHits->size();
1540
1541
1542 alpaka::exec<Acc2D>(queue,
1543 make_workdiv<Acc2D>({divide_up_by(nRH, 32), divide_up_by(nRH, 32)}, {32, 32}),
1544 FillRhfIndex{},
1545 pfRecHits.view(),
1546 pfClusteringVars.view(),
1547 pfrhFractions.view());
1548
1549
1550 alpaka::exec<Acc1D>(queue,
1551 make_workdiv<Acc1D>(nRH, threadsPerBlockForClustering),
1552 FastCluster{},
1553 pfRecHits.view(),
1554 params.view(),
1555 topology.view(),
1556 pfClusteringVars.view(),
1557 pfClusters.view(),
1558 pfrhFractions.view());
1559
1560 alpaka::exec<Acc1D>(queue,
1561 make_workdiv<Acc1D>(blocksForExoticClusters,
1562 threadsPerBlockForClustering),
1563 FastClusterExotic{},
1564 pfRecHits.view(),
1565 params.view(),
1566 topology.view(),
1567 pfClusteringVars.view(),
1568 pfClusters.view(),
1569 pfrhFractions.view(),
1570 globalClusterPos.data(),
1571 globalPrevClusterPos.data(),
1572 globalClusterEnergy.data(),
1573 globalRhFracSum.data(),
1574 globalSeeds.data(),
1575 globalRechits.data());
1576 }
1577
1578 }