File indexing completed on 2025-01-22 07:34:27
0001 #include <alpaka/alpaka.hpp>
0002
0003 #include "DataFormats/ParticleFlowReco/interface/PFLayer.h"
0004 #include "FWCore/Utilities/interface/bit_cast.h"
0005 #include "HeterogeneousCore/AlpakaInterface/interface/atomicMaxF.h"
0006 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0007 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0008 #include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterECLCC.h"
0009 #include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h"
0010
0011 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0012
0013 using namespace cms::alpakatools;
0014
0015 using namespace reco::pfClustering;
0016
0017 static constexpr int threadsPerBlockForClustering = 512;
0018 static constexpr uint32_t blocksForExoticClusters = 4;
0019
0020
0021
0022
0023 static constexpr float cutoffDistance = 100.;
0024 static constexpr float cutoffFraction = 0.9999;
0025
0026 static constexpr uint32_t kHBHalf = 1296;
0027 static constexpr uint32_t maxTopoInput = 2 * kHBHalf;
0028
0029
0030 ALPAKA_FN_ACC ALPAKA_FN_INLINE static float dR2(Position4 pos1, Position4 pos2) {
0031 float mag1 = sqrtf(pos1.x * pos1.x + pos1.y * pos1.y + pos1.z * pos1.z);
0032 float cosTheta1 = mag1 > 0.0 ? pos1.z / mag1 : 1.0;
0033 float eta1 = 0.5f * logf((1.0f + cosTheta1) / (1.0f - cosTheta1));
0034 float phi1 = atan2f(pos1.y, pos1.x);
0035
0036 float mag2 = sqrtf(pos2.x * pos2.x + pos2.y * pos2.y + pos2.z * pos2.z);
0037 float cosTheta2 = mag2 > 0.0 ? pos2.z / mag2 : 1.0;
0038 float eta2 = 0.5f * logf((1.0f + cosTheta2) / (1.0f - cosTheta2));
0039 float phi2 = atan2f(pos2.y, pos2.x);
0040
0041 float deta = eta2 - eta1;
0042 constexpr const float fPI = M_PI;
0043 float dphi = std::abs(std::abs(phi2 - phi1) - fPI) - fPI;
0044 return (deta * deta + dphi * dphi);
0045 }
0046
0047
0048 ALPAKA_FN_ACC static auto getSeedRhIdx(int* seeds, int seedNum) { return seeds[seedNum]; }
0049
0050
0051 ALPAKA_FN_ACC static auto getRhFrac(reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0052 int topoSeedBegin,
0053 reco::PFRecHitFractionDeviceCollection::View fracView,
0054 int seedNum,
0055 int rhNum) {
0056 int seedIdx = pfClusteringVars[topoSeedBegin + seedNum].topoSeedList();
0057 return fracView[pfClusteringVars[seedIdx].seedFracOffsets() + rhNum].frac();
0058 }
0059
0060
0061 template <bool debug = false>
0062 ALPAKA_FN_ACC static void updateClusterPos(::reco::PFClusterParamsSoA::ConstView pfClusParams,
0063 Position4& pos4,
0064 float frac,
0065 int rhInd,
0066 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0067 float rhENormInv) {
0068 Position4 rechitPos = Position4{pfRecHits[rhInd].x(), pfRecHits[rhInd].y(), pfRecHits[rhInd].z(), 1.0};
0069 const auto rh_energy = pfRecHits[rhInd].energy() * frac;
0070 const auto norm = (frac < pfClusParams.minFracInCalc() ? 0.0f : std::max(0.0f, logf(rh_energy * rhENormInv)));
0071 if constexpr (debug)
0072 printf("\t\t\trechit %d: norm = %f\tfrac = %f\trh_energy = %f\tpos = (%f, %f, %f)\n",
0073 rhInd,
0074 norm,
0075 frac,
0076 rh_energy,
0077 rechitPos.x,
0078 rechitPos.y,
0079 rechitPos.z);
0080
0081 pos4.x += rechitPos.x * norm;
0082 pos4.y += rechitPos.y * norm;
0083 pos4.z += rechitPos.z * norm;
0084 pos4.w += norm;
0085 }
0086
0087
0088
0089 template <bool debug = false>
0090 ALPAKA_FN_ACC static void hcalFastCluster_singleSeed(
0091 const Acc1D& acc,
0092 ::reco::PFClusterParamsSoA::ConstView pfClusParams,
0093 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0094 int topoId,
0095 int nRHTopo,
0096 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0097 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0098 reco::PFClusterDeviceCollection::View clusterView,
0099 reco::PFRecHitFractionDeviceCollection::View fracView) {
0100 int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
0101
0102 int& i = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0103 int& nRHOther = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0104 unsigned int& iter = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
0105 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0106 float& clusterEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0107 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0108 float& seedEnergy = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0109 Position4& clusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0110 Position4& prevClusterPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0111 Position4& seedPos = alpaka::declareSharedVar<Position4, __COUNTER__>(acc);
0112 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0113 if (once_per_block(acc)) {
0114 i = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0115 nRHOther = nRHTopo - 1;
0116 seedPos = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.};
0117 clusterPos = seedPos;
0118 prevClusterPos = seedPos;
0119 seedEnergy = pfRecHits[i].energy();
0120 clusterEnergy = seedEnergy;
0121 tol = pfClusParams.stoppingTolerance();
0122
0123 if (topology.cutsFromDB()) {
0124 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0125 } else {
0126 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0127 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0128 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0129 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0130 else {
0131 rhENormInv = 0.;
0132 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0133 }
0134 }
0135
0136 iter = 0;
0137 notDone = true;
0138 }
0139 alpaka::syncBlockThreads(acc);
0140
0141 int j = -1;
0142 int rhFracOffset = -1;
0143 Position4 rhPos;
0144 float rhEnergy = -1., rhPosNorm = -1.;
0145
0146 if (tid < nRHOther) {
0147 rhFracOffset =
0148 pfClusteringVars[i].seedFracOffsets() + tid + 1;
0149 j = fracView[rhFracOffset].pfrhIdx();
0150 rhPos = Position4{pfRecHits[j].x(), pfRecHits[j].y(), pfRecHits[j].z(), 1.};
0151 rhEnergy = pfRecHits[j].energy();
0152 rhPosNorm = fmaxf(0., logf(rhEnergy * rhENormInv));
0153 }
0154 alpaka::syncBlockThreads(acc);
0155
0156 do {
0157 if constexpr (debug) {
0158 if (once_per_block(acc))
0159 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0160 }
0161 float dist2 = -1., d2 = -1., fraction = -1.;
0162 if (tid < nRHOther) {
0163
0164 dist2 = (clusterPos.x - rhPos.x) * (clusterPos.x - rhPos.x) +
0165 (clusterPos.y - rhPos.y) * (clusterPos.y - rhPos.y) +
0166 (clusterPos.z - rhPos.z) * (clusterPos.z - rhPos.z);
0167
0168 d2 = dist2 / pfClusParams.showerSigma2();
0169 fraction = clusterEnergy * rhENormInv * expf(-0.5f * d2);
0170
0171
0172 if (fraction > pfClusParams.minFracTot() && d2 < cutoffDistance)
0173 fraction = 1.;
0174 else
0175 fraction = -1.;
0176 fracView[rhFracOffset].frac() = fraction;
0177 }
0178 alpaka::syncBlockThreads(acc);
0179
0180 if constexpr (debug) {
0181 if (once_per_block(acc))
0182 printf("Computing cluster position for topoId %d\n", topoId);
0183 }
0184
0185 if (once_per_block(acc)) {
0186
0187 clusterPos = seedPos;
0188 clusterEnergy = seedEnergy;
0189 }
0190 alpaka::syncBlockThreads(acc);
0191
0192
0193 if (fraction > -0.5) {
0194 alpaka::atomicAdd(acc, &clusterEnergy, rhEnergy, alpaka::hierarchy::Threads{});
0195 alpaka::atomicAdd(acc, &clusterPos.x, rhPos.x * rhPosNorm, alpaka::hierarchy::Threads{});
0196 alpaka::atomicAdd(acc, &clusterPos.y, rhPos.y * rhPosNorm, alpaka::hierarchy::Threads{});
0197 alpaka::atomicAdd(acc, &clusterPos.z, rhPos.z * rhPosNorm, alpaka::hierarchy::Threads{});
0198 alpaka::atomicAdd(acc, &clusterPos.w, rhPosNorm, alpaka::hierarchy::Threads{});
0199 }
0200 alpaka::syncBlockThreads(acc);
0201
0202 if (once_per_block(acc)) {
0203
0204 if (clusterPos.w >= pfClusParams.minAllowedNormalization()) {
0205
0206 clusterPos.x /= clusterPos.w;
0207 clusterPos.y /= clusterPos.w;
0208 clusterPos.z /= clusterPos.w;
0209
0210 if constexpr (debug)
0211 printf("\tPF cluster (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0212 i,
0213 clusterEnergy,
0214 clusterPos.x,
0215 clusterPos.y,
0216 clusterPos.z);
0217 } else {
0218 if constexpr (debug)
0219 printf("\tPF cluster (seed %d) position norm (%f) less than minimum (%f)\n",
0220 i,
0221 clusterPos.w,
0222 pfClusParams.minAllowedNormalization());
0223 clusterPos.x = 0.;
0224 clusterPos.y = 0.;
0225 clusterPos.z = 0.;
0226 }
0227 float diff2 = dR2(prevClusterPos, clusterPos);
0228 if constexpr (debug)
0229 printf("\tPF cluster (seed %d) has diff2 = %f\n", i, diff2);
0230 prevClusterPos = clusterPos;
0231
0232 float tol2 = tol * tol;
0233 iter++;
0234 notDone = (diff2 > tol2) && (iter < pfClusParams.maxIterations());
0235 if constexpr (debug) {
0236 if (diff2 > tol2)
0237 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0238 else if constexpr (debug)
0239 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0240 }
0241 }
0242 alpaka::syncBlockThreads(acc);
0243 } while (notDone);
0244 if (once_per_block(acc)) {
0245 int rhIdx =
0246 pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0247 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0248 clusterView[seedIdx].energy() = clusterEnergy;
0249 clusterView[seedIdx].x() = clusterPos.x;
0250 clusterView[seedIdx].y() = clusterPos.y;
0251 clusterView[seedIdx].z() = clusterPos.z;
0252 }
0253 }
0254
0255
0256
0257 template <bool debug = false>
0258 ALPAKA_FN_ACC static void hcalFastCluster_multiSeedParallel(
0259 const Acc1D& acc,
0260 ::reco::PFClusterParamsSoA::ConstView pfClusParams,
0261 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0262 int topoId,
0263 int nSeeds,
0264 int nRHTopo,
0265 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0266 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0267 reco::PFClusterDeviceCollection::View clusterView,
0268 reco::PFRecHitFractionDeviceCollection::View fracView) {
0269 int tid = alpaka::getIdx<alpaka::Block,
0270 alpaka::Threads>(
0271 acc)[0u];
0272
0273 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0274 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0275 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0276 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0277 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0278 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0279 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0280 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0281 auto& clusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
0282 auto& prevClusterPos = alpaka::declareSharedVar<Position4[100], __COUNTER__>(acc);
0283 auto& clusterEnergy = alpaka::declareSharedVar<float[100], __COUNTER__>(acc);
0284 auto& rhFracSum = alpaka::declareSharedVar<float[threadsPerBlockForClustering], __COUNTER__>(acc);
0285 auto& seeds = alpaka::declareSharedVar<int[100], __COUNTER__>(acc);
0286 auto& rechits = alpaka::declareSharedVar<int[threadsPerBlockForClustering], __COUNTER__>(acc);
0287
0288 if (once_per_block(acc)) {
0289 nRHNotSeed = nRHTopo - nSeeds + 1;
0290 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0291 tol = pfClusParams.stoppingTolerance() *
0292 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0293 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0294 iter = 0;
0295 notDone = true;
0296
0297 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0298
0299 if (topology.cutsFromDB()) {
0300 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0301 } else {
0302 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0303 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0304 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0305 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0306 else {
0307 rhENormInv = 0.;
0308 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0309 }
0310 }
0311 }
0312 alpaka::syncBlockThreads(acc);
0313
0314 if (tid < nSeeds)
0315 seeds[tid] = pfClusteringVars[topoSeedBegin + tid].topoSeedList();
0316 if (tid < nRHNotSeed - 1)
0317 rechits[tid] =
0318 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + tid + 1]
0319 .pfrhIdx();
0320
0321 alpaka::syncBlockThreads(acc);
0322
0323 if constexpr (debug) {
0324 if (once_per_block(acc)) {
0325 printf("\n===========================================================================================\n");
0326 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0327 for (int s = 0; s < nSeeds; s++) {
0328 if (s != 0)
0329 printf(", ");
0330 printf("%d", getSeedRhIdx(seeds, s));
0331 }
0332 if (nRHTopo == nSeeds) {
0333 printf(")\n\n");
0334 } else {
0335 printf(") and other rechits (");
0336 for (int r = 1; r < nRHNotSeed; r++) {
0337 if (r != 1)
0338 printf(", ");
0339 if (r <= 0) {
0340 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0341 }
0342 printf("%d", rechits[r - 1]);
0343 }
0344 printf(")\n\n");
0345 }
0346 }
0347 alpaka::syncBlockThreads(acc);
0348 }
0349
0350
0351 if (tid < nSeeds) {
0352 int i = getSeedRhIdx(seeds, tid);
0353 clusterPos[tid] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0354 prevClusterPos[tid] = clusterPos[tid];
0355 clusterEnergy[tid] = pfRecHits[i].energy();
0356 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0357 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0358 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0359 }
0360 }
0361 alpaka::syncBlockThreads(acc);
0362
0363 int rhThreadIdx = -1;
0364 Position4 rhThreadPos;
0365 if (tid < (nRHNotSeed - 1)) {
0366 rhThreadIdx = rechits[tid];
0367 rhThreadPos = Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0368 }
0369
0370
0371 int seedThreadIdx = -1;
0372 Neighbours4 seedNeighbors = Neighbours4{-9, -9, -9, -9};
0373 float seedEnergy = -1.;
0374 Position4 seedInitClusterPos = Position4{0., 0., 0., 0.};
0375 if (tid < nSeeds) {
0376 if constexpr (debug)
0377 printf("tid: %d\n", tid);
0378 seedThreadIdx = getSeedRhIdx(seeds, tid);
0379 seedNeighbors = Neighbours4{pfRecHits[seedThreadIdx].neighbours()(0),
0380 pfRecHits[seedThreadIdx].neighbours()(1),
0381 pfRecHits[seedThreadIdx].neighbours()(2),
0382 pfRecHits[seedThreadIdx].neighbours()(3)};
0383 seedEnergy = pfRecHits[seedThreadIdx].energy();
0384
0385
0386 updateClusterPos(pfClusParams, seedInitClusterPos, 1., seedThreadIdx, pfRecHits, rhENormInv);
0387 }
0388
0389 do {
0390 if constexpr (debug) {
0391 if (once_per_block(acc))
0392 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0393 }
0394
0395
0396 rhFracSum[tid] = 0.;
0397 if (once_per_block(acc))
0398 diff2 = -1;
0399
0400 if (tid < (nRHNotSeed - 1)) {
0401 for (int s = 0; s < nSeeds; s++) {
0402 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0403 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0404 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0405
0406 float d2 = dist2 / pfClusParams.showerSigma2();
0407 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0408
0409 rhFracSum[tid] += fraction;
0410 }
0411 }
0412 alpaka::syncBlockThreads(acc);
0413
0414 if (tid < (nRHNotSeed - 1)) {
0415 for (int s = 0; s < nSeeds; s++) {
0416 int i = seeds[s];
0417 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0418 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0419 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0420
0421 float d2 = dist2 / pfClusParams.showerSigma2();
0422 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0423
0424 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0425 float fracpct = fraction / rhFracSum[tid];
0426 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0427 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0428 } else {
0429 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0430 }
0431 } else {
0432 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0433 }
0434 }
0435 }
0436 alpaka::syncBlockThreads(acc);
0437
0438 if constexpr (debug) {
0439 if (once_per_block(acc))
0440 printf("Computing cluster position for topoId %d\n", topoId);
0441 }
0442
0443
0444 if (tid < nSeeds) {
0445 clusterPos[tid] = seedInitClusterPos;
0446 clusterEnergy[tid] = seedEnergy;
0447 if constexpr (debug) {
0448 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0449 tid,
0450 seeds[tid],
0451 clusterEnergy[tid],
0452 clusterPos[tid].x,
0453 clusterPos[tid].y,
0454 clusterPos[tid].z,
0455 clusterPos[tid].w);
0456 }
0457 }
0458 alpaka::syncBlockThreads(acc);
0459
0460
0461 if (tid < nSeeds) {
0462 for (int r = 0; r < nRHNotSeed - 1; r++) {
0463 int j = rechits[r];
0464 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, tid, r + 1);
0465
0466 if (frac > -0.5) {
0467 clusterEnergy[tid] += frac * pfRecHits[j].energy();
0468
0469 if (nSeeds == 1 || j == seedNeighbors.x || j == seedNeighbors.y || j == seedNeighbors.z ||
0470 j == seedNeighbors.w)
0471 updateClusterPos(pfClusParams, clusterPos[tid], frac, j, pfRecHits, rhENormInv);
0472 }
0473 }
0474 }
0475 alpaka::syncBlockThreads(acc);
0476
0477
0478 if (tid < nSeeds) {
0479 if (clusterPos[tid].w >= pfClusParams.minAllowedNormalization()) {
0480
0481 clusterPos[tid].x /= clusterPos[tid].w;
0482 clusterPos[tid].y /= clusterPos[tid].w;
0483 clusterPos[tid].z /= clusterPos[tid].w;
0484
0485 if constexpr (debug)
0486 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0487 tid,
0488 seedThreadIdx,
0489 clusterEnergy[tid],
0490 clusterPos[tid].x,
0491 clusterPos[tid].y,
0492 clusterPos[tid].z);
0493 } else {
0494 if constexpr (debug)
0495 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
0496 tid,
0497 seedThreadIdx,
0498 clusterPos[tid].w,
0499 pfClusParams.minAllowedNormalization());
0500 clusterPos[tid].x = 0.0;
0501 clusterPos[tid].y = 0.0;
0502 clusterPos[tid].z = 0.0;
0503 }
0504 }
0505 alpaka::syncBlockThreads(acc);
0506
0507 if (tid < nSeeds) {
0508 float delta2 = dR2(prevClusterPos[tid], clusterPos[tid]);
0509 if constexpr (debug)
0510 printf("\tCluster %d (seed %d) has delta2 = %f\n", tid, seeds[tid], delta2);
0511 atomicMaxF(acc, &diff2, delta2);
0512 prevClusterPos[tid] = clusterPos[tid];
0513 }
0514 alpaka::syncBlockThreads(acc);
0515
0516 if (once_per_block(acc)) {
0517 float tol2 = tol * tol;
0518 iter++;
0519 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
0520 if constexpr (debug) {
0521 if (diff2 > tol2)
0522 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0523 else if constexpr (debug)
0524 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0525 }
0526 }
0527 alpaka::syncBlockThreads(acc);
0528 } while (notDone);
0529 if (once_per_block(acc))
0530
0531 if (tid < nSeeds) {
0532 int rhIdx = pfClusteringVars[tid + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0533 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0534 clusterView[seedIdx].energy() = clusterEnergy[tid];
0535 clusterView[seedIdx].x() = clusterPos[tid].x;
0536 clusterView[seedIdx].y() = clusterPos[tid].y;
0537 clusterView[seedIdx].z() = clusterPos[tid].z;
0538 }
0539 }
0540
0541
0542
0543
0544 template <bool debug = false>
0545 ALPAKA_FN_ACC static void hcalFastCluster_exotic(const Acc1D& acc,
0546 ::reco::PFClusterParamsSoA::ConstView pfClusParams,
0547 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0548 int topoId,
0549 int nSeeds,
0550 int nRHTopo,
0551 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0552 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0553 reco::PFClusterDeviceCollection::View clusterView,
0554 reco::PFRecHitFractionDeviceCollection::View fracView,
0555 Position4* __restrict__ globalClusterPos,
0556 Position4* __restrict__ globalPrevClusterPos,
0557 float* __restrict__ globalClusterEnergy,
0558 float* __restrict__ globalRhFracSum,
0559 int* __restrict__ globalSeeds,
0560 int* __restrict__ globalRechits) {
0561 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0562 int& blockIdx = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0563 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0564 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0565 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0566 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0567 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0568 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0569 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0570
0571 blockIdx = maxTopoInput * alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
0572 Position4* clusterPos = globalClusterPos + blockIdx;
0573 Position4* prevClusterPos = globalPrevClusterPos + blockIdx;
0574 float* clusterEnergy = globalClusterEnergy + blockIdx;
0575 float* rhFracSum = globalRhFracSum + blockIdx;
0576 int* seeds = globalSeeds + blockIdx;
0577 int* rechits = globalRechits + blockIdx;
0578
0579 if (once_per_block(acc)) {
0580 nRHNotSeed = nRHTopo - nSeeds + 1;
0581 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0582 tol = pfClusParams.stoppingTolerance() *
0583 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0584 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0585 iter = 0;
0586 notDone = true;
0587
0588 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0589
0590 if (topology.cutsFromDB()) {
0591 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0592 } else {
0593 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0594 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0595 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0596 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0597 else {
0598 rhENormInv = 0.;
0599 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0600 }
0601 }
0602 }
0603 alpaka::syncBlockThreads(acc);
0604
0605 for (int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; n < nRHTopo; n += stride) {
0606 if (n < nSeeds)
0607 seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList();
0608 if (n < nRHNotSeed - 1)
0609 rechits[n] =
0610 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1]
0611 .pfrhIdx();
0612 }
0613 alpaka::syncBlockThreads(acc);
0614
0615 if constexpr (debug) {
0616 if (once_per_block(acc)) {
0617 printf("\n===========================================================================================\n");
0618 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0619 for (int s = 0; s < nSeeds; s++) {
0620 if (s != 0)
0621 printf(", ");
0622 printf("%d", getSeedRhIdx(seeds, s));
0623 }
0624 if (nRHTopo == nSeeds) {
0625 printf(")\n\n");
0626 } else {
0627 printf(") and other rechits (");
0628 for (int r = 1; r < nRHNotSeed; r++) {
0629 if (r != 1)
0630 printf(", ");
0631 if (r <= 0) {
0632 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0633 }
0634 printf("%d", rechits[r - 1]);
0635 }
0636 printf(")\n\n");
0637 }
0638 }
0639 alpaka::syncBlockThreads(acc);
0640 }
0641
0642
0643 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0644 int i = seeds[s];
0645 clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0646 prevClusterPos[s] = clusterPos[s];
0647 clusterEnergy[s] = pfRecHits[i].energy();
0648 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0649 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0650 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0651 }
0652 }
0653 alpaka::syncBlockThreads(acc);
0654
0655 do {
0656 if constexpr (debug) {
0657 if (once_per_block(acc))
0658 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0659 }
0660
0661 if (once_per_block(acc))
0662 diff2 = -1;
0663
0664 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0665 rhFracSum[tid] = 0.;
0666 int rhThreadIdx = rechits[tid];
0667 Position4 rhThreadPos =
0668 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0669 for (int s = 0; s < nSeeds; s++) {
0670 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0671 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0672 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0673
0674 float d2 = dist2 / pfClusParams.showerSigma2();
0675 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0676
0677 rhFracSum[tid] += fraction;
0678 }
0679 }
0680 alpaka::syncBlockThreads(acc);
0681
0682 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0683 int rhThreadIdx = rechits[tid];
0684 Position4 rhThreadPos =
0685 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0686 for (int s = 0; s < nSeeds; s++) {
0687 int i = seeds[s];
0688 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0689 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0690 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0691
0692 float d2 = dist2 / pfClusParams.showerSigma2();
0693 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0694
0695 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0696 float fracpct = fraction / rhFracSum[tid];
0697 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0698 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0699 } else {
0700 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0701 }
0702 } else {
0703 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0704 }
0705 }
0706 }
0707 alpaka::syncBlockThreads(acc);
0708
0709 if constexpr (debug) {
0710 if (once_per_block(acc))
0711 printf("Computing cluster position for topoId %d\n", topoId);
0712 }
0713
0714
0715 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0716 int seedRhIdx = getSeedRhIdx(seeds, s);
0717 float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv);
0718 clusterPos[s] = Position4{
0719 pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm};
0720 clusterEnergy[s] = pfRecHits[seedRhIdx].energy();
0721 if constexpr (debug) {
0722 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0723 s,
0724 seeds[s],
0725 clusterEnergy[s],
0726 clusterPos[s].x,
0727 clusterPos[s].y,
0728 clusterPos[s].z,
0729 clusterPos[s].w);
0730 }
0731 }
0732 alpaka::syncBlockThreads(acc);
0733
0734
0735 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0736 int seedRhIdx = getSeedRhIdx(seeds, s);
0737 for (int r = 0; r < nRHNotSeed - 1; r++) {
0738 int j = rechits[r];
0739 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1);
0740
0741 if (frac > -0.5) {
0742 clusterEnergy[s] += frac * pfRecHits[j].energy();
0743
0744 if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) ||
0745 j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3))
0746 updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv);
0747 }
0748 }
0749 }
0750 alpaka::syncBlockThreads(acc);
0751
0752
0753 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0754 if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) {
0755
0756 clusterPos[s].x /= clusterPos[s].w;
0757 clusterPos[s].y /= clusterPos[s].w;
0758 clusterPos[s].z /= clusterPos[s].w;
0759
0760 if constexpr (debug)
0761 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
0762 s,
0763 seeds[s],
0764 clusterEnergy[s],
0765 clusterPos[s].x,
0766 clusterPos[s].y,
0767 clusterPos[s].z);
0768 } else {
0769 if constexpr (debug)
0770 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
0771 s,
0772 seeds[s],
0773 clusterPos[s].w,
0774 pfClusParams.minAllowedNormalization());
0775 clusterPos[s].x = 0.0;
0776 clusterPos[s].y = 0.0;
0777 clusterPos[s].z = 0.0;
0778 }
0779 }
0780 alpaka::syncBlockThreads(acc);
0781
0782 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0783 float delta2 = dR2(prevClusterPos[s], clusterPos[s]);
0784 if constexpr (debug)
0785 printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2);
0786 atomicMaxF(acc, &diff2, delta2);
0787 prevClusterPos[s] = clusterPos[s];
0788 }
0789 alpaka::syncBlockThreads(acc);
0790
0791 if (once_per_block(acc)) {
0792 float tol2 = tol * tol;
0793 iter++;
0794 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
0795 if constexpr (debug) {
0796 if (diff2 > tol2)
0797 printf("\tTopoId %d has diff2 = %f greater than squared tolerance %f (continuing)\n", topoId, diff2, tol2);
0798 else if constexpr (debug)
0799 printf("\tTopoId %d has diff2 = %f LESS than squared tolerance %f (terminating!)\n", topoId, diff2, tol2);
0800 }
0801 }
0802 alpaka::syncBlockThreads(acc);
0803 } while (notDone);
0804 if (once_per_block(acc))
0805 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0806 int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
0807 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
0808 clusterView[seedIdx].energy() = pfRecHits[s].energy();
0809 clusterView[seedIdx].x() = pfRecHits[s].x();
0810 clusterView[seedIdx].y() = pfRecHits[s].y();
0811 clusterView[seedIdx].z() = pfRecHits[s].z();
0812 }
0813 alpaka::syncBlockThreads(acc);
0814 }
0815
0816
0817
0818 template <bool debug = false>
0819 ALPAKA_FN_ACC static void hcalFastCluster_multiSeedIterative(
0820 const Acc1D& acc,
0821 ::reco::PFClusterParamsSoA::ConstView pfClusParams,
0822 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
0823 int topoId,
0824 int nSeeds,
0825 int nRHTopo,
0826 reco::PFRecHitDeviceCollection::ConstView pfRecHits,
0827 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
0828 reco::PFClusterDeviceCollection::View clusterView,
0829 reco::PFRecHitFractionDeviceCollection::View fracView) {
0830 int& nRHNotSeed = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0831 int& topoSeedBegin = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0832 int& stride = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0833 int& iter = alpaka::declareSharedVar<int, __COUNTER__>(acc);
0834 float& tol = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0835 float& diff2 = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0836 float& rhENormInv = alpaka::declareSharedVar<float, __COUNTER__>(acc);
0837 bool& notDone = alpaka::declareSharedVar<bool, __COUNTER__>(acc);
0838
0839 auto& clusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
0840 auto& prevClusterPos = alpaka::declareSharedVar<Position4[400], __COUNTER__>(acc);
0841 auto& clusterEnergy = alpaka::declareSharedVar<float[400], __COUNTER__>(acc);
0842 auto& rhFracSum = alpaka::declareSharedVar<float[1500], __COUNTER__>(acc);
0843 auto& seeds = alpaka::declareSharedVar<int[400], __COUNTER__>(acc);
0844 auto& rechits = alpaka::declareSharedVar<int[1500], __COUNTER__>(acc);
0845
0846 if (once_per_block(acc)) {
0847 nRHNotSeed = nRHTopo - nSeeds + 1;
0848 topoSeedBegin = pfClusteringVars[topoId].topoSeedOffsets();
0849 tol = pfClusParams.stoppingTolerance() *
0850 powf(fmaxf(1.0, nSeeds - 1), 2.0);
0851 stride = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
0852 iter = 0;
0853 notDone = true;
0854
0855 int i = pfClusteringVars[topoSeedBegin].topoSeedList();
0856
0857 if (topology.cutsFromDB()) {
0858 rhENormInv = (1.f / topology[pfRecHits[i].denseId()].noiseThreshold());
0859 } else {
0860 if (pfRecHits[i].layer() == PFLayer::HCAL_BARREL1)
0861 rhENormInv = pfClusParams.recHitEnergyNormInvHB_vec()[pfRecHits[i].depth() - 1];
0862 else if (pfRecHits[i].layer() == PFLayer::HCAL_ENDCAP)
0863 rhENormInv = pfClusParams.recHitEnergyNormInvHE_vec()[pfRecHits[i].depth() - 1];
0864 else {
0865 rhENormInv = 0.;
0866 printf("Rechit %d has invalid layer %d!\n", i, pfRecHits[i].layer());
0867 }
0868 }
0869 }
0870 alpaka::syncBlockThreads(acc);
0871
0872 for (int n = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; n < nRHTopo; n += stride) {
0873 if (n < nSeeds)
0874 seeds[n] = pfClusteringVars[topoSeedBegin + n].topoSeedList();
0875 if (n < nRHNotSeed - 1)
0876 rechits[n] =
0877 fracView[pfClusteringVars[pfClusteringVars[topoSeedBegin].topoSeedList()].seedFracOffsets() + n + 1]
0878 .pfrhIdx();
0879 }
0880 alpaka::syncBlockThreads(acc);
0881
0882 if constexpr (debug) {
0883 if (once_per_block(acc)) {
0884 printf("\n===========================================================================================\n");
0885 printf("Processing topo cluster %d with nSeeds = %d nRHTopo = %d and seeds (", topoId, nSeeds, nRHTopo);
0886 for (int s = 0; s < nSeeds; s++) {
0887 if (s != 0)
0888 printf(", ");
0889 printf("%d", getSeedRhIdx(seeds, s));
0890 }
0891 if (nRHTopo == nSeeds) {
0892 printf(")\n\n");
0893 } else {
0894 printf(") and other rechits (");
0895 for (int r = 1; r < nRHNotSeed; r++) {
0896 if (r != 1)
0897 printf(", ");
0898 if (r <= 0) {
0899 printf("Invalid rhNum (%d) for get RhFracIdx!\n", r);
0900 }
0901 printf("%d", rechits[r - 1]);
0902 }
0903 printf(")\n\n");
0904 }
0905 }
0906 alpaka::syncBlockThreads(acc);
0907 }
0908
0909
0910 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0911 int i = seeds[s];
0912 clusterPos[s] = Position4{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z(), 1.0};
0913 prevClusterPos[s] = clusterPos[s];
0914 clusterEnergy[s] = pfRecHits[i].energy();
0915 for (int r = 0; r < (nRHNotSeed - 1); r++) {
0916 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].pfrhIdx() = rechits[r];
0917 fracView[pfClusteringVars[i].seedFracOffsets() + r + 1].frac() = -1.;
0918 }
0919 }
0920 alpaka::syncBlockThreads(acc);
0921
0922 do {
0923 if constexpr (debug) {
0924 if (once_per_block(acc))
0925 printf("\n--- Now on iter %d for topoId %d ---\n", iter, topoId);
0926 }
0927
0928 if (once_per_block(acc))
0929 diff2 = -1;
0930
0931 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0932 rhFracSum[tid] = 0.;
0933 int rhThreadIdx = rechits[tid];
0934 Position4 rhThreadPos =
0935 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0936 for (int s = 0; s < nSeeds; s++) {
0937 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0938 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0939 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0940
0941 float d2 = dist2 / pfClusParams.showerSigma2();
0942 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0943
0944 rhFracSum[tid] += fraction;
0945 }
0946 }
0947 alpaka::syncBlockThreads(acc);
0948
0949 for (int tid = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; tid < nRHNotSeed - 1; tid += stride) {
0950 int rhThreadIdx = rechits[tid];
0951 Position4 rhThreadPos =
0952 Position4{pfRecHits[rhThreadIdx].x(), pfRecHits[rhThreadIdx].y(), pfRecHits[rhThreadIdx].z(), 1.};
0953 for (int s = 0; s < nSeeds; s++) {
0954 int i = seeds[s];
0955 float dist2 = (clusterPos[s].x - rhThreadPos.x) * (clusterPos[s].x - rhThreadPos.x) +
0956 (clusterPos[s].y - rhThreadPos.y) * (clusterPos[s].y - rhThreadPos.y) +
0957 (clusterPos[s].z - rhThreadPos.z) * (clusterPos[s].z - rhThreadPos.z);
0958
0959 float d2 = dist2 / pfClusParams.showerSigma2();
0960 float fraction = clusterEnergy[s] * rhENormInv * expf(-0.5f * d2);
0961
0962 if (rhFracSum[tid] > pfClusParams.minFracTot()) {
0963 float fracpct = fraction / rhFracSum[tid];
0964 if (fracpct > cutoffFraction || (d2 < cutoffDistance && fracpct > pfClusParams.minFracToKeep())) {
0965 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = fracpct;
0966 } else {
0967 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0968 }
0969 } else {
0970 fracView[pfClusteringVars[i].seedFracOffsets() + tid + 1].frac() = -1;
0971 }
0972 }
0973 }
0974 alpaka::syncBlockThreads(acc);
0975
0976 if constexpr (debug) {
0977 if (once_per_block(acc))
0978 printf("Computing cluster position for topoId %d\n", topoId);
0979 }
0980
0981
0982 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
0983 int seedRhIdx = getSeedRhIdx(seeds, s);
0984 float norm = logf(pfRecHits[seedRhIdx].energy() * rhENormInv);
0985 clusterPos[s] = Position4{
0986 pfRecHits[seedRhIdx].x() * norm, pfRecHits[seedRhIdx].y() * norm, pfRecHits[seedRhIdx].z() * norm, norm};
0987 clusterEnergy[s] = pfRecHits[seedRhIdx].energy();
0988 if constexpr (debug) {
0989 printf("Cluster %d (seed %d) has energy %f\tpos = (%f, %f, %f, %f)\n",
0990 s,
0991 seeds[s],
0992 clusterEnergy[s],
0993 clusterPos[s].x,
0994 clusterPos[s].y,
0995 clusterPos[s].z,
0996 clusterPos[s].w);
0997 }
0998 }
0999 alpaka::syncBlockThreads(acc);
1000
1001
1002 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1003 int seedRhIdx = getSeedRhIdx(seeds, s);
1004 for (int r = 0; r < nRHNotSeed - 1; r++) {
1005 int j = rechits[r];
1006 float frac = getRhFrac(pfClusteringVars, topoSeedBegin, fracView, s, r + 1);
1007
1008 if (frac > -0.5) {
1009 clusterEnergy[s] += frac * pfRecHits[j].energy();
1010
1011 if (nSeeds == 1 || j == pfRecHits[seedRhIdx].neighbours()(0) || j == pfRecHits[seedRhIdx].neighbours()(1) ||
1012 j == pfRecHits[seedRhIdx].neighbours()(2) || j == pfRecHits[seedRhIdx].neighbours()(3))
1013 updateClusterPos(pfClusParams, clusterPos[s], frac, j, pfRecHits, rhENormInv);
1014 }
1015 }
1016 }
1017 alpaka::syncBlockThreads(acc);
1018
1019
1020 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1021 if (clusterPos[s].w >= pfClusParams.minAllowedNormalization()) {
1022
1023 clusterPos[s].x /= clusterPos[s].w;
1024 clusterPos[s].y /= clusterPos[s].w;
1025 clusterPos[s].z /= clusterPos[s].w;
1026
1027 if constexpr (debug)
1028 printf("\tCluster %d (seed %d) energy = %f\tposition = (%f, %f, %f)\n",
1029 s,
1030 seeds[s],
1031 clusterEnergy[s],
1032 clusterPos[s].x,
1033 clusterPos[s].y,
1034 clusterPos[s].z);
1035 } else {
1036 if constexpr (debug)
1037 printf("\tCluster %d (seed %d) position norm (%f) less than minimum (%f)\n",
1038 s,
1039 seeds[s],
1040 clusterPos[s].w,
1041 pfClusParams.minAllowedNormalization());
1042 clusterPos[s].x = 0.0;
1043 clusterPos[s].y = 0.0;
1044 clusterPos[s].z = 0.0;
1045 }
1046 }
1047 alpaka::syncBlockThreads(acc);
1048
1049 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1050 float delta2 = dR2(prevClusterPos[s], clusterPos[s]);
1051 if constexpr (debug)
1052 printf("\tCluster %d (seed %d) has delta2 = %f\n", s, seeds[s], delta2);
1053 atomicMaxF(acc, &diff2, delta2);
1054 prevClusterPos[s] = clusterPos[s];
1055 }
1056 alpaka::syncBlockThreads(acc);
1057
1058 if (once_per_block(acc)) {
1059 float tol2 = tol * tol;
1060 iter++;
1061 notDone = (diff2 > tol2) && ((unsigned int)iter < pfClusParams.maxIterations());
1062 if constexpr (debug) {
1063 if (diff2 > tol2)
1064 printf("\tTopoId %d has diff2 = %f greater than tolerance %f (continuing)\n", topoId, diff2, tol2);
1065 else if constexpr (debug)
1066 printf("\tTopoId %d has diff2 = %f LESS than tolerance %f (terminating!)\n", topoId, diff2, tol2);
1067 }
1068 }
1069 alpaka::syncBlockThreads(acc);
1070 } while (notDone);
1071 if (once_per_block(acc))
1072 for (int s = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; s < nSeeds; s += stride) {
1073 int rhIdx = pfClusteringVars[s + pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1074 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1075 clusterView[seedIdx].energy() = pfRecHits[s].energy();
1076 clusterView[seedIdx].x() = pfRecHits[s].x();
1077 clusterView[seedIdx].y() = pfRecHits[s].y();
1078 clusterView[seedIdx].z() = pfRecHits[s].z();
1079 }
1080 }
1081
1082
1083 class SeedingTopoThresh {
1084 public:
1085 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
1086 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1087 const ::reco::PFClusterParamsSoA::ConstView pfClusParams,
1088 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1089 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1090 reco::PFClusterDeviceCollection::View clusterView,
1091 uint32_t* __restrict__ nSeeds) const {
1092 const int nRH = pfRecHits.size();
1093
1094 if (once_per_grid(acc)) {
1095 clusterView.size() = nRH;
1096 }
1097
1098 for (auto i : uniform_elements(acc, nRH)) {
1099
1100 pfClusteringVars[i].pfrh_isSeed() = 0;
1101 pfClusteringVars[i].rhCount() = 0;
1102 pfClusteringVars[i].topoSeedCount() = 0;
1103 pfClusteringVars[i].topoRHCount() = 0;
1104 pfClusteringVars[i].seedFracOffsets() = -1;
1105 pfClusteringVars[i].topoSeedOffsets() = -1;
1106 pfClusteringVars[i].topoSeedList() = -1;
1107 clusterView[i].seedRHIdx() = -1;
1108
1109 int layer = pfRecHits[i].layer();
1110 int depthOffset = pfRecHits[i].depth() - 1;
1111 float energy = pfRecHits[i].energy();
1112 Position3 pos = Position3{pfRecHits[i].x(), pfRecHits[i].y(), pfRecHits[i].z()};
1113 float seedThreshold = 9999.;
1114 float topoThreshold = 9999.;
1115
1116 if (topology.cutsFromDB()) {
1117 seedThreshold = topology[pfRecHits[i].denseId()].seedThreshold();
1118 topoThreshold = topology[pfRecHits[i].denseId()].noiseThreshold();
1119 } else {
1120 if (layer == PFLayer::HCAL_BARREL1) {
1121 seedThreshold = pfClusParams.seedEThresholdHB_vec()[depthOffset];
1122 topoThreshold = pfClusParams.topoEThresholdHB_vec()[depthOffset];
1123 } else if (layer == PFLayer::HCAL_ENDCAP) {
1124 seedThreshold = pfClusParams.seedEThresholdHE_vec()[depthOffset];
1125 topoThreshold = pfClusParams.topoEThresholdHE_vec()[depthOffset];
1126 }
1127 }
1128
1129
1130 float pt2 = energy * energy * (pos.x * pos.x + pos.y * pos.y) / (pos.x * pos.x + pos.y * pos.y + pos.z * pos.z);
1131
1132
1133 if ((layer == PFLayer::HCAL_BARREL1 && energy > seedThreshold && pt2 > pfClusParams.seedPt2ThresholdHB()) ||
1134 (layer == PFLayer::HCAL_ENDCAP && energy > seedThreshold && pt2 > pfClusParams.seedPt2ThresholdHE())) {
1135 pfClusteringVars[i].pfrh_isSeed() = 1;
1136 for (int k = 0; k < 4; k++) {
1137 if (pfRecHits[i].neighbours()(k) < 0)
1138 continue;
1139 if (energy < pfRecHits[pfRecHits[i].neighbours()(k)].energy()) {
1140 pfClusteringVars[i].pfrh_isSeed() = 0;
1141 break;
1142 }
1143 }
1144 if (pfClusteringVars[i].pfrh_isSeed())
1145 alpaka::atomicAdd(acc, nSeeds, 1u);
1146 }
1147
1148
1149 if ((layer == PFLayer::HCAL_ENDCAP && energy > topoThreshold) ||
1150 (layer == PFLayer::HCAL_BARREL1 && energy > topoThreshold)) {
1151 pfClusteringVars[i].pfrh_passTopoThresh() = true;
1152 pfClusteringVars[i].pfrh_topoId() = i;
1153 } else {
1154 pfClusteringVars[i].pfrh_passTopoThresh() = false;
1155 pfClusteringVars[i].pfrh_topoId() = -1;
1156 }
1157 }
1158 }
1159 };
1160
1161
1162 class PrepareTopoInputs {
1163 public:
1164 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
1165 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1166 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1167 reco::PFClusteringEdgeVarsDeviceCollection::View pfClusteringEdgeVars,
1168 uint32_t* __restrict__ nSeeds) const {
1169 const int nRH = pfRecHits.size();
1170
1171 if (once_per_grid(acc)) {
1172 pfClusteringVars.nEdges() = nRH * 8;
1173 pfClusteringEdgeVars[nRH].pfrh_edgeIdx() = nRH * 8;
1174 }
1175 for (uint32_t i : cms::alpakatools::uniform_elements(acc, nRH)) {
1176 pfClusteringEdgeVars[i].pfrh_edgeIdx() = i * 8;
1177 pfClusteringVars[i].pfrh_topoId() = 0;
1178 for (int j = 0; j < 8; j++) {
1179 if (pfRecHits[i].neighbours()(j) == -1)
1180 pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = i;
1181 else
1182 pfClusteringEdgeVars[i * 8 + j].pfrh_edgeList() = pfRecHits[i].neighbours()(j);
1183 }
1184 }
1185
1186 return;
1187 }
1188 };
1189
1190
1191 class TopoClusterContraction {
1192 public:
1193 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
1194 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1195 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1196 reco::PFClusterDeviceCollection::View clusterView,
1197 uint32_t* __restrict__ nSeeds,
1198 uint32_t* __restrict__ nRHF) const {
1199 const int nRH = pfRecHits.size();
1200 int& totalSeedOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1201 int& totalSeedFracOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1202
1203
1204 if (once_per_block(acc)) {
1205 pfClusteringVars.nTopos() = 0;
1206 pfClusteringVars.nRHFracs() = 0;
1207 totalSeedOffset = 0;
1208 totalSeedFracOffset = 0;
1209 pfClusteringVars.pcrhFracSize() = 0;
1210 }
1211
1212 alpaka::syncBlockThreads(acc);
1213
1214
1215
1216 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1217 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1218 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = -1;
1219 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1220 if (topoId > -1) {
1221
1222 alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoRHCount(), 1);
1223
1224 if (topoId == rhIdx) {
1225 int topoIdx = alpaka::atomicAdd(acc, &pfClusteringVars.nTopos(), 1);
1226 pfClusteringVars[topoIdx].topoIds() =
1227 topoId;
1228 }
1229
1230 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1231 alpaka::atomicAdd(acc, &pfClusteringVars[topoId].topoSeedCount(), 1);
1232 }
1233 }
1234 }
1235
1236 alpaka::syncBlockThreads(acc);
1237
1238
1239 for (int topoId = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; topoId < nRH;
1240 topoId += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1241 if (pfClusteringVars[topoId].topoSeedCount() > 0) {
1242
1243 int offset = alpaka::atomicAdd(acc, &totalSeedOffset, pfClusteringVars[topoId].topoSeedCount());
1244 pfClusteringVars[topoId].topoSeedOffsets() = offset;
1245 }
1246 }
1247 alpaka::syncBlockThreads(acc);
1248
1249
1250
1251 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1252 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1253 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1254 if (pfClusteringVars[rhIdx].pfrh_isSeed()) {
1255
1256 int k = alpaka::atomicAdd(acc, &pfClusteringVars[topoId].rhCount(), 1);
1257 int seedIdx = pfClusteringVars[topoId].topoSeedOffsets() + k;
1258 if ((unsigned int)seedIdx >= *nSeeds)
1259 printf("Warning(contraction) %8d > %8d should not happen, check topoId: %d has %d rh\n",
1260 seedIdx,
1261 *nSeeds,
1262 topoId,
1263 k);
1264 pfClusteringVars[seedIdx].topoSeedList() = rhIdx;
1265 pfClusteringVars[rhIdx].rhIdxToSeedIdx() = seedIdx;
1266 clusterView[seedIdx].topoId() = topoId;
1267 clusterView[seedIdx].seedRHIdx() = rhIdx;
1268 clusterView[seedIdx].depth() = pfRecHits[rhIdx].depth();
1269 }
1270 }
1271
1272 alpaka::syncBlockThreads(acc);
1273
1274
1275 for (int rhIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]; rhIdx < nRH;
1276 rhIdx += alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]) {
1277 pfClusteringVars[rhIdx].rhCount() = 1;
1278
1279 int topoId = pfClusteringVars[rhIdx].pfrh_topoId();
1280 if (pfClusteringVars[rhIdx].pfrh_isSeed() && topoId > -1) {
1281
1282 int offset = alpaka::atomicAdd(acc, &totalSeedFracOffset, pfClusteringVars[topoId].topoRHCount());
1283
1284
1285 pfClusteringVars[rhIdx].seedFracOffsets() = offset;
1286
1287
1288 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracOffset() =
1289 pfClusteringVars[rhIdx].seedFracOffsets();
1290 clusterView[pfClusteringVars[rhIdx].rhIdxToSeedIdx()].rhfracSize() =
1291 pfClusteringVars[topoId].topoRHCount() - pfClusteringVars[topoId].topoSeedCount() + 1;
1292 }
1293 }
1294
1295 alpaka::syncBlockThreads(acc);
1296
1297 if (once_per_block(acc)) {
1298 pfClusteringVars.pcrhFracSize() = totalSeedFracOffset;
1299 pfClusteringVars.nRHFracs() = totalSeedFracOffset;
1300 clusterView.nRHFracs() = totalSeedFracOffset;
1301 *nRHF = totalSeedFracOffset;
1302 clusterView.nSeeds() = *nSeeds;
1303 clusterView.nTopos() = pfClusteringVars.nTopos();
1304 }
1305 }
1306 };
1307
1308
1309
1310 class FillRhfIndex {
1311 public:
1312 ALPAKA_FN_ACC void operator()(Acc2D const& acc,
1313 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1314 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1315 reco::PFRecHitFractionDeviceCollection::View fracView) const {
1316 const int nRH = pfRecHits.size();
1317
1318 for (auto index : uniform_elements_nd(acc, {nRH, nRH})) {
1319 const int i = index[0u];
1320 const int j = index[1u];
1321 int topoId = pfClusteringVars[i].pfrh_topoId();
1322 if (topoId > -1 && pfClusteringVars[i].pfrh_isSeed() && topoId == pfClusteringVars[j].pfrh_topoId()) {
1323 if (!pfClusteringVars[j].pfrh_isSeed()) {
1324 int k = alpaka::atomicAdd(
1325 acc, &pfClusteringVars[i].rhCount(), 1);
1326 auto fraction = fracView[pfClusteringVars[i].seedFracOffsets() + k];
1327 fraction.pfrhIdx() = j;
1328 fraction.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx();
1329 } else if (i == j) {
1330 auto seed = fracView[pfClusteringVars[i].seedFracOffsets()];
1331 seed.pfrhIdx() = j;
1332 seed.frac() = 1;
1333 seed.pfcIdx() = pfClusteringVars[i].rhIdxToSeedIdx();
1334 }
1335 }
1336 }
1337 }
1338 };
1339
1340 class FastCluster {
1341 public:
1342 template <bool debug = false, typename = std::enable_if<!std::is_same_v<Device, alpaka::DevCpu>>>
1343 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
1344 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1345 const ::reco::PFClusterParamsSoA::ConstView pfClusParams,
1346 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1347 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1348 reco::PFClusterDeviceCollection::View clusterView,
1349 reco::PFRecHitFractionDeviceCollection::View fracView) const {
1350 const int nRH = pfRecHits.size();
1351 int& topoId = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1352 int& nRHTopo = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1353 int& nSeeds = alpaka::declareSharedVar<int, __COUNTER__>(acc);
1354
1355 if (once_per_block(acc)) {
1356 topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
1357 nRHTopo = pfClusteringVars[topoId].topoRHCount();
1358 nSeeds = pfClusteringVars[topoId].topoSeedCount();
1359 }
1360
1361 alpaka::syncBlockThreads(acc);
1362
1363 if (topoId < nRH && nRHTopo > 0 && nSeeds > 0) {
1364 if (nRHTopo == nSeeds) {
1365
1366 if (once_per_block(acc)) {
1367
1368 int rhIdx = pfClusteringVars[pfClusteringVars[topoId].topoSeedOffsets()].topoSeedList();
1369 int seedIdx = pfClusteringVars[rhIdx].rhIdxToSeedIdx();
1370 clusterView[seedIdx].energy() = pfRecHits[rhIdx].energy();
1371 clusterView[seedIdx].x() = pfRecHits[rhIdx].x();
1372 clusterView[seedIdx].y() = pfRecHits[rhIdx].y();
1373 clusterView[seedIdx].z() = pfRecHits[rhIdx].z();
1374 }
1375
1376 } else if ((not std::is_same_v<Device, alpaka::DevCpu>) && nSeeds == 1) {
1377
1378 hcalFastCluster_singleSeed(
1379 acc, pfClusParams, topology, topoId, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1380 } else if ((not std::is_same_v<Device, alpaka::DevCpu>) && nSeeds <= 100 &&
1381 nRHTopo - nSeeds < threadsPerBlockForClustering) {
1382 hcalFastCluster_multiSeedParallel(
1383 acc, pfClusParams, topology, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1384 } else if (nSeeds <= 400 && nRHTopo - nSeeds <= 1500) {
1385
1386 hcalFastCluster_multiSeedIterative(
1387 acc, pfClusParams, topology, topoId, nSeeds, nRHTopo, pfRecHits, pfClusteringVars, clusterView, fracView);
1388 } else {
1389 if constexpr (debug) {
1390 if (once_per_block(acc))
1391 printf("Topo cluster %d has %d seeds and %d rechits. Will be processed in next kernel.\n",
1392 topoId,
1393 nSeeds,
1394 nRHTopo);
1395 }
1396 }
1397 }
1398 }
1399 };
1400
1401
1402 class FastClusterExotic {
1403 public:
1404 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
1405 const reco::PFRecHitDeviceCollection::ConstView pfRecHits,
1406 const ::reco::PFClusterParamsSoA::ConstView pfClusParams,
1407 const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
1408 reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
1409 reco::PFClusterDeviceCollection::View clusterView,
1410 reco::PFRecHitFractionDeviceCollection::View fracView,
1411 Position4* __restrict__ globalClusterPos,
1412 Position4* __restrict__ globalPrevClusterPos,
1413 float* __restrict__ globalClusterEnergy,
1414 float* __restrict__ globalRhFracSum,
1415 int* __restrict__ globalSeeds,
1416 int* __restrict__ globalRechits) const {
1417 const int nRH = pfRecHits.size();
1418 for (int topoId = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]; topoId < nRH;
1419 topoId += blocksForExoticClusters) {
1420 int nRHTopo = pfClusteringVars[topoId].topoRHCount();
1421 int nSeeds = pfClusteringVars[topoId].topoSeedCount();
1422
1423
1424 if (nRHTopo > 0 && (nSeeds > 400 || nRHTopo - nSeeds > 1500)) {
1425 hcalFastCluster_exotic(acc,
1426 pfClusParams,
1427 topology,
1428 topoId,
1429 nSeeds,
1430 nRHTopo,
1431 pfRecHits,
1432 pfClusteringVars,
1433 clusterView,
1434 fracView,
1435 globalClusterPos,
1436 globalPrevClusterPos,
1437 globalClusterEnergy,
1438 globalRhFracSum,
1439 globalSeeds,
1440 globalRechits);
1441 }
1442 alpaka::syncBlockThreads(acc);
1443 }
1444 }
1445 };
1446
1447 PFClusterProducerKernel::PFClusterProducerKernel(Queue& queue)
1448 : nSeeds(cms::alpakatools::make_device_buffer<uint32_t>(queue)),
1449 globalClusterPos(
1450 cms::alpakatools::make_device_buffer<Position4[]>(queue, blocksForExoticClusters * maxTopoInput)),
1451 globalPrevClusterPos(
1452 cms::alpakatools::make_device_buffer<Position4[]>(queue, blocksForExoticClusters * maxTopoInput)),
1453 globalClusterEnergy(
1454 cms::alpakatools::make_device_buffer<float[]>(queue, blocksForExoticClusters * maxTopoInput)),
1455 globalRhFracSum(cms::alpakatools::make_device_buffer<float[]>(queue, blocksForExoticClusters * maxTopoInput)),
1456 globalSeeds(cms::alpakatools::make_device_buffer<int[]>(queue, blocksForExoticClusters * maxTopoInput)),
1457 globalRechits(cms::alpakatools::make_device_buffer<int[]>(queue, blocksForExoticClusters * maxTopoInput)) {
1458 alpaka::memset(queue, nSeeds, 0x00);
1459 }
1460
1461 void PFClusterProducerKernel::seedTopoAndContract(Queue& queue,
1462 const ::reco::PFClusterParamsSoA::ConstView params,
1463 const reco::PFRecHitHCALTopologyDeviceCollection& topology,
1464 reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
1465 reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
1466 const reco::PFRecHitDeviceCollection& pfRecHits,
1467 int nRH,
1468 reco::PFClusterDeviceCollection& pfClusters,
1469 uint32_t* __restrict__ nRHF) {
1470 const int threadsPerBlock = 256;
1471 const int blocks = divide_up_by(nRH, threadsPerBlock);
1472
1473
1474 alpaka::exec<Acc1D>(queue,
1475 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1476 SeedingTopoThresh{},
1477 pfClusteringVars.view(),
1478 params,
1479 topology.view(),
1480 pfRecHits.view(),
1481 pfClusters.view(),
1482 nSeeds.data());
1483
1484 alpaka::exec<Acc1D>(queue,
1485 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1486 PrepareTopoInputs{},
1487 pfRecHits.view(),
1488 pfClusteringVars.view(),
1489 pfClusteringEdgeVars.view(),
1490 nSeeds.data());
1491
1492 alpaka::exec<Acc1D>(queue,
1493 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1494 ECLCCInit{},
1495 pfRecHits.view(),
1496 pfClusteringVars.view(),
1497 pfClusteringEdgeVars.view());
1498 alpaka::exec<Acc1D>(queue,
1499 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1500 ECLCCCompute1{},
1501 pfRecHits.view(),
1502 pfClusteringVars.view(),
1503 pfClusteringEdgeVars.view());
1504 alpaka::exec<Acc1D>(queue,
1505 make_workdiv<Acc1D>(blocks, threadsPerBlock),
1506 ECLCCFlatten{},
1507 pfRecHits.view(),
1508 pfClusteringVars.view(),
1509 pfClusteringEdgeVars.view());
1510
1511 alpaka::exec<Acc1D>(queue,
1512 make_workdiv<Acc1D>(1, threadsPerBlockForClustering),
1513 TopoClusterContraction{},
1514 pfRecHits.view(),
1515 pfClusteringVars.view(),
1516 pfClusters.view(),
1517 nSeeds.data(),
1518 nRHF);
1519 }
1520
1521 void PFClusterProducerKernel::cluster(Queue& queue,
1522 const ::reco::PFClusterParamsSoA::ConstView params,
1523 const reco::PFRecHitHCALTopologyDeviceCollection& topology,
1524 reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
1525 reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
1526 const reco::PFRecHitDeviceCollection& pfRecHits,
1527 int nRH,
1528 reco::PFClusterDeviceCollection& pfClusters,
1529 reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
1530
1531 alpaka::exec<Acc2D>(queue,
1532 make_workdiv<Acc2D>({divide_up_by(nRH, 32), divide_up_by(nRH, 32)}, {32, 32}),
1533 FillRhfIndex{},
1534 pfRecHits.view(),
1535 pfClusteringVars.view(),
1536 pfrhFractions.view());
1537
1538
1539 alpaka::exec<Acc1D>(queue,
1540 make_workdiv<Acc1D>(nRH, threadsPerBlockForClustering),
1541 FastCluster{},
1542 pfRecHits.view(),
1543 params,
1544 topology.view(),
1545 pfClusteringVars.view(),
1546 pfClusters.view(),
1547 pfrhFractions.view());
1548
1549 alpaka::exec<Acc1D>(queue,
1550 make_workdiv<Acc1D>(blocksForExoticClusters,
1551 threadsPerBlockForClustering),
1552 FastClusterExotic{},
1553 pfRecHits.view(),
1554 params,
1555 topology.view(),
1556 pfClusteringVars.view(),
1557 pfClusters.view(),
1558 pfrhFractions.view(),
1559 globalClusterPos.data(),
1560 globalPrevClusterPos.data(),
1561 globalClusterEnergy.data(),
1562 globalRhFracSum.data(),
1563 globalSeeds.data(),
1564 globalRechits.data());
1565 }
1566
1567 }