File indexing completed on 2024-12-05 02:48:05
0001 #ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
0002 #define RecoTracker_LSTCore_src_alpaka_Kernels_h
0003
0004 #include "RecoTracker/LSTCore/interface/alpaka/Common.h"
0005 #include "RecoTracker/LSTCore/interface/ModulesSoA.h"
0006 #include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
0007 #include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h"
0008 #include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h"
0009 #include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h"
0010 #include "RecoTracker/LSTCore/interface/QuintupletsSoA.h"
0011 #include "RecoTracker/LSTCore/interface/SegmentsSoA.h"
0012 #include "RecoTracker/LSTCore/interface/TripletsSoA.h"
0013
0014 namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
0015 ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets quintuplets,
0016 unsigned int quintupletIndex,
0017 bool secondpass = false) {
0018 quintuplets.isDup()[quintupletIndex] |= 1 + secondpass;
0019 }
0020
0021 ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets pixelTriplets,
0022 unsigned int pixelTripletIndex) {
0023 pixelTriplets.isDup()[pixelTripletIndex] = true;
0024 }
0025
0026 ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets pixelQuintuplets,
0027 unsigned int pixelQuintupletIndex) {
0028 pixelQuintuplets.isDup()[pixelQuintupletIndex] = true;
0029 }
0030
0031 ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel,
0032 unsigned int pixelSegmentArrayIndex,
0033 bool secondpass = false) {
0034 segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass;
0035 }
0036
0037 ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, QuintupletsConst quintuplets) {
0038 unsigned int hits1[Params_T5::kHits];
0039 unsigned int hits2[Params_T5::kHits];
0040
0041 for (int i = 0; i < Params_T5::kHits; i++) {
0042 hits1[i] = quintuplets.hitIndices()[ix][i];
0043 hits2[i] = quintuplets.hitIndices()[jx][i];
0044 }
0045
0046 int nMatched = 0;
0047 for (int i = 0; i < Params_T5::kHits; i++) {
0048 bool matched = false;
0049 for (int j = 0; j < Params_T5::kHits; j++) {
0050 if (hits1[i] == hits2[j]) {
0051 matched = true;
0052 break;
0053 }
0054 }
0055 if (matched) {
0056 nMatched++;
0057 }
0058 }
0059 return nMatched;
0060 }
0061
0062 ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix,
0063 unsigned int jx,
0064 PixelQuintupletsConst pixelQuintuplets) {
0065 unsigned int hits1[Params_pT5::kHits];
0066 unsigned int hits2[Params_pT5::kHits];
0067
0068 for (int i = 0; i < Params_pT5::kHits; i++) {
0069 hits1[i] = pixelQuintuplets.hitIndices()[ix][i];
0070 hits2[i] = pixelQuintuplets.hitIndices()[jx][i];
0071 }
0072
0073 int nMatched = 0;
0074 for (int i = 0; i < Params_pT5::kHits; i++) {
0075 bool matched = false;
0076 for (int j = 0; j < Params_pT5::kHits; j++) {
0077 if (hits1[i] == hits2[j]) {
0078 matched = true;
0079 break;
0080 }
0081 }
0082 if (matched) {
0083 nMatched++;
0084 }
0085 }
0086 return nMatched;
0087 }
0088
0089 ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix,
0090 unsigned int jx,
0091 PixelTripletsConst pixelTriplets,
0092 int* matched) {
0093 int phits1[Params_pLS::kHits];
0094 int phits2[Params_pLS::kHits];
0095
0096 for (int i = 0; i < Params_pLS::kHits; i++) {
0097 phits1[i] = pixelTriplets.hitIndices()[ix][i];
0098 phits2[i] = pixelTriplets.hitIndices()[jx][i];
0099 }
0100
0101 int npMatched = 0;
0102 for (int i = 0; i < Params_pLS::kHits; i++) {
0103 bool pmatched = false;
0104 for (int j = 0; j < Params_pLS::kHits; j++) {
0105 if (phits1[i] == phits2[j]) {
0106 pmatched = true;
0107 break;
0108 }
0109 }
0110 if (pmatched) {
0111 npMatched++;
0112 }
0113 }
0114
0115 int hits1[Params_T3::kHits];
0116 int hits2[Params_T3::kHits];
0117
0118 for (int i = 0; i < Params_T3::kHits; i++) {
0119 hits1[i] = pixelTriplets.hitIndices()[ix][i + 4];
0120 hits2[i] = pixelTriplets.hitIndices()[jx][i + 4];
0121 }
0122
0123 int nMatched = 0;
0124 for (int i = 0; i < Params_T3::kHits; i++) {
0125 bool tmatched = false;
0126 for (int j = 0; j < Params_T3::kHits; j++) {
0127 if (hits1[i] == hits2[j]) {
0128 tmatched = true;
0129 break;
0130 }
0131 }
0132 if (tmatched) {
0133 nMatched++;
0134 }
0135 }
0136
0137 matched[0] = npMatched;
0138 matched[1] = nMatched;
0139 }
0140
0141 struct RemoveDupQuintupletsAfterBuild {
0142 template <typename TAcc>
0143 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0144 ModulesConst modules,
0145 Quintuplets quintuplets,
0146 QuintupletsOccupancyConst quintupletsOccupancy,
0147 ObjectRangesConst ranges) const {
0148 auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
0149 auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
0150
0151 for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
0152 unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
0153 int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];
0154
0155 for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
0156 unsigned int ix = quintupletModuleIndices_lowmod + ix1;
0157 float eta1 = __H2F(quintuplets.eta()[ix]);
0158 float phi1 = __H2F(quintuplets.phi()[ix]);
0159 float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
0160
0161 for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
0162 unsigned int jx = quintupletModuleIndices_lowmod + jx1;
0163
0164 float eta2 = __H2F(quintuplets.eta()[jx]);
0165 float phi2 = __H2F(quintuplets.phi()[jx]);
0166 float dEta = alpaka::math::abs(acc, eta1 - eta2);
0167 float dPhi = calculate_dPhi(phi1, phi2);
0168 float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
0169
0170 if (dEta > 0.1f)
0171 continue;
0172
0173 if (alpaka::math::abs(acc, dPhi) > 0.1f)
0174 continue;
0175
0176 int nMatched = checkHitsT5(ix, jx, quintuplets);
0177 const int minNHitsForDup_T5 = 7;
0178 if (nMatched >= minNHitsForDup_T5) {
0179 if (score_rphisum1 >= score_rphisum2) {
0180 rmQuintupletFromMemory(quintuplets, ix);
0181 } else {
0182 rmQuintupletFromMemory(quintuplets, jx);
0183 }
0184 }
0185 }
0186 }
0187 }
0188 }
0189 };
0190
0191 struct RemoveDupQuintupletsBeforeTC {
0192 template <typename TAcc>
0193 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0194 Quintuplets quintuplets,
0195 QuintupletsOccupancyConst quintupletsOccupancy,
0196 ObjectRangesConst ranges) const {
0197 auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
0198 auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
0199
0200 for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
0201 lowmodIdx1 += gridThreadExtent[1]) {
0202 uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
0203 unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
0204 if (nQuintuplets_lowmod1 == 0)
0205 continue;
0206
0207 unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];
0208
0209 for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
0210 lowmodIdx2 += gridThreadExtent[2]) {
0211 uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
0212 unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
0213 if (nQuintuplets_lowmod2 == 0)
0214 continue;
0215
0216 unsigned int quintupletModuleIndices_lowmod2 = ranges.quintupletModuleIndices()[lowmod2];
0217
0218 for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) {
0219 unsigned int ix = quintupletModuleIndices_lowmod1 + ix1;
0220 if (quintuplets.isDup()[ix] & 1)
0221 continue;
0222
0223 bool isPT5_ix = quintuplets.partOfPT5()[ix];
0224
0225 for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) {
0226 unsigned int jx = quintupletModuleIndices_lowmod2 + jx1;
0227 if (ix == jx)
0228 continue;
0229
0230 if (quintuplets.isDup()[jx] & 1)
0231 continue;
0232
0233 bool isPT5_jx = quintuplets.partOfPT5()[jx];
0234
0235 if (isPT5_ix && isPT5_jx)
0236 continue;
0237
0238 float eta1 = __H2F(quintuplets.eta()[ix]);
0239 float phi1 = __H2F(quintuplets.phi()[ix]);
0240 float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
0241
0242 float eta2 = __H2F(quintuplets.eta()[jx]);
0243 float phi2 = __H2F(quintuplets.phi()[jx]);
0244 float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
0245
0246 float dEta = alpaka::math::abs(acc, eta1 - eta2);
0247 float dPhi = calculate_dPhi(phi1, phi2);
0248
0249 if (dEta > 0.1f)
0250 continue;
0251
0252 if (alpaka::math::abs(acc, dPhi) > 0.1f)
0253 continue;
0254
0255 float dR2 = dEta * dEta + dPhi * dPhi;
0256 int nMatched = checkHitsT5(ix, jx, quintuplets);
0257 const int minNHitsForDup_T5 = 5;
0258 if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) {
0259 if (isPT5_jx || score_rphisum1 > score_rphisum2) {
0260 rmQuintupletFromMemory(quintuplets, ix, true);
0261 } else if (isPT5_ix || score_rphisum1 < score_rphisum2) {
0262 rmQuintupletFromMemory(quintuplets, jx, true);
0263 } else {
0264 rmQuintupletFromMemory(quintuplets, (ix < jx ? ix : jx), true);
0265 }
0266 }
0267 }
0268 }
0269 }
0270 }
0271 }
0272 };
0273
0274 struct RemoveDupPixelTripletsFromMap {
0275 template <typename TAcc>
0276 ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
0277 auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
0278 auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
0279
0280 for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
0281 for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
0282 if (ix == jx)
0283 continue;
0284
0285 int nMatched[2];
0286 checkHitspT3(ix, jx, pixelTriplets, nMatched);
0287 const int minNHitsForDup_pT3 = 5;
0288 if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) {
0289
0290 if (pixelTriplets.logicalLayers()[jx][2] < pixelTriplets.logicalLayers()[ix][2]) {
0291 rmPixelTripletFromMemory(pixelTriplets, ix);
0292 break;
0293 } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] &&
0294 __H2F(pixelTriplets.score()[ix]) > __H2F(pixelTriplets.score()[jx])) {
0295 rmPixelTripletFromMemory(pixelTriplets, ix);
0296 break;
0297 } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] &&
0298 (__H2F(pixelTriplets.score()[ix]) == __H2F(pixelTriplets.score()[jx])) && (ix < jx)) {
0299 rmPixelTripletFromMemory(pixelTriplets, ix);
0300 break;
0301 }
0302 }
0303 }
0304 }
0305 }
0306 };
0307
0308 struct RemoveDupPixelQuintupletsFromMap {
0309 template <typename TAcc>
0310 ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
0311 auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
0312 auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
0313
0314 unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
0315 for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
0316 float score1 = __H2F(pixelQuintuplets.score()[ix]);
0317 for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
0318 if (ix == jx)
0319 continue;
0320
0321 int nMatched = checkHitspT5(ix, jx, pixelQuintuplets);
0322 float score2 = __H2F(pixelQuintuplets.score()[jx]);
0323 const int minNHitsForDup_pT5 = 7;
0324 if (nMatched >= minNHitsForDup_pT5) {
0325 if (score1 > score2 or ((score1 == score2) and (ix > jx))) {
0326 rmPixelQuintupletFromMemory(pixelQuintuplets, ix);
0327 break;
0328 }
0329 }
0330 }
0331 }
0332 }
0333 };
0334
0335 struct CheckHitspLS {
0336 template <typename TAcc>
0337 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0338 ModulesConst modules,
0339 SegmentsOccupancyConst segmentsOccupancy,
0340 SegmentsPixel segmentsPixel,
0341 bool secondpass) const {
0342 auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
0343 auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
0344
0345 int pixelModuleIndex = modules.nLowerModules();
0346 unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];
0347
0348 if (nPixelSegments > n_max_pixel_segments_per_module)
0349 nPixelSegments = n_max_pixel_segments_per_module;
0350
0351 for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
0352 if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
0353 continue;
0354
0355 unsigned int phits1[Params_pLS::kHits];
0356 phits1[0] = segmentsPixel.pLSHitsIdxs()[ix].x;
0357 phits1[1] = segmentsPixel.pLSHitsIdxs()[ix].y;
0358 phits1[2] = segmentsPixel.pLSHitsIdxs()[ix].z;
0359 phits1[3] = segmentsPixel.pLSHitsIdxs()[ix].w;
0360 float eta_pix1 = segmentsPixel.eta()[ix];
0361 float phi_pix1 = segmentsPixel.phi()[ix];
0362
0363 for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
0364 float eta_pix2 = segmentsPixel.eta()[jx];
0365 float phi_pix2 = segmentsPixel.phi()[jx];
0366
0367 if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f)
0368 continue;
0369
0370 if (secondpass && (!segmentsPixel.isQuad()[jx] || (segmentsPixel.isDup()[jx] & 1)))
0371 continue;
0372
0373 int8_t quad_diff = segmentsPixel.isQuad()[ix] - segmentsPixel.isQuad()[jx];
0374 float score_diff = segmentsPixel.score()[ix] - segmentsPixel.score()[jx];
0375
0376 int idxToRemove;
0377 if (quad_diff > 0)
0378 idxToRemove = jx;
0379 else if (quad_diff < 0)
0380 idxToRemove = ix;
0381 else if (score_diff < 0)
0382 idxToRemove = jx;
0383 else if (score_diff > 0)
0384 idxToRemove = ix;
0385 else
0386 idxToRemove = ix;
0387
0388 unsigned int phits2[Params_pLS::kHits];
0389 phits2[0] = segmentsPixel.pLSHitsIdxs()[jx].x;
0390 phits2[1] = segmentsPixel.pLSHitsIdxs()[jx].y;
0391 phits2[2] = segmentsPixel.pLSHitsIdxs()[jx].z;
0392 phits2[3] = segmentsPixel.pLSHitsIdxs()[jx].w;
0393
0394 int npMatched = 0;
0395 for (int i = 0; i < Params_pLS::kHits; i++) {
0396 bool pmatched = false;
0397 for (int j = 0; j < Params_pLS::kHits; j++) {
0398 if (phits1[i] == phits2[j]) {
0399 pmatched = true;
0400 break;
0401 }
0402 }
0403 if (pmatched) {
0404 npMatched++;
0405
0406 if (secondpass)
0407 break;
0408 }
0409 }
0410 const int minNHitsForDup_pLS = 3;
0411 if (npMatched >= minNHitsForDup_pLS) {
0412 rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass);
0413 }
0414 if (secondpass) {
0415 float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2);
0416 float dPhi = calculate_dPhi(phi_pix1, phi_pix2);
0417
0418 float dR2 = dEta * dEta + dPhi * dPhi;
0419 if ((npMatched >= 1) || (dR2 < 1e-5f)) {
0420 rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass);
0421 }
0422 }
0423 }
0424 }
0425 }
0426 };
0427 }
0428 #endif