File indexing completed on 2023-03-17 11:22:37
0001 #ifndef RecoTracker_MkFitCore_standalone_attic_BestCands_h
0002 #define RecoTracker_MkFitCore_standalone_attic_BestCands_h
0003
0004 #include "Config.h"
0005
0006 #include <cstdio>
0007 #include <limits>
0008
0009 namespace CandsGPU {
0010
0011 constexpr int trkIdx_sentinel = -1;
0012 constexpr int hitIdx_sentinel = -1;
0013 constexpr int nhits_sentinel = -1;
0014 constexpr float chi2_sentinel = std::numeric_limits<float>::max();
0015
0016 template <typename T>
0017 __device__ void swap_values(T& a, T& b) {
0018 T c(a);
0019 a = b;
0020 b = c;
0021 }
0022
0023 template <int MaxCandsPerSeed, int BlockSize>
0024 struct BestCands {
0025
0026 int trkIdx[MaxCandsPerSeed][BlockSize];
0027 int hitIdx[MaxCandsPerSeed][BlockSize];
0028 int nhits[MaxCandsPerSeed][BlockSize];
0029 float chi2[MaxCandsPerSeed][BlockSize];
0030
0031 __device__ void reset(int itrack);
0032 __device__ void update(int itrack, int cand_trIdx, int cand_hitIdx, int cand_nhits, float cand_chi2);
0033 __device__ void heapify(int itrack, int idx, int heap_size);
0034 __device__ int left(int idx);
0035 __device__ int right(int idx);
0036
0037 __device__ bool better(int icand_fst, int fst, int icand_snd, int snd);
0038
0039 __device__ void heap_sort(int icand, int heap_size);
0040 __device__ void merge_cands_for_seed(int iseed, int icand);
0041 __device__ void swap_nodes(int icand_fst, int fst, int icand_snd, int snd);
0042 __device__ void copy_node(int icand_fst, int fst, int icand_snd, int snd);
0043
0044 __device__ int count_valid_cands(int itrack);
0045
0046
0047 __device__ void get_cand_info(
0048 const int tid, const int cid, int& my_trkIdx, int& my_hitIdx, int& my_nhits, float& my_chi2);
0049 __device__ int get_nhits(const int tid, const int cid) { return nhits[cid][tid]; }
0050
0051 __device__ void print_heap(const int tid);
0052 };
0053
0054 template <int M, int B>
0055 __device__ void BestCands<M, B>::reset(int itrack) {
0056 for (auto j = 0; j < M; ++j) {
0057 trkIdx[j][itrack] = trkIdx_sentinel;
0058 hitIdx[j][itrack] = hitIdx_sentinel;
0059 nhits[j][itrack] = nhits_sentinel;
0060 chi2[j][itrack] = chi2_sentinel;
0061 }
0062 }
0063
0064 template <int M, int B>
0065 __device__ void BestCands<M, B>::update(int itrack, int cand_trIdx, int cand_hitIdx, int cand_nhits, float cand_chi2) {
0066 if (cand_nhits < nhits[0][itrack])
0067 return;
0068 if (cand_chi2 > chi2[0][itrack])
0069 return;
0070 trkIdx[0][itrack] = cand_trIdx;
0071 hitIdx[0][itrack] = cand_hitIdx;
0072 nhits[0][itrack] = cand_nhits;
0073 chi2[0][itrack] = cand_chi2;
0074
0075 heapify(itrack, 0, M);
0076 }
0077
0078 template <int M, int B>
0079 __device__ void BestCands<M, B>::print_heap(const int tid) {
0080 for (int cid = 0; cid < M; cid++) {
0081 printf(">>>>> tid %d rowIdx %d hitIdx %d nhits %d chi2 %f\n",
0082 tid,
0083 cid,
0084 hitIdx[cid][tid],
0085 nhits[cid][tid],
0086 chi2[cid][tid]);
0087 }
0088 }
0089
0090 template <int M, int B>
0091 __device__ int BestCands<M, B>::left(int idx) {
0092 return (++idx << 1) - 1;
0093 }
0094
0095 template <int M, int B>
0096 __device__ int BestCands<M, B>::right(int idx) {
0097 return ++idx << 1;
0098 }
0099
0100 template <int M, int B>
0101 __device__ bool BestCands<M, B>::better(int icand_fst, int fst, int icand_snd, int snd) {
0102 return (nhits[fst][icand_fst] > nhits[snd][icand_snd]) ||
0103 ((nhits[fst][icand_fst] == nhits[snd][icand_snd]) && (chi2[fst][icand_fst] < chi2[snd][icand_snd]));
0104 }
0105
0106 template <int M, int B>
0107 __device__ void BestCands<M, B>::swap_nodes(int icand_fst, int fst, int icand_snd, int snd) {
0108 swap_values(trkIdx[fst][icand_fst], trkIdx[snd][icand_snd]);
0109 swap_values(hitIdx[fst][icand_fst], hitIdx[snd][icand_snd]);
0110 swap_values(nhits[fst][icand_fst], nhits[snd][icand_snd]);
0111 swap_values(chi2[fst][icand_fst], chi2[snd][icand_snd]);
0112 }
0113
0114 template <int M, int B>
0115 __device__ void BestCands<M, B>::copy_node(int icand_fst, int fst, int icand_snd, int snd) {
0116 trkIdx[snd][icand_snd] = trkIdx[fst][icand_fst];
0117 hitIdx[snd][icand_snd] = hitIdx[fst][icand_fst];
0118 nhits[snd][icand_snd] = nhits[fst][icand_fst];
0119 chi2[snd][icand_snd] = chi2[fst][icand_fst];
0120 }
0121
0122 template <int M, int B>
0123 __device__ void BestCands<M, B>::heapify(int icand, int idx, int heap_size) {
0124
0125 int smallest = -1;
0126 while (idx != smallest) {
0127 if (idx < 0 || idx >= heap_size / 2)
0128 return;
0129
0130 smallest = idx;
0131 if (heap_size > left(idx) && better(icand, smallest, icand, left(idx)))
0132 smallest = left(idx);
0133 if (heap_size > right(idx) && better(icand, smallest, icand, right(idx)))
0134 smallest = right(idx);
0135
0136 if (smallest != idx) {
0137 swap_nodes(icand, smallest, icand, idx);
0138 idx = smallest;
0139 smallest = -1;
0140 }
0141 }
0142 }
0143
0144 template <int M, int B>
0145 __device__ void BestCands<M, B>::merge_cands_for_seed(int iseed, int icand) {
0146 int itrack = iseed * M + icand;
0147
0148
0149 #if 1
0150 if (icand) {
0151 heap_sort(itrack, M);
0152 }
0153 __syncthreads();
0154 if (icand)
0155 return;
0156
0157 for (int i = itrack + 1; i < itrack + M; ++i) {
0158 for (int j = 0; j < M; ++j) {
0159 if (better(i, j, itrack, 0)) {
0160 copy_node(i, j, itrack, 0);
0161 heapify(itrack, 0, M);
0162 } else {
0163 break;
0164 }
0165 }
0166 }
0167 heap_sort(itrack, M);
0168 __syncthreads();
0169 #else
0170
0171 for (int step = 2; step <= Config::maxCandsPerSeed; step <<= 1) {
0172 if (icand % step == step / 2) {
0173 heap_sort(itrack, M);
0174 }
0175 __syncthreads();
0176
0177 if (icand % step == 0) {
0178 int i = itrack + step / 2;
0179 if ((i < iseed * M + M) && (i < B) && (icand + step / 2 < M)) {
0180 for (int j = 0; j < M; ++j) {
0181 if (better(i, j, itrack, 0)) {
0182 copy_node(i, j, itrack, 0);
0183 heapify(itrack, 0, M);
0184 } else {
0185 break;
0186 }
0187 }
0188 }
0189 }
0190
0191 }
0192
0193 if (icand == 0) {
0194 heap_sort(itrack, M);
0195 }
0196 __syncthreads();
0197 #endif
0198 }
0199
0200 template <int M, int B>
0201 __device__ void BestCands<M, B>::heap_sort(int icand, int heap_size) {
0202 int num_unsorted_elts = heap_size;
0203
0204 for (int i = heap_size - 1; i > 0; --i) {
0205 swap_nodes(icand, 0, icand, i);
0206 heapify(icand, 0, --num_unsorted_elts);
0207 }
0208 }
0209
0210 template <int MaxCandsPerSeed, int BlockSize>
0211 __device__ void BestCands<MaxCandsPerSeed, BlockSize>::get_cand_info(
0212 const int tid, const int cid, int& my_trkIdx, int& my_hitIdx, int& my_nhits, float& my_chi2) {
0213 if (cid < MaxCandsPerSeed && tid < BlockSize) {
0214 my_trkIdx = trkIdx[cid][tid];
0215 my_hitIdx = hitIdx[cid][tid];
0216 my_nhits = nhits[cid][tid];
0217 my_chi2 = chi2[cid][tid];
0218 }
0219 }
0220
0221 template <int M, int B>
0222 __device__ int BestCands<M, B>::count_valid_cands(int itrack) {
0223 int count = 0;
0224 for (int i = 0; i < M; ++i) {
0225 if (trkIdx[i][itrack] != trkIdx_sentinel)
0226 ++count;
0227 }
0228 return count;
0229 }
0230
0231 }
0232
0233 #endif