File indexing completed on 2024-09-07 04:38:05
0001 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0002
0003 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0004 #include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
0005
0006 #include "PixelVertexWorkSpaceUtilities.h"
0007 #include "PixelVertexWorkSpaceSoAHost.h"
0008 #include "PixelVertexWorkSpaceSoADevice.h"
0009
0010 #include "gpuClusterTracksByDensity.h"
0011 #include "gpuClusterTracksDBSCAN.h"
0012 #include "gpuClusterTracksIterative.h"
0013 #include "gpuFitVertices.h"
0014 #include "gpuSortByPt2.h"
0015 #include "gpuSplitVertices.h"
0016
0017 #undef PIXVERTEX_DEBUG_PRODUCE
0018
0019 namespace gpuVertexFinder {
0020
0021
0022 constexpr float maxChi2ForFirstFit = 50.f;
0023 constexpr float maxChi2ForFinalFit = 5000.f;
0024
0025
0026 constexpr float maxChi2ForSplit = 9.f;
0027
0028 template <typename TrackerTraits>
0029 __global__ void loadTracks(
0030 TrackSoAConstView<TrackerTraits> tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) {
0031 auto const* quality = tracks_view.quality();
0032 using helper = TracksUtilities<TrackerTraits>;
0033 auto first = blockIdx.x * blockDim.x + threadIdx.x;
0034 for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) {
0035 auto nHits = helper::nHits(tracks_view, idx);
0036 assert(nHits >= 3);
0037
0038
0039 soa[idx].idv() = -1;
0040
0041 if (helper::isTriplet(tracks_view, idx))
0042 continue;
0043 if (quality[idx] < pixelTrack::Quality::highPurity)
0044 continue;
0045
0046 auto pt = tracks_view[idx].pt();
0047
0048 if (pt < ptMin)
0049 continue;
0050
0051
0052 pt = std::min(pt, ptMax);
0053
0054 auto& data = pws;
0055 auto it = atomicAdd(&data.ntrks(), 1);
0056 data[it].itrk() = idx;
0057 data[it].zt() = helper::zip(tracks_view, idx);
0058 data[it].ezt2() = tracks_view[idx].covariance()(14);
0059 data[it].ptt2() = pt * pt;
0060 }
0061 }
0062
0063
0064 #ifndef THREE_KERNELS
0065 __global__ void vertexFinderOneKernel(VtxSoAView pdata,
0066 WsSoAView pws,
0067 int minT,
0068 float eps,
0069 float errmax,
0070 float chi2max
0071 ) {
0072 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0073 __syncthreads();
0074 fitVertices(pdata, pws, maxChi2ForFirstFit);
0075 __syncthreads();
0076 splitVertices(pdata, pws, maxChi2ForSplit);
0077 __syncthreads();
0078 fitVertices(pdata, pws, maxChi2ForFinalFit);
0079 __syncthreads();
0080 sortByPt2(pdata, pws);
0081 }
0082 #else
0083 __global__ void vertexFinderKernel1(VtxSoAView pdata,
0084 WsSoAView pws,
0085 int minT,
0086 float eps,
0087 float errmax,
0088 float chi2max
0089 ) {
0090 clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0091 __syncthreads();
0092 fitVertices(pdata, pws, maxChi2ForFirstFit);
0093 }
0094
0095 __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) {
0096 fitVertices(pdata, pws, maxChi2ForFinalFit);
0097 __syncthreads();
0098 sortByPt2(pdata, pws);
0099 }
0100 #endif
0101
0102 template <typename TrackerTraits>
0103 #ifdef __CUDACC__
0104 ZVertexSoADevice Producer<TrackerTraits>::makeAsync(cudaStream_t stream,
0105 const TrackSoAConstView<TrackerTraits>& tracks_view,
0106 float ptMin,
0107 float ptMax) const {
0108 #ifdef PIXVERTEX_DEBUG_PRODUCE
0109 std::cout << "producing Vertices on GPU" << std::endl;
0110 #endif
0111 ZVertexSoADevice vertices(stream);
0112 #else
0113 ZVertexSoAHost Producer<TrackerTraits>::make(const TrackSoAConstView<TrackerTraits>& tracks_view,
0114 float ptMin,
0115 float ptMax) const {
0116 #ifdef PIXVERTEX_DEBUG_PRODUCE
0117 std::cout << "producing Vertices on CPU" << std::endl;
0118 #endif
0119 ZVertexSoAHost vertices;
0120 #endif
0121 auto soa = vertices.view();
0122
0123 assert(vertices.buffer());
0124
0125 #ifdef __CUDACC__
0126 auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream);
0127 #else
0128 auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost();
0129 #endif
0130
0131 #ifdef __CUDACC__
0132 init<<<1, 1, 0, stream>>>(soa, ws_d.view());
0133 auto blockSize = 128;
0134 auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize;
0135 loadTracks<TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
0136 cudaCheck(cudaGetLastError());
0137 #else
0138 init(soa, ws_d.view());
0139 loadTracks<TrackerTraits>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
0140 #endif
0141
0142 #ifdef __CUDACC__
0143
0144 constexpr int maxThreadsForPrint = 1024 - 128;
0145 constexpr int numBlocks = 1024;
0146 constexpr int threadsPerBlock = 128;
0147
0148 if (oneKernel_) {
0149
0150 #ifndef THREE_KERNELS
0151 vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0152 #else
0153 vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0154 cudaCheck(cudaGetLastError());
0155
0156 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
0157 cudaCheck(cudaGetLastError());
0158 vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
0159 #endif
0160 } else {
0161 if (useDensity_) {
0162 clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
0163 soa, ws_d.view(), minT, eps, errmax, chi2max);
0164 } else if (useDBSCAN_) {
0165 clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0166 } else if (useIterative_) {
0167 clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0168 }
0169 cudaCheck(cudaGetLastError());
0170 fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit);
0171 cudaCheck(cudaGetLastError());
0172 if (doSplitting_) {
0173
0174 splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
0175 cudaCheck(cudaGetLastError());
0176 fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit);
0177 cudaCheck(cudaGetLastError());
0178 }
0179 sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
0180 }
0181 cudaCheck(cudaGetLastError());
0182 #else
0183 if (useDensity_) {
0184 clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max);
0185 } else if (useDBSCAN_) {
0186 clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max);
0187 } else if (useIterative_) {
0188 clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max);
0189 }
0190 #ifdef PIXVERTEX_DEBUG_PRODUCE
0191 std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl;
0192 #endif
0193 fitVertices(soa, ws_d.view(), maxChi2ForFirstFit);
0194
0195 if (doSplitting_) {
0196 splitVertices(soa, ws_d.view(), maxChi2ForSplit);
0197 fitVertices(soa, ws_d.view(), maxChi2ForFinalFit);
0198 }
0199 sortByPt2(soa, ws_d.view());
0200 #endif
0201
0202 return vertices;
0203 }
0204
0205 template class Producer<pixelTopology::Phase1>;
0206 template class Producer<pixelTopology::Phase2>;
0207 template class Producer<pixelTopology::HIonPhase1>;
0208 }