Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:38:05

0001 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0002 
0003 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0004 #include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
0005 
0006 #include "PixelVertexWorkSpaceUtilities.h"
0007 #include "PixelVertexWorkSpaceSoAHost.h"
0008 #include "PixelVertexWorkSpaceSoADevice.h"
0009 
0010 #include "gpuClusterTracksByDensity.h"
0011 #include "gpuClusterTracksDBSCAN.h"
0012 #include "gpuClusterTracksIterative.h"
0013 #include "gpuFitVertices.h"
0014 #include "gpuSortByPt2.h"
0015 #include "gpuSplitVertices.h"
0016 
0017 #undef PIXVERTEX_DEBUG_PRODUCE
0018 
0019 namespace gpuVertexFinder {
0020 
0021   // reject outlier tracks that contribute more than this to the chi2 of the vertex fit
0022   constexpr float maxChi2ForFirstFit = 50.f;
0023   constexpr float maxChi2ForFinalFit = 5000.f;
0024 
0025   // split vertices with a chi2/NDoF greater than this
0026   constexpr float maxChi2ForSplit = 9.f;
0027 
0028   template <typename TrackerTraits>
0029   __global__ void loadTracks(
0030       TrackSoAConstView<TrackerTraits> tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) {
0031     auto const* quality = tracks_view.quality();
0032     using helper = TracksUtilities<TrackerTraits>;
0033     auto first = blockIdx.x * blockDim.x + threadIdx.x;
0034     for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) {
0035       auto nHits = helper::nHits(tracks_view, idx);
0036       assert(nHits >= 3);
0037 
0038       // initialize soa...
0039       soa[idx].idv() = -1;
0040 
0041       if (helper::isTriplet(tracks_view, idx))
0042         continue;  // no triplets
0043       if (quality[idx] < pixelTrack::Quality::highPurity)
0044         continue;
0045 
0046       auto pt = tracks_view[idx].pt();
0047 
0048       if (pt < ptMin)
0049         continue;
0050 
0051       // clamp pt
0052       pt = std::min(pt, ptMax);
0053 
0054       auto& data = pws;
0055       auto it = atomicAdd(&data.ntrks(), 1);
0056       data[it].itrk() = idx;
0057       data[it].zt() = helper::zip(tracks_view, idx);
0058       data[it].ezt2() = tracks_view[idx].covariance()(14);
0059       data[it].ptt2() = pt * pt;
0060     }
0061   }
0062 
0063 // #define THREE_KERNELS
0064 #ifndef THREE_KERNELS
0065   __global__ void vertexFinderOneKernel(VtxSoAView pdata,
0066                                         WsSoAView pws,
0067                                         int minT,      // min number of neighbours to be "seed"
0068                                         float eps,     // max absolute distance to cluster
0069                                         float errmax,  // max error to be "seed"
0070                                         float chi2max  // max normalized distance to cluster,
0071   ) {
0072     clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0073     __syncthreads();
0074     fitVertices(pdata, pws, maxChi2ForFirstFit);
0075     __syncthreads();
0076     splitVertices(pdata, pws, maxChi2ForSplit);
0077     __syncthreads();
0078     fitVertices(pdata, pws, maxChi2ForFinalFit);
0079     __syncthreads();
0080     sortByPt2(pdata, pws);
0081   }
0082 #else
0083   __global__ void vertexFinderKernel1(VtxSoAView pdata,
0084                                       WsSoAView pws,
0085                                       int minT,      // min number of neighbours to be "seed"
0086                                       float eps,     // max absolute distance to cluster
0087                                       float errmax,  // max error to be "seed"
0088                                       float chi2max  // max normalized distance to cluster,
0089   ) {
0090     clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0091     __syncthreads();
0092     fitVertices(pdata, pws, maxChi2ForFirstFit);
0093   }
0094 
0095   __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) {
0096     fitVertices(pdata, pws, maxChi2ForFinalFit);
0097     __syncthreads();
0098     sortByPt2(pdata, pws);
0099   }
0100 #endif
0101 
0102   template <typename TrackerTraits>
0103 #ifdef __CUDACC__
0104   ZVertexSoADevice Producer<TrackerTraits>::makeAsync(cudaStream_t stream,
0105                                                       const TrackSoAConstView<TrackerTraits>& tracks_view,
0106                                                       float ptMin,
0107                                                       float ptMax) const {
0108 #ifdef PIXVERTEX_DEBUG_PRODUCE
0109     std::cout << "producing Vertices on GPU" << std::endl;
0110 #endif  // PIXVERTEX_DEBUG_PRODUCE
0111     ZVertexSoADevice vertices(stream);
0112 #else
0113   ZVertexSoAHost Producer<TrackerTraits>::make(const TrackSoAConstView<TrackerTraits>& tracks_view,
0114                                                float ptMin,
0115                                                float ptMax) const {
0116 #ifdef PIXVERTEX_DEBUG_PRODUCE
0117     std::cout << "producing Vertices on  CPU" << std::endl;
0118 #endif  // PIXVERTEX_DEBUG_PRODUCE
0119     ZVertexSoAHost vertices;
0120 #endif
0121     auto soa = vertices.view();
0122 
0123     assert(vertices.buffer());
0124 
0125 #ifdef __CUDACC__
0126     auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream);
0127 #else
0128     auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost();
0129 #endif
0130 
0131 #ifdef __CUDACC__
0132     init<<<1, 1, 0, stream>>>(soa, ws_d.view());
0133     auto blockSize = 128;
0134     auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize;
0135     loadTracks<TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
0136     cudaCheck(cudaGetLastError());
0137 #else
0138     init(soa, ws_d.view());
0139     loadTracks<TrackerTraits>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
0140 #endif
0141 
0142 #ifdef __CUDACC__
0143     // Running too many thread lead to problems when printf is enabled.
0144     constexpr int maxThreadsForPrint = 1024 - 128;
0145     constexpr int numBlocks = 1024;
0146     constexpr int threadsPerBlock = 128;
0147 
0148     if (oneKernel_) {
0149       // implemented only for density clustesrs
0150 #ifndef THREE_KERNELS
0151       vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0152 #else
0153       vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0154       cudaCheck(cudaGetLastError());
0155       // one block per vertex...
0156       splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
0157       cudaCheck(cudaGetLastError());
0158       vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
0159 #endif
0160     } else {  // five kernels
0161       if (useDensity_) {
0162         clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
0163             soa, ws_d.view(), minT, eps, errmax, chi2max);
0164       } else if (useDBSCAN_) {
0165         clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0166       } else if (useIterative_) {
0167         clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
0168       }
0169       cudaCheck(cudaGetLastError());
0170       fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit);
0171       cudaCheck(cudaGetLastError());
0172       if (doSplitting_) {
0173         // one block per vertex...
0174         splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
0175         cudaCheck(cudaGetLastError());
0176         fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit);
0177         cudaCheck(cudaGetLastError());
0178       }
0179       sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
0180     }
0181     cudaCheck(cudaGetLastError());
0182 #else  // __CUDACC__
0183     if (useDensity_) {
0184       clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max);
0185     } else if (useDBSCAN_) {
0186       clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max);
0187     } else if (useIterative_) {
0188       clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max);
0189     }
0190 #ifdef PIXVERTEX_DEBUG_PRODUCE
0191     std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl;
0192 #endif  // PIXVERTEX_DEBUG_PRODUCE
0193     fitVertices(soa, ws_d.view(), maxChi2ForFirstFit);
0194     // one block per vertex!
0195     if (doSplitting_) {
0196       splitVertices(soa, ws_d.view(), maxChi2ForSplit);
0197       fitVertices(soa, ws_d.view(), maxChi2ForFinalFit);
0198     }
0199     sortByPt2(soa, ws_d.view());
0200 #endif
0201 
0202     return vertices;
0203   }
0204 
0205   template class Producer<pixelTopology::Phase1>;
0206   template class Producer<pixelTopology::Phase2>;
0207   template class Producer<pixelTopology::HIonPhase1>;
0208 }  // namespace gpuVertexFinder