Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 10:02:47

0001 #include "RiemannFitOnGPU.h"
0002 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0003 
0004 template <typename TrackerTraits>
0005 void HelixFitOnGPU<TrackerTraits>::launchRiemannKernels(const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0006                                                         uint32_t nhits,
0007                                                         uint32_t maxNumberOfTuples,
0008                                                         cudaStream_t stream) {
0009   assert(tuples_);
0010 
0011   auto blockSize = 64;
0012   auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0013 
0014   //  Fit internals
0015   auto hitsGPU = cms::cuda::make_device_unique<double[]>(
0016       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double), stream);
0017   auto hits_geGPU = cms::cuda::make_device_unique<float[]>(
0018       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float), stream);
0019   auto fast_fit_resultsGPU = cms::cuda::make_device_unique<double[]>(
0020       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream);
0021   auto circle_fit_resultsGPU_holder =
0022       cms::cuda::make_device_unique<char[]>(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit), stream);
0023   riemannFit::CircleFit *circle_fit_resultsGPU_ = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get());
0024 
0025   for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0026     // triplets
0027     kernel_FastFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(
0028         tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0029     cudaCheck(cudaGetLastError());
0030 
0031     kernel_CircleFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tupleMultiplicity_,
0032                                                                                  3,
0033                                                                                  bField_,
0034                                                                                  hitsGPU.get(),
0035                                                                                  hits_geGPU.get(),
0036                                                                                  fast_fit_resultsGPU.get(),
0037                                                                                  circle_fit_resultsGPU_,
0038                                                                                  offset);
0039     cudaCheck(cudaGetLastError());
0040 
0041     kernel_LineFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tupleMultiplicity_,
0042                                                                                3,
0043                                                                                bField_,
0044                                                                                outputSoa_,
0045                                                                                hitsGPU.get(),
0046                                                                                hits_geGPU.get(),
0047                                                                                fast_fit_resultsGPU.get(),
0048                                                                                circle_fit_resultsGPU_,
0049                                                                                offset);
0050     cudaCheck(cudaGetLastError());
0051 
0052     // quads
0053     kernel_FastFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0054         tuples_, tupleMultiplicity_, 4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0055     cudaCheck(cudaGetLastError());
0056 
0057     kernel_CircleFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0058                                                                                      4,
0059                                                                                      bField_,
0060                                                                                      hitsGPU.get(),
0061                                                                                      hits_geGPU.get(),
0062                                                                                      fast_fit_resultsGPU.get(),
0063                                                                                      circle_fit_resultsGPU_,
0064                                                                                      offset);
0065     cudaCheck(cudaGetLastError());
0066 
0067     kernel_LineFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0068                                                                                    4,
0069                                                                                    bField_,
0070                                                                                    outputSoa_,
0071                                                                                    hitsGPU.get(),
0072                                                                                    hits_geGPU.get(),
0073                                                                                    fast_fit_resultsGPU.get(),
0074                                                                                    circle_fit_resultsGPU_,
0075                                                                                    offset);
0076     cudaCheck(cudaGetLastError());
0077 
0078     if (fitNas4_) {
0079       // penta
0080       kernel_FastFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0081           tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0082       cudaCheck(cudaGetLastError());
0083 
0084       kernel_CircleFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0085                                                                                        5,
0086                                                                                        bField_,
0087                                                                                        hitsGPU.get(),
0088                                                                                        hits_geGPU.get(),
0089                                                                                        fast_fit_resultsGPU.get(),
0090                                                                                        circle_fit_resultsGPU_,
0091                                                                                        offset);
0092       cudaCheck(cudaGetLastError());
0093 
0094       kernel_LineFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0095                                                                                      5,
0096                                                                                      bField_,
0097                                                                                      outputSoa_,
0098                                                                                      hitsGPU.get(),
0099                                                                                      hits_geGPU.get(),
0100                                                                                      fast_fit_resultsGPU.get(),
0101                                                                                      circle_fit_resultsGPU_,
0102                                                                                      offset);
0103       cudaCheck(cudaGetLastError());
0104     } else {
0105       // penta all 5
0106       kernel_FastFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0107           tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0108       cudaCheck(cudaGetLastError());
0109 
0110       kernel_CircleFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0111                                                                                        5,
0112                                                                                        bField_,
0113                                                                                        hitsGPU.get(),
0114                                                                                        hits_geGPU.get(),
0115                                                                                        fast_fit_resultsGPU.get(),
0116                                                                                        circle_fit_resultsGPU_,
0117                                                                                        offset);
0118       cudaCheck(cudaGetLastError());
0119 
0120       kernel_LineFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0121                                                                                      5,
0122                                                                                      bField_,
0123                                                                                      outputSoa_,
0124                                                                                      hitsGPU.get(),
0125                                                                                      hits_geGPU.get(),
0126                                                                                      fast_fit_resultsGPU.get(),
0127                                                                                      circle_fit_resultsGPU_,
0128                                                                                      offset);
0129       cudaCheck(cudaGetLastError());
0130     }
0131   }
0132 }