Back to home page

Project CMSSW displayed by LXR

 
 

    


Warning, /RecoTracker/PixelSeeding/plugins/BrokenLineFitOnGPU.cu is written in an unsupported language. File is not indexed.

0001 #include "BrokenLineFitOnGPU.h"
0002 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0003 
0004 template <typename TrackerTraits>
0005 void HelixFitOnGPU<TrackerTraits>::launchBrokenLineKernels(const TrackingRecHitSoAConstView<TrackerTraits>& hv,
0006                                                            uint32_t hitsInFit,
0007                                                            uint32_t maxNumberOfTuples,
0008                                                            cudaStream_t stream) {
0009   assert(tuples_);
0010 
0011   auto blockSize = 64;
0012   auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0013 
0014   //  Fit internals
0015   auto tkidGPU =
0016       cms::cuda::make_device_unique<typename TrackerTraits::tindex_type[]>(maxNumberOfConcurrentFits_, stream);
0017   auto hitsGPU = cms::cuda::make_device_unique<double[]>(
0018       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), stream);
0019   auto hits_geGPU = cms::cuda::make_device_unique<float[]>(
0020       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), stream);
0021   auto fast_fit_resultsGPU = cms::cuda::make_device_unique<double[]>(
0022       maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream);
0023 
0024   for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0025     // fit triplets
0026 
0027     kernel_BLFastFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tuples_,
0028                                                                                  tupleMultiplicity_,
0029                                                                                  hv,
0030                                                                                  tkidGPU.get(),
0031                                                                                  hitsGPU.get(),
0032                                                                                  hits_geGPU.get(),
0033                                                                                  fast_fit_resultsGPU.get(),
0034                                                                                  3,
0035                                                                                  3,
0036                                                                                  offset);
0037     cudaCheck(cudaGetLastError());
0038 
0039     kernel_BLFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tupleMultiplicity_,
0040                                                                              bField_,
0041                                                                              outputSoa_,
0042                                                                              tkidGPU.get(),
0043                                                                              hitsGPU.get(),
0044                                                                              hits_geGPU.get(),
0045                                                                              fast_fit_resultsGPU.get());
0046     cudaCheck(cudaGetLastError());
0047 
0048     if (fitNas4_) {
0049       // fit all as 4
0050       riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
0051                                                                      &hv,
0052                                                                      &tkidGPU,
0053                                                                      &hitsGPU,
0054                                                                      &hits_geGPU,
0055                                                                      &fast_fit_resultsGPU,
0056                                                                      &offset,
0057                                                                      &numberOfBlocks,
0058                                                                      &blockSize,
0059                                                                      &stream](auto i) {
0060         kernel_BLFastFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tuples_,
0061                                                                                          tupleMultiplicity_,
0062                                                                                          hv,
0063                                                                                          tkidGPU.get(),
0064                                                                                          hitsGPU.get(),
0065                                                                                          hits_geGPU.get(),
0066                                                                                          fast_fit_resultsGPU.get(),
0067                                                                                          4,
0068                                                                                          4,
0069                                                                                          offset);
0070 
0071         cudaCheck(cudaGetLastError());
0072 
0073         kernel_BLFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0074                                                                                      bField_,
0075                                                                                      outputSoa_,
0076                                                                                      tkidGPU.get(),
0077                                                                                      hitsGPU.get(),
0078                                                                                      hits_geGPU.get(),
0079                                                                                      fast_fit_resultsGPU.get());
0080 
0081         cudaCheck(cudaGetLastError());
0082       });
0083 
0084     } else {
0085       riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
0086                                                                                &hv,
0087                                                                                &tkidGPU,
0088                                                                                &hitsGPU,
0089                                                                                &hits_geGPU,
0090                                                                                &fast_fit_resultsGPU,
0091                                                                                &offset,
0092                                                                                &numberOfBlocks,
0093                                                                                &blockSize,
0094                                                                                &stream](auto i) {
0095         kernel_BLFastFit<i, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tuples_,
0096                                                                                          tupleMultiplicity_,
0097                                                                                          hv,
0098                                                                                          tkidGPU.get(),
0099                                                                                          hitsGPU.get(),
0100                                                                                          hits_geGPU.get(),
0101                                                                                          fast_fit_resultsGPU.get(),
0102                                                                                          i,
0103                                                                                          i,
0104                                                                                          offset);
0105 
0106         kernel_BLFit<i, TrackerTraits><<<8, blockSize, 0, stream>>>(tupleMultiplicity_,
0107                                                                     bField_,
0108                                                                     outputSoa_,
0109                                                                     tkidGPU.get(),
0110                                                                     hitsGPU.get(),
0111                                                                     hits_geGPU.get(),
0112                                                                     fast_fit_resultsGPU.get());
0113       });
0114 
0115       static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
0116 
0117       //Fit all the rest using the maximum from previous call
0118       kernel_BLFastFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>
0119           <<<numberOfBlocks / 4, blockSize, 0, stream>>>(tuples_,
0120                                                          tupleMultiplicity_,
0121                                                          hv,
0122                                                          tkidGPU.get(),
0123                                                          hitsGPU.get(),
0124                                                          hits_geGPU.get(),
0125                                                          fast_fit_resultsGPU.get(),
0126                                                          TrackerTraits::maxHitsOnTrackForFullFit,
0127                                                          TrackerTraits::maxHitsOnTrack - 1,
0128                                                          offset);
0129 
0130       kernel_BLFit<TrackerTraits::maxHitsOnTrackForFullFit, TrackerTraits>
0131           <<<8, blockSize, 0, stream>>>(tupleMultiplicity_,
0132                                         bField_,
0133                                         outputSoa_,
0134                                         tkidGPU.get(),
0135                                         hitsGPU.get(),
0136                                         hits_geGPU.get(),
0137                                         fast_fit_resultsGPU.get());
0138     }
0139 
0140   }  // loop on concurrent fits
0141 }
0142 
0143 template class HelixFitOnGPU<pixelTopology::Phase1>;
0144 template class HelixFitOnGPU<pixelTopology::Phase2>;
0145 template class HelixFitOnGPU<pixelTopology::HIonPhase1>;