File indexing completed on 2024-04-06 12:28:35
0001 #include "RiemannFitOnGPU.h"
0002 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0003
0004 template <typename TrackerTraits>
0005 void HelixFitOnGPU<TrackerTraits>::launchRiemannKernels(const TrackingRecHitSoAConstView<TrackerTraits> &hv,
0006 uint32_t nhits,
0007 uint32_t maxNumberOfTuples,
0008 cudaStream_t stream) {
0009 assert(tuples_);
0010
0011 auto blockSize = 64;
0012 auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;
0013
0014
0015 auto hitsGPU = cms::cuda::make_device_unique<double[]>(
0016 maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double), stream);
0017 auto hits_geGPU = cms::cuda::make_device_unique<float[]>(
0018 maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float), stream);
0019 auto fast_fit_resultsGPU = cms::cuda::make_device_unique<double[]>(
0020 maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream);
0021 auto circle_fit_resultsGPU_holder =
0022 cms::cuda::make_device_unique<char[]>(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit), stream);
0023 riemannFit::CircleFit *circle_fit_resultsGPU_ = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get());
0024
0025 for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
0026
0027 kernel_FastFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(
0028 tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0029 cudaCheck(cudaGetLastError());
0030
0031 kernel_CircleFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tupleMultiplicity_,
0032 3,
0033 bField_,
0034 hitsGPU.get(),
0035 hits_geGPU.get(),
0036 fast_fit_resultsGPU.get(),
0037 circle_fit_resultsGPU_,
0038 offset);
0039 cudaCheck(cudaGetLastError());
0040
0041 kernel_LineFit<3, TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tupleMultiplicity_,
0042 3,
0043 bField_,
0044 outputSoa_,
0045 hitsGPU.get(),
0046 hits_geGPU.get(),
0047 fast_fit_resultsGPU.get(),
0048 circle_fit_resultsGPU_,
0049 offset);
0050 cudaCheck(cudaGetLastError());
0051
0052
0053 kernel_FastFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0054 tuples_, tupleMultiplicity_, 4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0055 cudaCheck(cudaGetLastError());
0056
0057 kernel_CircleFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0058 4,
0059 bField_,
0060 hitsGPU.get(),
0061 hits_geGPU.get(),
0062 fast_fit_resultsGPU.get(),
0063 circle_fit_resultsGPU_,
0064 offset);
0065 cudaCheck(cudaGetLastError());
0066
0067 kernel_LineFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0068 4,
0069 bField_,
0070 outputSoa_,
0071 hitsGPU.get(),
0072 hits_geGPU.get(),
0073 fast_fit_resultsGPU.get(),
0074 circle_fit_resultsGPU_,
0075 offset);
0076 cudaCheck(cudaGetLastError());
0077
0078 if (fitNas4_) {
0079
0080 kernel_FastFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0081 tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0082 cudaCheck(cudaGetLastError());
0083
0084 kernel_CircleFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0085 5,
0086 bField_,
0087 hitsGPU.get(),
0088 hits_geGPU.get(),
0089 fast_fit_resultsGPU.get(),
0090 circle_fit_resultsGPU_,
0091 offset);
0092 cudaCheck(cudaGetLastError());
0093
0094 kernel_LineFit<4, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0095 5,
0096 bField_,
0097 outputSoa_,
0098 hitsGPU.get(),
0099 hits_geGPU.get(),
0100 fast_fit_resultsGPU.get(),
0101 circle_fit_resultsGPU_,
0102 offset);
0103 cudaCheck(cudaGetLastError());
0104 } else {
0105
0106 kernel_FastFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(
0107 tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset);
0108 cudaCheck(cudaGetLastError());
0109
0110 kernel_CircleFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0111 5,
0112 bField_,
0113 hitsGPU.get(),
0114 hits_geGPU.get(),
0115 fast_fit_resultsGPU.get(),
0116 circle_fit_resultsGPU_,
0117 offset);
0118 cudaCheck(cudaGetLastError());
0119
0120 kernel_LineFit<5, TrackerTraits><<<numberOfBlocks / 4, blockSize, 0, stream>>>(tupleMultiplicity_,
0121 5,
0122 bField_,
0123 outputSoa_,
0124 hitsGPU.get(),
0125 hits_geGPU.get(),
0126 fast_fit_resultsGPU.get(),
0127 circle_fit_resultsGPU_,
0128 offset);
0129 cudaCheck(cudaGetLastError());
0130 }
0131 }
0132 }