Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-08-01 22:34:27

0001 //
0002 // Original Author: Felice Pantaleo, CERN
0003 //
0004 
0005 // #define GPU_DEBUG
0006 
0007 #include <array>
0008 #include <cassert>
0009 #include <functional>
0010 #include <vector>
0011 
0012 #include "DataFormats/Common/interface/Handle.h"
0013 #include "FWCore/Framework/interface/ConsumesCollector.h"
0014 #include "FWCore/Framework/interface/Event.h"
0015 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0016 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0017 #include "FWCore/ServiceRegistry/interface/Service.h"
0018 #include "FWCore/Utilities/interface/EDMException.h"
0019 #include "FWCore/Utilities/interface/isFinite.h"
0020 #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
0021 #include "TrackingTools/DetLayers/interface/BarrelDetLayer.h"
0022 
0023 #include "CAHitNtupletGeneratorOnGPU.h"
0024 
0025 namespace {
0026 
0027   template <typename T>
0028   T sqr(T x) {
0029     return x * x;
0030   }
0031 
0032   cAHitNtupletGenerator::QualityCuts makeQualityCuts(edm::ParameterSet const& pset) {
0033     auto coeff = pset.getParameter<std::vector<double>>("chi2Coeff");
0034     auto ptMax = pset.getParameter<double>("chi2MaxPt");
0035     if (coeff.size() != 2) {
0036       throw edm::Exception(edm::errors::Configuration,
0037                            "CAHitNtupletGeneratorOnGPU.trackQualityCuts.chi2Coeff must have 2 elements");
0038     }
0039     coeff[1] = (coeff[1] - coeff[0]) / log2(ptMax);
0040     return cAHitNtupletGenerator::QualityCuts{// polynomial coefficients for the pT-dependent chi2 cut
0041                                               {(float)coeff[0], (float)coeff[1], 0.f, 0.f},
0042                                               // max pT used to determine the chi2 cut
0043                                               (float)ptMax,
0044                                               // chi2 scale factor: 8 for broken line fit, ?? for Riemann fit
0045                                               (float)pset.getParameter<double>("chi2Scale"),
0046                                               // regional cuts for triplets
0047                                               {(float)pset.getParameter<double>("tripletMaxTip"),
0048                                                (float)pset.getParameter<double>("tripletMinPt"),
0049                                                (float)pset.getParameter<double>("tripletMaxZip")},
0050                                               // regional cuts for quadruplets
0051                                               {(float)pset.getParameter<double>("quadrupletMaxTip"),
0052                                                (float)pset.getParameter<double>("quadrupletMinPt"),
0053                                                (float)pset.getParameter<double>("quadrupletMaxZip")}};
0054   }
0055 
0056 }  // namespace
0057 
0058 using namespace std;
0059 
0060 CAHitNtupletGeneratorOnGPU::CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector& iC)
0061     : m_params(cfg.getParameter<bool>("onGPU"),
0062                cfg.getParameter<unsigned int>("minHitsPerNtuplet"),
0063                cfg.getParameter<unsigned int>("maxNumberOfDoublets"),
0064                cfg.getParameter<unsigned int>("minHitsForSharingCut"),
0065                cfg.getParameter<bool>("useRiemannFit"),
0066                cfg.getParameter<bool>("fitNas4"),
0067                cfg.getParameter<bool>("includeJumpingForwardDoublets"),
0068                cfg.getParameter<bool>("earlyFishbone"),
0069                cfg.getParameter<bool>("lateFishbone"),
0070                cfg.getParameter<bool>("idealConditions"),
0071                cfg.getParameter<bool>("fillStatistics"),
0072                cfg.getParameter<bool>("doClusterCut"),
0073                cfg.getParameter<bool>("doZ0Cut"),
0074                cfg.getParameter<bool>("doPtCut"),
0075                cfg.getParameter<bool>("doSharedHitCut"),
0076                cfg.getParameter<bool>("dupPassThrough"),
0077                cfg.getParameter<bool>("useSimpleTripletCleaner"),
0078                cfg.getParameter<double>("ptmin"),
0079                cfg.getParameter<double>("CAThetaCutBarrel"),
0080                cfg.getParameter<double>("CAThetaCutForward"),
0081                cfg.getParameter<double>("hardCurvCut"),
0082                cfg.getParameter<double>("dcaCutInnerTriplet"),
0083                cfg.getParameter<double>("dcaCutOuterTriplet"),
0084                makeQualityCuts(cfg.getParameterSet("trackQualityCuts"))) {
0085 #ifdef DUMP_GPU_TK_TUPLES
0086   printf("TK: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
0087          "tid",
0088          "qual",
0089          "nh",
0090          "nl",
0091          "charge",
0092          "pt",
0093          "eta",
0094          "phi",
0095          "tip",
0096          "zip",
0097          "chi2",
0098          "h1",
0099          "h2",
0100          "h3",
0101          "h4",
0102          "h5",
0103          "hn");
0104 #endif
0105 }
0106 
0107 void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) {
0108   // 87 cm/GeV = 1/(3.8T * 0.3)
0109   // take less than radius given by the hardPtCut and reject everything below
0110   // auto hardCurvCut = 1.f/(0.35 * 87.f);
0111   desc.add<double>("ptmin", 0.9f)->setComment("Cut on minimum pt");
0112   desc.add<double>("CAThetaCutBarrel", 0.002f)->setComment("Cut on RZ alignement for Barrel");
0113   desc.add<double>("CAThetaCutForward", 0.003f)->setComment("Cut on RZ alignment for Forward");
0114   desc.add<double>("hardCurvCut", 1.f / (0.35 * 87.f))->setComment("Cut on minimum curvature");
0115   desc.add<double>("dcaCutInnerTriplet", 0.15f)->setComment("Cut on origin radius when the inner hit is on BPix1");
0116   desc.add<double>("dcaCutOuterTriplet", 0.25f)->setComment("Cut on origin radius when the outer hit is on BPix1");
0117   desc.add<bool>("earlyFishbone", true);
0118   desc.add<bool>("lateFishbone", false);
0119   desc.add<bool>("idealConditions", true);
0120   desc.add<bool>("fillStatistics", false);
0121   desc.add<unsigned int>("minHitsPerNtuplet", 4);
0122   desc.add<unsigned int>("maxNumberOfDoublets", caConstants::maxNumberOfDoublets);
0123   desc.add<unsigned int>("minHitsForSharingCut", 10)
0124       ->setComment("Maximum number of hits in a tuple to clean also if the shared hit is on bpx1");
0125   desc.add<bool>("includeJumpingForwardDoublets", false);
0126   desc.add<bool>("fitNas4", false)->setComment("fit only 4 hits out of N");
0127   desc.add<bool>("doClusterCut", true);
0128   desc.add<bool>("doZ0Cut", true);
0129   desc.add<bool>("doPtCut", true);
0130   desc.add<bool>("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine");
0131   desc.add<bool>("doSharedHitCut", true)->setComment("Sharing hit nTuples cleaning");
0132   desc.add<bool>("dupPassThrough", false)->setComment("Do not reject duplicate");
0133   desc.add<bool>("useSimpleTripletCleaner", true)->setComment("use alternate implementation");
0134 
0135   edm::ParameterSetDescription trackQualityCuts;
0136   trackQualityCuts.add<double>("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut");
0137   trackQualityCuts.add<std::vector<double>>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above");
0138   trackQualityCuts.add<double>("chi2Scale", 8.)
0139       ->setComment(
0140           "Factor to multiply the pT-dependent chi2 cut (currently: 8 for the broken line fit, ?? for the Riemann "
0141           "fit)");
0142   trackQualityCuts.add<double>("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV");
0143   trackQualityCuts.add<double>("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm");
0144   trackQualityCuts.add<double>("tripletMaxZip", 12.)->setComment("Max |Zip| for triplets, in cm");
0145   trackQualityCuts.add<double>("quadrupletMinPt", 0.3)->setComment("Min pT for quadruplets, in GeV");
0146   trackQualityCuts.add<double>("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm");
0147   trackQualityCuts.add<double>("quadrupletMaxZip", 12.)->setComment("Max |Zip| for quadruplets, in cm");
0148   desc.add<edm::ParameterSetDescription>("trackQualityCuts", trackQualityCuts)
0149       ->setComment(
0150           "Quality cuts based on the results of the track fit:\n  - apply a pT-dependent chi2 cut;\n  - apply \"region "
0151           "cuts\" based on the fit results (pT, Tip, Zip).");
0152 }
0153 
0154 void CAHitNtupletGeneratorOnGPU::beginJob() {
0155   if (m_params.onGPU_) {
0156     // allocate pinned host memory only if CUDA is available
0157     edm::Service<CUDAService> cs;
0158     if (cs and cs->enabled()) {
0159       cudaCheck(cudaMalloc(&m_counters, sizeof(Counters)));
0160       cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters)));
0161     }
0162   } else {
0163     m_counters = new Counters();
0164     memset(m_counters, 0, sizeof(Counters));
0165   }
0166 }
0167 
0168 void CAHitNtupletGeneratorOnGPU::endJob() {
0169   if (m_params.onGPU_) {
0170     // print the gpu statistics and free pinned host memory only if CUDA is available
0171     edm::Service<CUDAService> cs;
0172     if (cs and cs->enabled()) {
0173       if (m_params.doStats_) {
0174         // crash on multi-gpu processes
0175         CAHitNtupletGeneratorKernelsGPU::printCounters(m_counters);
0176       }
0177       cudaFree(m_counters);
0178     }
0179   } else {
0180     if (m_params.doStats_) {
0181       CAHitNtupletGeneratorKernelsCPU::printCounters(m_counters);
0182     }
0183     delete m_counters;
0184   }
0185 }
0186 
0187 PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DGPU const& hits_d,
0188                                                                     float bfield,
0189                                                                     cudaStream_t stream) const {
0190   PixelTrackHeterogeneous tracks(cms::cuda::make_device_unique<pixelTrack::TrackSoA>(stream));
0191 
0192   auto* soa = tracks.get();
0193   assert(soa);
0194 
0195   CAHitNtupletGeneratorKernelsGPU kernels(m_params);
0196   kernels.setCounters(m_counters);
0197   kernels.allocateOnGPU(hits_d.nHits(), stream);
0198 
0199   kernels.buildDoublets(hits_d, stream);
0200   kernels.launchKernels(hits_d, soa, stream);
0201 
0202   HelixFitOnGPU fitter(bfield, m_params.fitNas4_);
0203   fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa);
0204   if (m_params.useRiemannFit_) {
0205     fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream);
0206   } else {
0207     fitter.launchBrokenLineKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream);
0208   }
0209   kernels.classifyTuples(hits_d, soa, stream);
0210 
0211 #ifdef GPU_DEBUG
0212   cudaDeviceSynchronize();
0213   cudaCheck(cudaGetLastError());
0214   std::cout << "finished building pixel tracks on GPU" << std::endl;
0215 #endif
0216 
0217   return tracks;
0218 }
0219 
0220 PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DCPU const& hits_d, float bfield) const {
0221   PixelTrackHeterogeneous tracks(std::make_unique<pixelTrack::TrackSoA>());
0222 
0223   auto* soa = tracks.get();
0224   assert(soa);
0225 
0226   CAHitNtupletGeneratorKernelsCPU kernels(m_params);
0227   kernels.setCounters(m_counters);
0228   kernels.allocateOnGPU(hits_d.nHits(), nullptr);
0229 
0230   kernels.buildDoublets(hits_d, nullptr);
0231   kernels.launchKernels(hits_d, soa, nullptr);
0232 
0233   if (0 == hits_d.nHits())
0234     return tracks;
0235 
0236   // now fit
0237   HelixFitOnGPU fitter(bfield, m_params.fitNas4_);
0238   fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa);
0239 
0240   if (m_params.useRiemannFit_) {
0241     fitter.launchRiemannKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets);
0242   } else {
0243     fitter.launchBrokenLineKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets);
0244   }
0245 
0246   kernels.classifyTuples(hits_d, soa, nullptr);
0247 
0248 #ifdef GPU_DEBUG
0249   std::cout << "finished building pixel tracks on CPU" << std::endl;
0250 #endif
0251 
0252   // check that the fixed-size SoA does not overflow
0253   auto const& tsoa = *soa;
0254   auto maxTracks = tsoa.stride();
0255   auto nTracks = tsoa.nTracks();
0256   assert(nTracks < maxTracks);
0257   if (nTracks == maxTracks - 1) {
0258     edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1
0259                                    << " candidates";
0260   }
0261 
0262   return tracks;
0263 }