Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-04-04 00:15:51

0001 #ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
0002 #define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h
0003 
0004 // #define GPU_DEBUG
0005 
0006 #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
0007 #include "GPUCACell.h"
0008 
0009 // #define DUMP_GPU_TK_TUPLES
0010 
0011 namespace cAHitNtupletGenerator {
0012 
0013   // counters
0014   struct Counters {
0015     unsigned long long nEvents;
0016     unsigned long long nHits;
0017     unsigned long long nCells;
0018     unsigned long long nTuples;
0019     unsigned long long nFitTracks;
0020     unsigned long long nLooseTracks;
0021     unsigned long long nGoodTracks;
0022     unsigned long long nUsedHits;
0023     unsigned long long nDupHits;
0024     unsigned long long nFishCells;
0025     unsigned long long nKilledCells;
0026     unsigned long long nEmptyCells;
0027     unsigned long long nZeroTrackCells;
0028   };
0029 
0030   using HitsView = TrackingRecHit2DSOAView;
0031   using HitsOnGPU = TrackingRecHit2DSOAView;
0032 
0033   using HitToTuple = caConstants::HitToTuple;
0034   using TupleMultiplicity = caConstants::TupleMultiplicity;
0035 
0036   using Quality = pixelTrack::Quality;
0037   using TkSoA = pixelTrack::TrackSoA;
0038   using HitContainer = pixelTrack::HitContainer;
0039 
0040   struct QualityCuts {
0041     // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
0042     float chi2Coeff[4];
0043     float chi2MaxPt;  // GeV
0044     float chi2Scale;
0045 
0046     struct Region {
0047       float maxTip;  // cm
0048       float minPt;   // GeV
0049       float maxZip;  // cm
0050     };
0051 
0052     Region triplet;
0053     Region quadruplet;
0054   };
0055 
0056   // params (FIXME: thi si a POD: so no constructor no traling _  and no const as params_ is already const)
0057   struct Params {
0058     Params(bool onGPU,
0059            uint32_t minHitsPerNtuplet,
0060            uint32_t maxNumberOfDoublets,
0061            uint16_t minHitsForSharingCuts,
0062            bool useRiemannFit,
0063            bool fitNas4,
0064            bool includeJumpingForwardDoublets,
0065            bool earlyFishbone,
0066            bool lateFishbone,
0067            bool idealConditions,
0068            bool doStats,
0069            bool doClusterCut,
0070            bool doZ0Cut,
0071            bool doPtCut,
0072            bool doSharedHitCut,
0073            bool dupPassThrough,
0074            bool useSimpleTripletCleaner,
0075            float ptmin,
0076            float CAThetaCutBarrel,
0077            float CAThetaCutForward,
0078            float hardCurvCut,
0079            float dcaCutInnerTriplet,
0080            float dcaCutOuterTriplet,
0081 
0082            QualityCuts const& cuts)
0083         : onGPU_(onGPU),
0084           minHitsPerNtuplet_(minHitsPerNtuplet),
0085           maxNumberOfDoublets_(maxNumberOfDoublets),
0086           minHitsForSharingCut_(minHitsForSharingCuts),
0087           useRiemannFit_(useRiemannFit),
0088           fitNas4_(fitNas4),
0089           includeJumpingForwardDoublets_(includeJumpingForwardDoublets),
0090           earlyFishbone_(earlyFishbone),
0091           lateFishbone_(lateFishbone),
0092           idealConditions_(idealConditions),
0093           doStats_(doStats),
0094           doClusterCut_(doClusterCut),
0095           doZ0Cut_(doZ0Cut),
0096           doPtCut_(doPtCut),
0097           doSharedHitCut_(doSharedHitCut),
0098           dupPassThrough_(dupPassThrough),
0099           useSimpleTripletCleaner_(useSimpleTripletCleaner),
0100           ptmin_(ptmin),
0101           CAThetaCutBarrel_(CAThetaCutBarrel),
0102           CAThetaCutForward_(CAThetaCutForward),
0103           hardCurvCut_(hardCurvCut),
0104           dcaCutInnerTriplet_(dcaCutInnerTriplet),
0105           dcaCutOuterTriplet_(dcaCutOuterTriplet),
0106           cuts_(cuts) {}
0107 
0108     const bool onGPU_;
0109     const uint32_t minHitsPerNtuplet_;
0110     const uint32_t maxNumberOfDoublets_;
0111     const uint16_t minHitsForSharingCut_;
0112     const bool useRiemannFit_;
0113     const bool fitNas4_;
0114     const bool includeJumpingForwardDoublets_;
0115     const bool earlyFishbone_;
0116     const bool lateFishbone_;
0117     const bool idealConditions_;
0118     const bool doStats_;
0119     const bool doClusterCut_;
0120     const bool doZ0Cut_;
0121     const bool doPtCut_;
0122     const bool doSharedHitCut_;
0123     const bool dupPassThrough_;
0124     const bool useSimpleTripletCleaner_;
0125     const float ptmin_;
0126     const float CAThetaCutBarrel_;
0127     const float CAThetaCutForward_;
0128     const float hardCurvCut_;
0129     const float dcaCutInnerTriplet_;
0130     const float dcaCutOuterTriplet_;
0131 
0132     // quality cuts
0133     QualityCuts cuts_{// polynomial coefficients for the pT-dependent chi2 cut
0134                       {0.68177776, 0.74609577, -0.08035491, 0.00315399},
0135                       // max pT used to determine the chi2 cut
0136                       10.,
0137                       // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit
0138                       30.,
0139                       // regional cuts for triplets
0140                       {
0141                           0.3,  // |Tip| < 0.3 cm
0142                           0.5,  // pT > 0.5 GeV
0143                           12.0  // |Zip| < 12.0 cm
0144                       },
0145                       // regional cuts for quadruplets
0146                       {
0147                           0.5,  // |Tip| < 0.5 cm
0148                           0.3,  // pT > 0.3 GeV
0149                           12.0  // |Zip| < 12.0 cm
0150                       }};
0151 
0152   };  // Params
0153 
0154 }  // namespace cAHitNtupletGenerator
0155 
0156 template <typename TTraits>
0157 class CAHitNtupletGeneratorKernels {
0158 public:
0159   using Traits = TTraits;
0160 
0161   using QualityCuts = cAHitNtupletGenerator::QualityCuts;
0162   using Params = cAHitNtupletGenerator::Params;
0163   using Counters = cAHitNtupletGenerator::Counters;
0164 
0165   template <typename T>
0166   using unique_ptr = typename Traits::template unique_ptr<T>;
0167 
0168   using HitsView = TrackingRecHit2DSOAView;
0169   using HitsOnGPU = TrackingRecHit2DSOAView;
0170   using HitsOnCPU = TrackingRecHit2DHeterogeneous<Traits>;
0171 
0172   using HitToTuple = caConstants::HitToTuple;
0173   using TupleMultiplicity = caConstants::TupleMultiplicity;
0174 
0175   using Quality = pixelTrack::Quality;
0176   using TkSoA = pixelTrack::TrackSoA;
0177   using HitContainer = pixelTrack::HitContainer;
0178 
0179   CAHitNtupletGeneratorKernels(Params const& params)
0180       : params_(params), paramsMaxDoubletes3Quarters_(3 * params.maxNumberOfDoublets_ / 4) {}
0181   ~CAHitNtupletGeneratorKernels() = default;
0182 
0183   TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.get(); }
0184 
0185   void launchKernels(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
0186 
0187   void classifyTuples(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream);
0188 
0189   void buildDoublets(HitsOnCPU const& hh, cudaStream_t stream);
0190   void allocateOnGPU(int32_t nHits, cudaStream_t stream);
0191   void cleanup(cudaStream_t cudaStream);
0192 
0193   static void printCounters(Counters const* counters);
0194   void setCounters(Counters* counters) { counters_ = counters; }
0195 
0196 private:
0197   Counters* counters_ = nullptr;
0198 
0199   // workspace
0200   unique_ptr<unsigned char[]> cellStorage_;
0201   unique_ptr<caConstants::CellNeighborsVector> device_theCellNeighbors_;
0202   caConstants::CellNeighbors* device_theCellNeighborsContainer_;
0203   unique_ptr<caConstants::CellTracksVector> device_theCellTracks_;
0204   caConstants::CellTracks* device_theCellTracksContainer_;
0205 
0206   unique_ptr<GPUCACell[]> device_theCells_;
0207   unique_ptr<GPUCACell::OuterHitOfCellContainer[]> device_isOuterHitOfCell_;
0208   GPUCACell::OuterHitOfCell isOuterHitOfCell_;
0209   uint32_t* device_nCells_ = nullptr;
0210 
0211   unique_ptr<HitToTuple> device_hitToTuple_;
0212   unique_ptr<HitToTuple::Counter[]> device_hitToTupleStorage_;
0213   HitToTuple::View hitToTupleView_;
0214 
0215   cms::cuda::AtomicPairCounter* device_hitToTuple_apc_ = nullptr;
0216 
0217   cms::cuda::AtomicPairCounter* device_hitTuple_apc_ = nullptr;
0218 
0219   unique_ptr<TupleMultiplicity> device_tupleMultiplicity_;
0220 
0221   unique_ptr<cms::cuda::AtomicPairCounter::c_type[]> device_storage_;
0222   // params
0223   Params const& params_;
0224   /// Intermediate result avoiding repeated computations.
0225   const uint32_t paramsMaxDoubletes3Quarters_;
0226   /// Compute the number of doublet blocks for block size
0227   inline uint32_t nDoubletBlocks(uint32_t blockSize) {
0228     // We want (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize, but first part is pre-computed.
0229     return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize;
0230   }
0231 
0232   /// Compute the number of quadruplet blocks for block size
0233   inline uint32_t nQuadrupletBlocks(uint32_t blockSize) {
0234     // caConstants::maxNumberOfQuadruplets is a constexpr, so the compiler will pre compute the 3*max/4
0235     return (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize;
0236   }
0237 };
0238 
0239 using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels<cms::cudacompat::GPUTraits>;
0240 using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels<cms::cudacompat::CPUTraits>;
0241 
0242 #endif  // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h