Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 09:58:46

0001 #include "RecoBTag/FeatureTools/interface/deep_helpers.h"
0002 #include "DataFormats/PatCandidates/interface/PackedCandidate.h"
0003 #include "FWCore/Utilities/interface/isFinite.h"
0004 #include "TrackingTools/Records/interface/TransientTrackRecord.h"
0005 #include "TrackingTools/TrajectoryState/interface/TrajectoryStateOnSurface.h"
0006 #include "TrackingTools/GeomPropagators/interface/AnalyticalTrajectoryExtrapolatorToLine.h"
0007 #include "TrackingTools/GeomPropagators/interface/AnalyticalImpactPointExtrapolator.h"
0008 #include "DataFormats/VertexReco/interface/VertexFwd.h"
0009 #include "DataFormats/VertexReco/interface/Vertex.h"
0010 #include "DataFormats/Candidate/interface/VertexCompositePtrCandidate.h"
0011 #include "DataFormats/TrackReco/interface/Track.h"
0012 #include "TLorentzVector.h"
0013 #include "DataFormats/Math/interface/deltaR.h"
0014 #include "RecoVertex/VertexTools/interface/VertexDistance3D.h"
0015 #include "RecoVertex/VertexTools/interface/VertexDistanceXY.h"
0016 #include "RecoVertex/VertexPrimitives/interface/ConvertToFromReco.h"
0017 #include "RecoVertex/VertexPrimitives/interface/VertexState.h"
0018 namespace btagbtvdeep {
0019 
0020   constexpr static int qualityMap[8] = {1, 0, 1, 1, 4, 4, 5, 6};
0021 
0022   enum qualityFlagsShiftsAndMasks {
0023     assignmentQualityMask = 0x7,
0024     assignmentQualityShift = 0,
0025     trackHighPurityMask = 0x8,
0026     trackHighPurityShift = 3,
0027     lostInnerHitsMask = 0x30,
0028     lostInnerHitsShift = 4,
0029     muonFlagsMask = 0x0600,
0030     muonFlagsShift = 9
0031   };
0032 
0033   // remove infs and NaNs with value  (adapted from DeepNTuples)
0034   const float catch_infs(const float in, const float replace_value) {
0035     if (edm::isNotFinite(in))
0036       return replace_value;
0037     if (in < -1e32 || in > 1e32)
0038       return replace_value;
0039     return in;
0040   }
0041 
0042   // remove infs/NaN and bound (adapted from DeepNTuples)
0043   const float catch_infs_and_bound(const float in,
0044                                    const float replace_value,
0045                                    const float lowerbound,
0046                                    const float upperbound,
0047                                    const float offset,
0048                                    const bool use_offsets) {
0049     float withoutinfs = catch_infs(in, replace_value);
0050     if (withoutinfs + offset < lowerbound)
0051       return lowerbound;
0052     if (withoutinfs + offset > upperbound)
0053       return upperbound;
0054     if (use_offsets)
0055       withoutinfs += offset;
0056     return withoutinfs;
0057   }
0058 
0059   // 2D distance between SV and PV (adapted from DeepNTuples)
0060   Measurement1D vertexDxy(const reco::VertexCompositePtrCandidate &svcand, const reco::Vertex &pv) {
0061     VertexDistanceXY dist;
0062     reco::Vertex::CovarianceMatrix csv;
0063     svcand.fillVertexCovariance(csv);
0064     reco::Vertex svtx(svcand.vertex(), csv);
0065     return dist.distance(svtx, pv);
0066   }
0067 
0068   //3D distance between SV and PV (adapted from DeepNTuples)
0069   Measurement1D vertexD3d(const reco::VertexCompositePtrCandidate &svcand, const reco::Vertex &pv) {
0070     VertexDistance3D dist;
0071     reco::Vertex::CovarianceMatrix csv;
0072     svcand.fillVertexCovariance(csv);
0073     reco::Vertex svtx(svcand.vertex(), csv);
0074     return dist.distance(svtx, pv);
0075   }
0076 
0077   // dot product between SV and PV (adapted from DeepNTuples)
0078   float vertexDdotP(const reco::VertexCompositePtrCandidate &sv, const reco::Vertex &pv) {
0079     reco::Candidate::Vector p = sv.momentum();
0080     reco::Candidate::Vector d(sv.vx() - pv.x(), sv.vy() - pv.y(), sv.vz() - pv.z());
0081     return p.Unit().Dot(d.Unit());
0082   }
0083 
0084   // compute minimum dr between SVs and a candidate (from DeepNTuples, now polymorphic)
0085   float mindrsvpfcand(const std::vector<reco::VertexCompositePtrCandidate> &svs,
0086                       const reco::Candidate *cand,
0087                       float mindr) {
0088     for (unsigned int i0 = 0; i0 < svs.size(); ++i0) {
0089       float tempdr = reco::deltaR(svs[i0], *cand);
0090       if (tempdr < mindr) {
0091         mindr = tempdr;
0092       }
0093     }
0094     return mindr;
0095   }
0096 
0097   // compute minimum distance between SVs and a candidate (from DeepNTuples, now polymorphic)
0098   float mindistsvpfcand(const std::vector<reco::VertexCompositePtrCandidate> &svs, const reco::TransientTrack track) {
0099     float mindist_ = 999.999;
0100     float out_dist = 0.0;
0101     for (unsigned int i = 0; i < svs.size(); ++i) {
0102       if (!track.isValid()) {
0103         continue;
0104       }
0105       reco::Vertex::CovarianceMatrix csv;
0106       svs[i].fillVertexCovariance(csv);
0107       reco::Vertex vertex(svs[i].vertex(), csv);
0108       if (!vertex.isValid()) {
0109         continue;
0110       }
0111 
0112       GlobalVector direction(svs[i].px(), svs[i].py(), svs[i].pz());
0113 
0114       AnalyticalImpactPointExtrapolator extrapolator(track.field());
0115       TrajectoryStateOnSurface tsos =
0116           extrapolator.extrapolate(track.impactPointState(), RecoVertex::convertPos(vertex.position()));
0117 
0118       VertexDistance3D dist;
0119 
0120       if (!tsos.isValid()) {
0121         continue;
0122       }
0123       GlobalPoint refPoint = tsos.globalPosition();
0124       GlobalError refPointErr = tsos.cartesianError().position();
0125       GlobalPoint vertexPosition = RecoVertex::convertPos(vertex.position());
0126       GlobalError vertexPositionErr = RecoVertex::convertError(vertex.error());
0127 
0128       std::pair<bool, Measurement1D> result(
0129           true, dist.distance(VertexState(vertexPosition, vertexPositionErr), VertexState(refPoint, refPointErr)));
0130       if (!result.first) {
0131         continue;
0132       }
0133 
0134       GlobalPoint impactPoint = tsos.globalPosition();
0135       GlobalVector IPVec(impactPoint.x() - vertex.x(), impactPoint.y() - vertex.y(), impactPoint.z() - vertex.z());
0136       double prod = IPVec.dot(direction);
0137       double sign = (prod >= 0) ? 1. : -1.;
0138 
0139       if (result.second.value() < mindist_) {
0140         out_dist = sign * result.second.value();
0141         mindist_ = result.second.value();
0142       }
0143     }
0144     return out_dist;
0145   }
0146 
0147   // instantiate template
0148   template bool sv_vertex_comparator<reco::VertexCompositePtrCandidate, reco::Vertex>(
0149       const reco::VertexCompositePtrCandidate &, const reco::VertexCompositePtrCandidate &, const reco::Vertex &);
0150 
0151   float vtx_ass_from_pfcand(const reco::PFCandidate &pfcand, int pv_ass_quality, const reco::VertexRef &pv) {
0152     float vtx_ass = pat::PackedCandidate::PVAssociationQuality(qualityMap[pv_ass_quality]);
0153     if (pfcand.trackRef().isNonnull() && pv->trackWeight(pfcand.trackRef()) > 0.5 && pv_ass_quality == 7) {
0154       vtx_ass = pat::PackedCandidate::UsedInFitTight;
0155     }
0156     return vtx_ass;
0157   }
0158 
0159   float quality_from_pfcand(const reco::PFCandidate &pfcand) {
0160     const auto &pseudo_track = (pfcand.bestTrack()) ? *pfcand.bestTrack() : reco::Track();
0161     // conditions from PackedCandidate producer
0162     bool highPurity = pfcand.trackRef().isNonnull() && pseudo_track.quality(reco::Track::highPurity);
0163     // do same bit operations than in PackedCandidate
0164     uint16_t qualityFlags = 0;
0165     qualityFlags = (qualityFlags & ~trackHighPurityMask) | ((highPurity << trackHighPurityShift) & trackHighPurityMask);
0166     bool isHighPurity = (qualityFlags & trackHighPurityMask) >> trackHighPurityShift;
0167     // to do as in TrackBase
0168     uint8_t quality = (1 << reco::TrackBase::loose);
0169     if (isHighPurity) {
0170       quality |= (1 << reco::TrackBase::highPurity);
0171     }
0172     return quality;
0173   }
0174 
0175   float lost_inner_hits_from_pfcand(const reco::PFCandidate &pfcand) {
0176     const auto &pseudo_track = (pfcand.bestTrack()) ? *pfcand.bestTrack() : reco::Track();
0177     // conditions from PackedCandidate producer
0178     bool highPurity = pfcand.trackRef().isNonnull() && pseudo_track.quality(reco::Track::highPurity);
0179     // do same bit operations than in PackedCandidate
0180     uint16_t qualityFlags = 0;
0181     qualityFlags = (qualityFlags & ~trackHighPurityMask) | ((highPurity << trackHighPurityShift) & trackHighPurityMask);
0182     return int16_t((qualityFlags & lostInnerHitsMask) >> lostInnerHitsShift) - 1;
0183   }
0184 
0185   std::pair<float, float> getDRSubjetFeatures(const reco::Jet &jet, const reco::Candidate *cand) {
0186     const auto *patJet = dynamic_cast<const pat::Jet *>(&jet);
0187     std::pair<float, float> features;
0188     // Do Subjets
0189     if (patJet) {
0190       if (patJet->nSubjetCollections() > 0) {
0191         auto subjets = patJet->subjets();
0192         std::nth_element(
0193             subjets.begin(),
0194             subjets.begin() + 1,
0195             subjets.end(),
0196             [](const edm::Ptr<pat::Jet> &p1, const edm::Ptr<pat::Jet> &p2) { return p1->pt() > p2->pt(); });
0197         features.first = !subjets.empty() ? reco::deltaR(*cand, *subjets[0]) : -1;
0198         features.second = subjets.size() > 1 ? reco::deltaR(*cand, *subjets[1]) : -1;
0199       } else {
0200         features.first = -1;
0201         features.second = -1;
0202       }
0203     } else {
0204       features.first = -1;
0205       features.second = -1;
0206     }
0207     return features;
0208   }
0209 
0210   int center_norm_pad(const std::vector<float> &input,
0211                       float center,
0212                       float norm_factor,
0213                       unsigned min_length,
0214                       unsigned max_length,
0215                       std::vector<float> &datavec,
0216                       int startval,
0217                       float pad_value,
0218                       float replace_inf_value,
0219                       float min,
0220                       float max) {
0221     // do variable shifting/scaling/padding/clipping in one go
0222 
0223     assert(min <= pad_value && pad_value <= max);
0224     assert(min_length <= max_length);
0225 
0226     unsigned target_length = std::clamp((unsigned)input.size(), min_length, max_length);
0227     for (unsigned i = 0; i < target_length; ++i) {
0228       if (i < input.size()) {
0229         datavec[i + startval] = std::clamp((catch_infs(input[i], replace_inf_value) - center) * norm_factor, min, max);
0230       } else {
0231         datavec[i + startval] = pad_value;
0232       }
0233     }
0234     return target_length;
0235   }
0236 
0237   int center_norm_pad_halfRagged(const std::vector<float> &input,
0238                                  float center,
0239                                  float norm_factor,
0240                                  unsigned target_length,
0241                                  std::vector<float> &datavec,
0242                                  int startval,
0243                                  float pad_value,
0244                                  float replace_inf_value,
0245                                  float min,
0246                                  float max) {
0247     // do variable shifting/scaling/padding/clipping in one go
0248 
0249     assert(min <= pad_value && pad_value <= max);
0250 
0251     for (unsigned i = 0; i < std::min(static_cast<unsigned int>(input.size()), target_length); ++i) {
0252       datavec.push_back(std::clamp((catch_infs(input[i], replace_inf_value) - center) * norm_factor, min, max));
0253     }
0254     if (input.size() < target_length)
0255       datavec.insert(datavec.end(), target_length - input.size(), pad_value);
0256 
0257     return target_length;
0258   }
0259 
0260   void ParticleNetConstructor(const edm::ParameterSet &Config_,
0261                               bool doExtra,
0262                               std::vector<std::string> &input_names_,
0263                               std::unordered_map<std::string, PreprocessParams> &prep_info_map_,
0264                               std::vector<std::vector<int64_t>> &input_shapes_,
0265                               std::vector<unsigned> &input_sizes_,
0266                               cms::Ort::FloatArrays *data_) {
0267     // load preprocessing info
0268     auto json_path = Config_.getParameter<std::string>("preprocess_json");
0269     if (!json_path.empty()) {
0270       // use preprocessing json file if available
0271       std::ifstream ifs(edm::FileInPath(json_path).fullPath());
0272       nlohmann::json js = nlohmann::json::parse(ifs);
0273       js.at("input_names").get_to(input_names_);
0274       for (const auto &group_name : input_names_) {
0275         const auto &group_pset = js.at(group_name);
0276         auto &prep_params = prep_info_map_[group_name];
0277         group_pset.at("var_names").get_to(prep_params.var_names);
0278         if (group_pset.contains("var_length")) {
0279           prep_params.min_length = group_pset.at("var_length");
0280           prep_params.max_length = prep_params.min_length;
0281         } else {
0282           prep_params.min_length = group_pset.at("min_length");
0283           prep_params.max_length = group_pset.at("max_length");
0284           input_shapes_.push_back({1, (int64_t)prep_params.var_names.size(), -1});
0285         }
0286         const auto &var_info_pset = group_pset.at("var_infos");
0287         for (const auto &var_name : prep_params.var_names) {
0288           const auto &var_pset = var_info_pset.at(var_name);
0289           double median = var_pset.at("median");
0290           double norm_factor = var_pset.at("norm_factor");
0291           double replace_inf_value = var_pset.at("replace_inf_value");
0292           double lower_bound = var_pset.at("lower_bound");
0293           double upper_bound = var_pset.at("upper_bound");
0294           double pad = var_pset.contains("pad") ? double(var_pset.at("pad")) : 0;
0295           prep_params.var_info_map[var_name] =
0296               PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, pad);
0297         }
0298 
0299         if (doExtra && data_ != nullptr) {
0300           // create data storage with a fixed size vector initialized w/ 0
0301           const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
0302           data_->emplace_back(len, 0);
0303         }
0304       }
0305     } else {
0306       // otherwise use the PSet in the python config file
0307       const auto &prep_pset = Config_.getParameterSet("preprocessParams");
0308       input_names_ = prep_pset.getParameter<std::vector<std::string>>("input_names");
0309       for (const auto &group_name : input_names_) {
0310         const edm::ParameterSet &group_pset = prep_pset.getParameterSet(group_name);
0311         auto &prep_params = prep_info_map_[group_name];
0312         prep_params.var_names = group_pset.getParameter<std::vector<std::string>>("var_names");
0313         prep_params.min_length = group_pset.getParameter<unsigned>("var_length");
0314         prep_params.max_length = prep_params.min_length;
0315         const auto &var_info_pset = group_pset.getParameterSet("var_infos");
0316         for (const auto &var_name : prep_params.var_names) {
0317           const edm::ParameterSet &var_pset = var_info_pset.getParameterSet(var_name);
0318           double median = var_pset.getParameter<double>("median");
0319           double norm_factor = var_pset.getParameter<double>("norm_factor");
0320           double replace_inf_value = var_pset.getParameter<double>("replace_inf_value");
0321           double lower_bound = var_pset.getParameter<double>("lower_bound");
0322           double upper_bound = var_pset.getParameter<double>("upper_bound");
0323           prep_params.var_info_map[var_name] =
0324               PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, 0);
0325         }
0326 
0327         if (doExtra && data_ != nullptr) {
0328           // create data storage with a fixed size vector initialized w/ 0
0329           const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
0330           data_->emplace_back(len, 0);
0331         }
0332       }
0333     }
0334   }
0335 
0336 }  // namespace btagbtvdeep