Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-06-30 23:17:12

0001 #include "RecoBTag/FeatureTools/interface/deep_helpers.h"
0002 #include "DataFormats/PatCandidates/interface/PackedCandidate.h"
0003 #include "FWCore/Utilities/interface/isFinite.h"
0004 
0005 namespace btagbtvdeep {
0006 
0007   constexpr static int qualityMap[8] = {1, 0, 1, 1, 4, 4, 5, 6};
0008 
0009   enum qualityFlagsShiftsAndMasks {
0010     assignmentQualityMask = 0x7,
0011     assignmentQualityShift = 0,
0012     trackHighPurityMask = 0x8,
0013     trackHighPurityShift = 3,
0014     lostInnerHitsMask = 0x30,
0015     lostInnerHitsShift = 4,
0016     muonFlagsMask = 0x0600,
0017     muonFlagsShift = 9
0018   };
0019 
0020   // remove infs and NaNs with value  (adapted from DeepNTuples)
0021   const float catch_infs(const float in, const float replace_value) {
0022     if (edm::isNotFinite(in))
0023       return replace_value;
0024     if (in < -1e32 || in > 1e32)
0025       return replace_value;
0026     return in;
0027   }
0028 
0029   // remove infs/NaN and bound (adapted from DeepNTuples)
0030   const float catch_infs_and_bound(const float in,
0031                                    const float replace_value,
0032                                    const float lowerbound,
0033                                    const float upperbound,
0034                                    const float offset,
0035                                    const bool use_offsets) {
0036     float withoutinfs = catch_infs(in, replace_value);
0037     if (withoutinfs + offset < lowerbound)
0038       return lowerbound;
0039     if (withoutinfs + offset > upperbound)
0040       return upperbound;
0041     if (use_offsets)
0042       withoutinfs += offset;
0043     return withoutinfs;
0044   }
0045 
0046   // 2D distance between SV and PV (adapted from DeepNTuples)
0047   Measurement1D vertexDxy(const reco::VertexCompositePtrCandidate &svcand, const reco::Vertex &pv) {
0048     VertexDistanceXY dist;
0049     reco::Vertex::CovarianceMatrix csv;
0050     svcand.fillVertexCovariance(csv);
0051     reco::Vertex svtx(svcand.vertex(), csv);
0052     return dist.distance(svtx, pv);
0053   }
0054 
0055   //3D distance between SV and PV (adapted from DeepNTuples)
0056   Measurement1D vertexD3d(const reco::VertexCompositePtrCandidate &svcand, const reco::Vertex &pv) {
0057     VertexDistance3D dist;
0058     reco::Vertex::CovarianceMatrix csv;
0059     svcand.fillVertexCovariance(csv);
0060     reco::Vertex svtx(svcand.vertex(), csv);
0061     return dist.distance(svtx, pv);
0062   }
0063 
0064   // dot product between SV and PV (adapted from DeepNTuples)
0065   float vertexDdotP(const reco::VertexCompositePtrCandidate &sv, const reco::Vertex &pv) {
0066     reco::Candidate::Vector p = sv.momentum();
0067     reco::Candidate::Vector d(sv.vx() - pv.x(), sv.vy() - pv.y(), sv.vz() - pv.z());
0068     return p.Unit().Dot(d.Unit());
0069   }
0070 
0071   // compute minimum dr between SVs and a candidate (from DeepNTuples, now polymorphic)
0072   float mindrsvpfcand(const std::vector<reco::VertexCompositePtrCandidate> &svs,
0073                       const reco::Candidate *cand,
0074                       float mindr) {
0075     for (unsigned int i0 = 0; i0 < svs.size(); ++i0) {
0076       float tempdr = reco::deltaR(svs[i0], *cand);
0077       if (tempdr < mindr) {
0078         mindr = tempdr;
0079       }
0080     }
0081     return mindr;
0082   }
0083 
0084   // instantiate template
0085   template bool sv_vertex_comparator<reco::VertexCompositePtrCandidate, reco::Vertex>(
0086       const reco::VertexCompositePtrCandidate &, const reco::VertexCompositePtrCandidate &, const reco::Vertex &);
0087 
0088   float vtx_ass_from_pfcand(const reco::PFCandidate &pfcand, int pv_ass_quality, const reco::VertexRef &pv) {
0089     float vtx_ass = pat::PackedCandidate::PVAssociationQuality(qualityMap[pv_ass_quality]);
0090     if (pfcand.trackRef().isNonnull() && pv->trackWeight(pfcand.trackRef()) > 0.5 && pv_ass_quality == 7) {
0091       vtx_ass = pat::PackedCandidate::UsedInFitTight;
0092     }
0093     return vtx_ass;
0094   }
0095 
0096   float quality_from_pfcand(const reco::PFCandidate &pfcand) {
0097     const auto &pseudo_track = (pfcand.bestTrack()) ? *pfcand.bestTrack() : reco::Track();
0098     // conditions from PackedCandidate producer
0099     bool highPurity = pfcand.trackRef().isNonnull() && pseudo_track.quality(reco::Track::highPurity);
0100     // do same bit operations than in PackedCandidate
0101     uint16_t qualityFlags = 0;
0102     qualityFlags = (qualityFlags & ~trackHighPurityMask) | ((highPurity << trackHighPurityShift) & trackHighPurityMask);
0103     bool isHighPurity = (qualityFlags & trackHighPurityMask) >> trackHighPurityShift;
0104     // to do as in TrackBase
0105     uint8_t quality = (1 << reco::TrackBase::loose);
0106     if (isHighPurity) {
0107       quality |= (1 << reco::TrackBase::highPurity);
0108     }
0109     return quality;
0110   }
0111 
0112   float lost_inner_hits_from_pfcand(const reco::PFCandidate &pfcand) {
0113     const auto &pseudo_track = (pfcand.bestTrack()) ? *pfcand.bestTrack() : reco::Track();
0114     // conditions from PackedCandidate producer
0115     bool highPurity = pfcand.trackRef().isNonnull() && pseudo_track.quality(reco::Track::highPurity);
0116     // do same bit operations than in PackedCandidate
0117     uint16_t qualityFlags = 0;
0118     qualityFlags = (qualityFlags & ~trackHighPurityMask) | ((highPurity << trackHighPurityShift) & trackHighPurityMask);
0119     return int16_t((qualityFlags & lostInnerHitsMask) >> lostInnerHitsShift) - 1;
0120   }
0121 
0122   std::pair<float, float> getDRSubjetFeatures(const reco::Jet &jet, const reco::Candidate *cand) {
0123     const auto *patJet = dynamic_cast<const pat::Jet *>(&jet);
0124     std::pair<float, float> features;
0125     // Do Subjets
0126     if (patJet) {
0127       if (patJet->nSubjetCollections() > 0) {
0128         auto subjets = patJet->subjets();
0129         std::nth_element(
0130             subjets.begin(),
0131             subjets.begin() + 1,
0132             subjets.end(),
0133             [](const edm::Ptr<pat::Jet> &p1, const edm::Ptr<pat::Jet> &p2) { return p1->pt() > p2->pt(); });
0134         features.first = !subjets.empty() ? reco::deltaR(*cand, *subjets[0]) : -1;
0135         features.second = subjets.size() > 1 ? reco::deltaR(*cand, *subjets[1]) : -1;
0136       } else {
0137         features.first = -1;
0138         features.second = -1;
0139       }
0140     } else {
0141       features.first = -1;
0142       features.second = -1;
0143     }
0144     return features;
0145   }
0146 
0147   int center_norm_pad(const std::vector<float> &input,
0148                       float center,
0149                       float norm_factor,
0150                       unsigned min_length,
0151                       unsigned max_length,
0152                       std::vector<float> &datavec,
0153                       int startval,
0154                       float pad_value,
0155                       float replace_inf_value,
0156                       float min,
0157                       float max) {
0158     // do variable shifting/scaling/padding/clipping in one go
0159 
0160     assert(min <= pad_value && pad_value <= max);
0161     assert(min_length <= max_length);
0162 
0163     unsigned target_length = std::clamp((unsigned)input.size(), min_length, max_length);
0164     for (unsigned i = 0; i < target_length; ++i) {
0165       if (i < input.size()) {
0166         datavec[i + startval] = std::clamp((catch_infs(input[i], replace_inf_value) - center) * norm_factor, min, max);
0167       } else {
0168         datavec[i + startval] = pad_value;
0169       }
0170     }
0171     return target_length;
0172   }
0173 
0174   int center_norm_pad_halfRagged(const std::vector<float> &input,
0175                                  float center,
0176                                  float norm_factor,
0177                                  unsigned target_length,
0178                                  std::vector<float> &datavec,
0179                                  int startval,
0180                                  float pad_value,
0181                                  float replace_inf_value,
0182                                  float min,
0183                                  float max) {
0184     // do variable shifting/scaling/padding/clipping in one go
0185 
0186     assert(min <= pad_value && pad_value <= max);
0187 
0188     for (unsigned i = 0; i < std::min(static_cast<unsigned int>(input.size()), target_length); ++i) {
0189       datavec.push_back(std::clamp((catch_infs(input[i], replace_inf_value) - center) * norm_factor, min, max));
0190     }
0191     if (input.size() < target_length)
0192       datavec.insert(datavec.end(), target_length - input.size(), pad_value);
0193 
0194     return target_length;
0195   }
0196 
0197   void ParticleNetConstructor(const edm::ParameterSet &Config_,
0198                               bool doExtra,
0199                               std::vector<std::string> &input_names_,
0200                               std::unordered_map<std::string, PreprocessParams> &prep_info_map_,
0201                               std::vector<std::vector<int64_t>> &input_shapes_,
0202                               std::vector<unsigned> &input_sizes_,
0203                               cms::Ort::FloatArrays *data_) {
0204     // load preprocessing info
0205     auto json_path = Config_.getParameter<std::string>("preprocess_json");
0206     if (!json_path.empty()) {
0207       // use preprocessing json file if available
0208       std::ifstream ifs(edm::FileInPath(json_path).fullPath());
0209       nlohmann::json js = nlohmann::json::parse(ifs);
0210       js.at("input_names").get_to(input_names_);
0211       for (const auto &group_name : input_names_) {
0212         const auto &group_pset = js.at(group_name);
0213         auto &prep_params = prep_info_map_[group_name];
0214         group_pset.at("var_names").get_to(prep_params.var_names);
0215         if (group_pset.contains("var_length")) {
0216           prep_params.min_length = group_pset.at("var_length");
0217           prep_params.max_length = prep_params.min_length;
0218         } else {
0219           prep_params.min_length = group_pset.at("min_length");
0220           prep_params.max_length = group_pset.at("max_length");
0221           input_shapes_.push_back({1, (int64_t)prep_params.var_names.size(), -1});
0222         }
0223         const auto &var_info_pset = group_pset.at("var_infos");
0224         for (const auto &var_name : prep_params.var_names) {
0225           const auto &var_pset = var_info_pset.at(var_name);
0226           double median = var_pset.at("median");
0227           double norm_factor = var_pset.at("norm_factor");
0228           double replace_inf_value = var_pset.at("replace_inf_value");
0229           double lower_bound = var_pset.at("lower_bound");
0230           double upper_bound = var_pset.at("upper_bound");
0231           double pad = var_pset.contains("pad") ? double(var_pset.at("pad")) : 0;
0232           prep_params.var_info_map[var_name] =
0233               PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, pad);
0234         }
0235 
0236         if (doExtra && data_ != nullptr) {
0237           // create data storage with a fixed size vector initialized w/ 0
0238           const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
0239           data_->emplace_back(len, 0);
0240         }
0241       }
0242     } else {
0243       // otherwise use the PSet in the python config file
0244       const auto &prep_pset = Config_.getParameterSet("preprocessParams");
0245       input_names_ = prep_pset.getParameter<std::vector<std::string>>("input_names");
0246       for (const auto &group_name : input_names_) {
0247         const edm::ParameterSet &group_pset = prep_pset.getParameterSet(group_name);
0248         auto &prep_params = prep_info_map_[group_name];
0249         prep_params.var_names = group_pset.getParameter<std::vector<std::string>>("var_names");
0250         prep_params.min_length = group_pset.getParameter<unsigned>("var_length");
0251         prep_params.max_length = prep_params.min_length;
0252         const auto &var_info_pset = group_pset.getParameterSet("var_infos");
0253         for (const auto &var_name : prep_params.var_names) {
0254           const edm::ParameterSet &var_pset = var_info_pset.getParameterSet(var_name);
0255           double median = var_pset.getParameter<double>("median");
0256           double norm_factor = var_pset.getParameter<double>("norm_factor");
0257           double replace_inf_value = var_pset.getParameter<double>("replace_inf_value");
0258           double lower_bound = var_pset.getParameter<double>("lower_bound");
0259           double upper_bound = var_pset.getParameter<double>("upper_bound");
0260           prep_params.var_info_map[var_name] =
0261               PreprocessParams::VarInfo(median, norm_factor, replace_inf_value, lower_bound, upper_bound, 0);
0262         }
0263 
0264         if (doExtra && data_ != nullptr) {
0265           // create data storage with a fixed size vector initialized w/ 0
0266           const auto &len = input_sizes_.emplace_back(prep_params.max_length * prep_params.var_names.size());
0267           data_->emplace_back(len, 0);
0268         }
0269       }
0270     }
0271   }
0272 
0273 }  // namespace btagbtvdeep