Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:02:20

0001 #include <algorithm>
0002 #include <iterator>
0003 #include <ostream>
0004 #include <cctype>
0005 
0006 #include "boost/algorithm/string.hpp"
0007 
0008 #include "DataFormats/Provenance/interface/BranchDescription.h"
0009 #include "FWCore/Framework/interface/ProductSelectorRules.h"
0010 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0011 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0012 #include "FWCore/Utilities/interface/EDMException.h"
0013 
0014 namespace edm {
0015   // The following typedef is used only in this implementation file, in
0016   // order to shorten several lines of code.
0017   typedef std::vector<edm::BranchDescription const*> VCBDP;
0018 
0019   namespace {
0020 
//--------------------------------------------------
// partial_match is a helper for Rule. It reports whether the *entire*
// branchstring matches regularExpression (std::regex_match semantics: a
// match on only part of the string does not count). It does no wildcard
// handling of its own; the caller supplies an already-built regex.
inline bool partial_match(const std::regex& regularExpression, const std::string& branchstring) {
  const bool wholeStringMatches = std::regex_match(branchstring.begin(), branchstring.end(), regularExpression);
  return wholeStringMatches;
}
0027   }  // namespace
0028 
0029   //--------------------------------------------------
0030   // Class Rule is used to determine whether or not a given branch
0031   // (really a ProductResolver, as described by the BranchDescription object
0032   // that specifies that ProductResolver) matches a 'rule' specified by the
0033   // configuration. Each Rule is configured with a single std::string from
0034   // the configuration file.
0035   //
0036   // The configuration std::string is of the form:
0037   //
0038   //   'keep <spec>'            ** or **
0039   //   'drop <spec>'
0040   //
0041   // where '<spec>' is of the form:
0042   //
0043   //   <product type>_<module label>_<instance name>_<process name>
0044   //
0045   // The 3 underscores must always be present.  The four fields can
0046   // be empty or composed of alphanumeric characters.  "*" is an
0047   // allowed wildcard that will match 0 or more of any characters.
0048   // "?" is the other allowed wilcard that will match exactly one
0049   // character.  There is one exception to this, the entire '<spec>'
0050   // can be one single "*" without any underscores and this is
0051   // interpreted as "*_*_*_*".  Anything else will lead to an exception
0052   // being thrown.
0053   //
0054   // This class has much room for optimization. This should be
0055   // revisited as soon as profiling data are available.
0056 
0057   ProductSelectorRules::Rule::Rule(std::string const& s, std::string const& parameterName, std::string const& owner)
0058       : selectflag_(), productType_(), moduleLabel_(), instanceName_(), processName_() {
0059     if (s.size() < 6)
0060       throw edm::Exception(edm::errors::Configuration)
0061           << "Invalid statement in configuration file\n"
0062           << "In " << owner << " parameter named '" << parameterName << "'\n"
0063           << "Rule must have at least 6 characters because it must\n"
0064           << "specify 'keep ' or 'drop ' and also supply a pattern.\n"
0065           << "This is the invalid output configuration rule:\n"
0066           << "    " << s << "\n"
0067           << "Exception thrown from ProductSelectorRules::Rule\n";
0068 
0069     if (s.substr(0, 4) == "keep")
0070       selectflag_ = true;
0071     else if (s.substr(0, 4) == "drop")
0072       selectflag_ = false;
0073     else
0074       throw edm::Exception(edm::errors::Configuration)
0075           << "Invalid statement in configuration file\n"
0076           << "In " << owner << " parameter named '" << parameterName << "'\n"
0077           << "Rule must specify 'keep ' or 'drop ' and also supply a pattern.\n"
0078           << "This is the invalid output configuration rule:\n"
0079           << "    " << s << "\n"
0080           << "Exception thrown from ProductSelectorRules::Rule\n";
0081 
0082     if (!std::isspace(s[4])) {
0083       throw edm::Exception(edm::errors::Configuration)
0084           << "Invalid statement in configuration file\n"
0085           << "In " << owner << " parameter named '" << parameterName << "'\n"
0086           << "In each rule, 'keep' or 'drop' must be followed by a space\n"
0087           << "This is the invalid output configuration rule:\n"
0088           << "    " << s << "\n"
0089           << "Exception thrown from ProductSelectorRules::Rule\n";
0090     }
0091 
0092     // Now pull apart the std::string to get at the bits and pieces of the
0093     // specification...
0094 
0095     // Grab from after 'keep/drop ' (note the space!) to the end of
0096     // the std::string...
0097     std::string spec(s.begin() + 5, s.end());
0098 
0099     // Trim any leading and trailing whitespace from spec
0100     boost::trim(spec);
0101 
0102     if (spec == "*")  // special case for wildcard
0103     {
0104       productType_ = ".*";
0105       moduleLabel_ = ".*";
0106       instanceName_ = ".*";
0107       processName_ = ".*";
0108       return;
0109     } else {
0110       std::vector<std::string> parts;
0111       boost::split(parts, spec, boost::is_any_of("_"));
0112 
0113       // The std::vector must contain at least 4 parts
0114       // and none may be empty.
0115       bool good = (parts.size() == 4);
0116 
0117       // Require all the std::strings to contain only alphanumberic
0118       // characters or "*" or "?"
0119       if (good) {
0120         for (int i = 0; i < 4; ++i) {
0121           std::string& field = parts[i];
0122           int size = field.size();
0123           for (int j = 0; j < size; ++j) {
0124             if (!(isalnum(field[j]) || field[j] == '*' || field[j] == '?')) {
0125               good = false;
0126             }
0127           }
0128 
0129           // We are using the boost regex library to deal with the wildcards.
0130           // The configuration file uses a syntax that accepts "*" and "?"
0131           // as wildcards so we need to convert these to the syntax used in
0132           // regular expressions.
0133           boost::replace_all(parts[i], "*", ".*");
0134           boost::replace_all(parts[i], "?", ".");
0135         }
0136       }
0137 
0138       if (!good) {
0139         throw edm::Exception(edm::errors::Configuration)
0140             << "Invalid statement in configuration file\n"
0141             << "In " << owner << " parameter named '" << parameterName << "'\n"
0142             << "In each rule, after 'keep ' or 'drop ' there must\n"
0143             << "be a branch specification of the form 'type_label_instance_process'\n"
0144             << "There must be 4 fields separated by underscores\n"
0145             << "The fields can only contain alphanumeric characters and the wildcards * or ?\n"
0146             << "Alternately, a single * is also allowed for the branch specification\n"
0147             << "This is the invalid output configuration rule:\n"
0148             << "    " << s << "\n"
0149             << "Exception thrown from ProductSelectorRules::Rule\n";
0150       }
0151 
0152       productType_ = parts[0];
0153       moduleLabel_ = parts[1];
0154       instanceName_ = parts[2];
0155       processName_ = parts[3];
0156     }
0157   }
0158 
0159   void ProductSelectorRules::Rule::applyToAll(std::vector<BranchSelectState>& branchstates) const {
0160     std::vector<BranchSelectState>::iterator it = branchstates.begin();
0161     std::vector<BranchSelectState>::iterator end = branchstates.end();
0162     for (; it != end; ++it)
0163       applyToOne(it->desc, it->selectMe);
0164   }
0165 
0166   void ProductSelectorRules::applyToAll(std::vector<BranchSelectState>& branchstates) const {
0167     std::vector<Rule>::const_iterator it = rules_.begin();
0168     std::vector<Rule>::const_iterator end = rules_.end();
0169     for (; it != end; ++it)
0170       it->applyToAll(branchstates);
0171   }
0172 
0173   //   bool
0174   //   Rule::applyToOne(edm::BranchDescription const* branch) const
0175   //   {
0176   //     bool match =
0177   //       partial_match(productType_, branch->friendlyClassName()) &&
0178   //       partial_match(moduleLabel_, branch->moduleLabel()) &&
0179   //       partial_match(instanceName_, branch->productInstanceName()) &&
0180   //       partial_match(processName_, branch->processName());
0181 
0182   //     return match ? selectflag_ : !selectflag_;
0183   //   }
0184 
0185   void ProductSelectorRules::Rule::applyToOne(edm::BranchDescription const* branch, bool& result) const {
0186     if (this->appliesTo(branch))
0187       result = selectflag_;
0188   }
0189 
0190   bool ProductSelectorRules::Rule::appliesTo(edm::BranchDescription const* branch) const {
0191     return partial_match(productType_, branch->friendlyClassName()) &&
0192            partial_match(moduleLabel_, branch->moduleLabel()) &&
0193            partial_match(instanceName_, branch->productInstanceName()) &&
0194            partial_match(processName_, branch->processName());
0195   }
0196 
0197   const std::vector<std::string>& ProductSelectorRules::defaultSelectionStrings() {
0198     static const std::vector<std::string> s_defaultStrings(1U, std::string("keep *"));
0199     return s_defaultStrings;
0200   }
0201 
0202   void ProductSelectorRules::fillDescription(ParameterSetDescription& desc,
0203                                              char const* parameterName,
0204                                              std::vector<std::string> const& defaultStrings) {
0205     ;
0206     desc.addUntracked<std::vector<std::string> >(parameterName, defaultStrings)
0207         ->setComment("Specifies which branches are kept or dropped.");
0208   }
0209 
0210   ProductSelectorRules::ProductSelectorRules(ParameterSet const& pset,
0211                                              std::string const& parameterName,
0212                                              std::string const& parameterOwnerName)
0213       : rules_(), parameterName_(parameterName), parameterOwnerName_(parameterOwnerName) {
0214     // Fill the rules.
0215     // If there is no parameter whose name is parameterName_ in the
0216     // ParameterSet we are given, we use the following default.
0217     std::vector<std::string> defaultCommands(1U, std::string("keep *"));
0218 
0219     std::vector<std::string> commands =
0220         pset.getUntrackedParameter<std::vector<std::string> >(parameterName, defaultCommands);
0221     if (commands.empty()) {
0222       commands.push_back(defaultCommands[0]);
0223     }
0224     rules_.reserve(commands.size());
0225     for (std::vector<std::string>::const_iterator it = commands.begin(), end = commands.end(); it != end; ++it) {
0226       rules_.push_back(Rule(*it, parameterName, parameterOwnerName));
0227     }
0228     keepAll_ = commands.size() == 1 && commands[0] == defaultCommands[0];
0229   }
0230 
0231 }  // namespace edm