Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 13:27:36

0001 #include "FWCore/Catalog/interface/FileLocator.h"
0002 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
0003 #include "FWCore/ServiceRegistry/interface/Service.h"
0004 
0005 #include <boost/algorithm/string.hpp>
0006 #include <boost/algorithm/string/replace.hpp>
0007 
0008 #include <cstdlib>
0009 #include <stdexcept>
0010 #include <fstream>
0011 #include <iostream>
0012 #include <sstream>
0013 
namespace {

  // Expand the "$N" placeholders of `outputFormat` with the capture groups of `matches`.
  //
  // Group N (N >= 1) replaces every occurrence of the literal text "$N". Group 0
  // (the whole match) is never substituted. A group that matched the empty string
  // leaves its placeholder untouched in the output.
  //
  // Returns the expanded copy of `outputFormat`; if `matches` holds no capture
  // groups the format is returned unchanged.
  std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
    std::string result = outputFormat;

    // Substitute from the highest group index down to 1. Going upwards would let
    // "$1" eat the first two characters of "$10", "$11", ... and corrupt any rule
    // that uses ten or more capture groups.
    for (size_t i = matches.size(); i-- > 1;) {
      std::string const matchedString = matches[i].str();
      if (matchedString.empty())
        continue;

      std::string const placeholder = "$" + std::to_string(i);
      // Resume the search after the inserted text so that the replacement itself
      // is never rescanned for the same placeholder.
      for (size_t pos = result.find(placeholder); pos != std::string::npos;
           pos = result.find(placeholder, pos + matchedString.size())) {
        result.replace(pos, placeholder.size(), matchedString);
      }
    }
    return result;
  }

  constexpr char const* const kEmptyString = "";

  // Map a possibly-null C string to a usable one: null becomes the empty string,
  // anything else is returned as is.
  const char* safe(const char* iCheck) { return iCheck != nullptr ? iCheck : kEmptyString; }

}  // namespace
0043 
0044 namespace edm {
0045   FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
0046     init(catUrl, iCatalog);
0047 
0048     // std::cout << m_protocols.size() << " protocols" << std::endl;
0049     // std::cout << m_directRules[m_protocols[0]].size() << " rules" << std::endl;
0050   }
0051 
0052   FileLocator::~FileLocator() {}
0053 
0054   std::string FileLocator::pfn(std::string const& ilfn) const { return convert(ilfn, m_directRules, true); }
0055 
0056   std::string FileLocator::lfn(std::string const& ipfn) const { return convert(ipfn, m_inverseRules, false); }
0057 
0058   std::string FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
0059     std::string out = "";
0060 
0061     for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
0062       out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
0063       if (!out.empty())
0064         return out;
0065     }
0066     return out;
0067   }
0068 
0069   void FileLocator::parseRule(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
0070     if (!ruleElement) {
0071       throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
0072     }
0073 
0074     auto const protocol = safe(ruleElement->Attribute("protocol"));
0075     auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
0076     if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
0077       destinationMatchRegexp = ".*";
0078     }
0079 
0080     auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
0081     auto const result = safe(ruleElement->Attribute("result"));
0082     auto const chain = safe(ruleElement->Attribute("chain"));
0083 
0084     Rule rule;
0085     rule.pathMatch.assign(pathMatchRegexp);
0086     rule.destinationMatch.assign(destinationMatchRegexp);
0087     rule.result = result;
0088     rule.chain = chain;
0089     rules[protocol].emplace_back(std::move(rule));
0090   }
0091 
0092   void FileLocator::init(std::string const& catUrl, unsigned iCatalog) {
0093     std::string m_url = catUrl;
0094 
0095     if (m_url.empty()) {
0096       Service<SiteLocalConfig> localconfservice;
0097       if (!localconfservice.isAvailable())
0098         throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
0099       if (iCatalog >= localconfservice->dataCatalogs().size())
0100         throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
0101       m_url = localconfservice->dataCatalogs()[iCatalog];
0102     }
0103 
0104     if (m_url.find("file:") == std::string::npos) {
0105       throw cms::Exception("TrivialFileCatalog",
0106                            "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0107     }
0108 
0109     m_url = m_url.erase(0, m_url.find(':') + 1);
0110 
0111     std::vector<std::string> tokens;
0112     boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
0113     m_filename = tokens[0];
0114 
0115     if (tokens.size() == 2) {
0116       std::string const options = tokens[1];
0117       std::vector<std::string> optionTokens;
0118       boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
0119 
0120       std::string const equalSign("=");
0121       std::string const comma(",");
0122 
0123       for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
0124         std::string const option = optionTokens[oi];
0125         std::vector<std::string> argTokens;
0126         boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
0127 
0128         if (argTokens.size() != 2) {
0129           throw cms::Exception("TrivialFileCatalog",
0130                                "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0131         }
0132 
0133         if (argTokens[0] == "protocol") {
0134           boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
0135         } else if (argTokens[0] == "destination") {
0136           m_destination = argTokens[1];
0137         }
0138       }
0139     }
0140 
0141     if (m_protocols.empty()) {
0142       throw cms::Exception("TrivialFileCatalog",
0143                            "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
0144     }
0145 
0146     std::ifstream configFile;
0147     configFile.open(m_filename.c_str());
0148 
0149     //
0150     // std::cout << "Using catalog configuration " << m_filename << std::endl;
0151 
0152     if (!configFile.good() || !configFile.is_open()) {
0153       throw cms::Exception("TrivialFileCatalog",
0154                            "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
0155     }
0156 
0157     configFile.close();
0158 
0159     tinyxml2::XMLDocument doc;
0160     auto loadErr = doc.LoadFile(m_filename.c_str());
0161     if (loadErr != tinyxml2::XML_SUCCESS) {
0162       throw cms::Exception("TrivialFileCatalog")
0163           << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
0164     }
0165     /* trivialFileCatalog matches the following xml schema
0166      FIXME: write a proper DTD
0167      <storage-mapping>
0168      <lfn-to-pfn protocol="direct" destination-match=".*"
0169      path-match="lfn/guid match regular expression"
0170      result="/castor/cern.ch/cms/$1"/>
0171      <pfn-to-lfn protocol="srm"
0172      path-match="lfn/guid match regular expression"
0173      result="$1"/>
0174      </storage-mapping>
0175     */
0176     auto rootElement = doc.RootElement();
0177     /*first of all do the lfn-to-pfn bit*/
0178     for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
0179          el = el->NextSiblingElement("lfn-to-pfn")) {
0180       parseRule(el, m_directRules);
0181     }
0182 
0183     /*Then we handle the pfn-to-lfn bit*/
0184     for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
0185          el = el->NextSiblingElement("pfn-to-lfn")) {
0186       parseRule(el, m_inverseRules);
0187     }
0188   }
0189 
0190   std::string FileLocator::applyRules(ProtocolRules const& protocolRules,
0191                                       std::string const& protocol,
0192                                       std::string const& destination,
0193                                       bool direct,
0194                                       std::string name) const {
0195     // std::cerr << "Calling apply rules with protocol: " << protocol << "\n destination: " << destination << "\n " << " on name " << name << std::endl;
0196 
0197     ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
0198     if (rulesIterator == protocolRules.end()) {
0199       return "";
0200     }
0201 
0202     Rules const& rules = (*(rulesIterator)).second;
0203 
0204     std::smatch destinationMatches;
0205     std::smatch nameMatches;
0206 
0207     /* Look up for a matching rule*/
0208     for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
0209       if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
0210         continue;
0211       }
0212 
0213       if (!std::regex_match(name, i->pathMatch)) {
0214         continue;
0215       }
0216 
0217       // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
0218 
0219       std::string const chain = i->chain;
0220       if ((direct == true) && (!chain.empty())) {
0221         name = applyRules(protocolRules, chain, destination, direct, name);
0222         if (name.empty()) {
0223           return "";
0224         }
0225       }
0226 
0227       std::regex_match(name, nameMatches, i->pathMatch);
0228       name = replaceWithRegexp(nameMatches, i->result);
0229 
0230       if ((direct == false) && (!chain.empty())) {
0231         name = applyRules(protocolRules, chain, destination, direct, name);
0232       }
0233       return name;
0234     }
0235     return "";
0236   }
0237 }  // namespace edm