Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:11:54

0001 #include "FWCore/Catalog/interface/FileLocator.h"
0002 #include "FWCore/ServiceRegistry/interface/Service.h"
0003 #include "FWCore/Utilities/interface/Exception.h"
0004 
0005 #include <boost/algorithm/string.hpp>
0006 #include <boost/algorithm/string/replace.hpp>
0007 #include <boost/property_tree/json_parser.hpp>
0008 
0009 #include <filesystem>
0010 #include <cstdlib>
0011 #include <stdexcept>
0012 #include <fstream>
0013 #include <sstream>
0014 
0015 namespace pt = boost::property_tree;
0016 
0017 namespace {
0018 
0019   std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
0020     std::string result = outputFormat;
0021     std::stringstream str;
0022 
0023     for (size_t i = 1; i < matches.size(); ++i) {
0024       str.str("");
0025       str << "$" << i;
0026       std::string const matchedString(matches[i].first, matches[i].second);
0027       if (!matchedString.empty())
0028         boost::algorithm::replace_all(result, str.str(), matchedString);
0029     }
0030     return result;
0031   }
0032 
0033   constexpr char const* const kEmptyString = "";
0034   constexpr char const* const kLFNPrefix = "/store/";
0035 
0036   const char* safe(const char* iCheck) {
0037     if (iCheck == nullptr) {
0038       return kEmptyString;
0039     }
0040     return iCheck;
0041   }
0042 
0043 }  // namespace
0044 
0045 namespace pt = boost::property_tree;
0046 
0047 namespace edm {
0048 
0049   FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
0050     init_trivialCatalog(catUrl, iCatalog);
0051   }
0052 
0053   FileLocator::FileLocator(edm::CatalogAttributes const& catAttr,
0054                            unsigned iCatalog,
0055                            std::string const& storageDescriptionPath)
0056       : m_destination("any") {
0057     init(catAttr, iCatalog, storageDescriptionPath);
0058   }
0059 
0060   FileLocator::~FileLocator() {}
0061 
0062   std::string FileLocator::pfn(std::string const& ilfn, edm::CatalogType catType) const {
0063     if (catType == edm::CatalogType::TrivialCatalog)
0064       return convert(ilfn, m_directRules_trivialCatalog, true);
0065     return convert(ilfn, m_directRules, true);
0066   }
0067 
0068   std::string FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
0069     std::string out = "";
0070     //check if input is an authentic LFN
0071     if (input.compare(0, 7, kLFNPrefix) != 0)
0072       return out;
0073     for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
0074       out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
0075       if (!out.empty()) {
0076         return out;
0077       }
0078     }
0079     return out;
0080   }
0081 
0082   void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
0083     if (!ruleElement) {
0084       throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
0085     }
0086 
0087     auto const protocol = safe(ruleElement->Attribute("protocol"));
0088     auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
0089     if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
0090       destinationMatchRegexp = ".*";
0091     }
0092 
0093     auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
0094     auto const result = safe(ruleElement->Attribute("result"));
0095     auto const chain = safe(ruleElement->Attribute("chain"));
0096 
0097     Rule rule;
0098     rule.pathMatch.assign(pathMatchRegexp);
0099     rule.destinationMatch.assign(destinationMatchRegexp);
0100     rule.result = result;
0101     rule.chain = chain;
0102     rules[protocol].emplace_back(std::move(rule));
0103   }
0104 
0105   void FileLocator::parseRule(pt::ptree::value_type const& storageRule,
0106                               std::string const& protocol,
0107                               ProtocolRules& rules) {
0108     if (storageRule.second.empty()) {
0109       throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
0110     }
0111     auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
0112     auto const result = storageRule.second.get<std::string>("pfn");
0113     auto const chain = storageRule.second.get("chain", kEmptyString);
0114     Rule rule;
0115     rule.pathMatch.assign(pathMatchRegexp);
0116     rule.destinationMatch.assign(".*");
0117     rule.result = result;
0118     rule.chain = chain;
0119     rules[protocol].emplace_back(std::move(rule));
0120   }
0121 
0122   void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
0123     std::string url = catUrl;
0124     if (url.empty()) {
0125       Service<SiteLocalConfig> localconfservice;
0126       if (!localconfservice.isAvailable())
0127         throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
0128       if (iCatalog >= localconfservice->trivialDataCatalogs().size())
0129         throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
0130       url = localconfservice->trivialDataCatalogs()[iCatalog];
0131     }
0132 
0133     if (url.find("file:") == std::string::npos) {
0134       throw cms::Exception("TrivialFileCatalog",
0135                            "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0136     }
0137 
0138     url = url.erase(0, url.find(':') + 1);
0139 
0140     std::vector<std::string> tokens;
0141     boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
0142     m_filename = tokens[0];
0143 
0144     if (tokens.size() == 2) {
0145       std::string const options = tokens[1];
0146       std::vector<std::string> optionTokens;
0147       boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
0148 
0149       std::string const equalSign("=");
0150       std::string const comma(",");
0151 
0152       for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
0153         std::string const option = optionTokens[oi];
0154         std::vector<std::string> argTokens;
0155         boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
0156 
0157         if (argTokens.size() != 2) {
0158           throw cms::Exception("TrivialFileCatalog",
0159                                "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0160         }
0161 
0162         if (argTokens[0] == "protocol") {
0163           boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
0164         } else if (argTokens[0] == "destination") {
0165           m_destination = argTokens[1];
0166         }
0167       }
0168     }
0169 
0170     if (m_protocols.empty()) {
0171       throw cms::Exception("TrivialFileCatalog",
0172                            "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
0173     }
0174 
0175     std::ifstream configFile;
0176     configFile.open(m_filename.c_str());
0177 
0178     if (!configFile.good() || !configFile.is_open()) {
0179       throw cms::Exception("TrivialFileCatalog",
0180                            "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
0181     }
0182 
0183     configFile.close();
0184 
0185     tinyxml2::XMLDocument doc;
0186     auto loadErr = doc.LoadFile(m_filename.c_str());
0187     if (loadErr != tinyxml2::XML_SUCCESS) {
0188       throw cms::Exception("TrivialFileCatalog")
0189           << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
0190     }
0191     /* trivialFileCatalog matches the following xml schema
0192      FIXME: write a proper DTD
0193      <storage-mapping>
0194      <lfn-to-pfn protocol="direct" destination-match=".*"
0195      path-match="lfn/guid match regular expression"
0196      result="/castor/cern.ch/cms/$1"/>
0197      <pfn-to-lfn protocol="srm"
0198      path-match="lfn/guid match regular expression"
0199      result="$1"/>
0200      </storage-mapping>
0201     */
0202     auto rootElement = doc.RootElement();
0203     /*first of all do the lfn-to-pfn bit*/
0204     for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
0205          el = el->NextSiblingElement("lfn-to-pfn")) {
0206       parseRuleTrivialCatalog(el, m_directRules_trivialCatalog);
0207     }
0208 
0209     /*Then we handle the pfn-to-lfn bit*/
0210     for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
0211          el = el->NextSiblingElement("pfn-to-lfn")) {
0212       parseRuleTrivialCatalog(el, m_inverseRules);
0213     }
0214   }
0215 
0216   void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
0217                          unsigned iCatalog,
0218                          std::string const& storageDescriptionPath) {
0219     Service<SiteLocalConfig> localconfservice;
0220     edm::CatalogAttributes aCatalog = input_dataCatalog;
0221     if (input_dataCatalog.empty()) {
0222       if (!localconfservice.isAvailable()) {
0223         cms::Exception ex("FileCatalog");
0224         ex << "edm::SiteLocalConfigService is not available";
0225         ex.addContext("Calling edm::FileLocator::init()");
0226         throw ex;
0227       }
0228       if (iCatalog >= localconfservice->dataCatalogs().size()) {
0229         cms::Exception ex("FileCatalog");
0230         ex << "Request nonexistence data catalog";
0231         ex.addContext("Calling edm::FileLocator::init()");
0232         throw ex;
0233       }
0234       aCatalog = localconfservice->dataCatalogs()[iCatalog];
0235     }
0236 
0237     std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
0238 
0239     //use path to storage description from input parameter
0240     if (!storageDescriptionPath.empty())
0241       filename_storage = storageDescriptionPath;
0242 
0243     //now read json
0244     pt::ptree json;
0245     try {
0246       boost::property_tree::read_json(filename_storage.string(), json);
0247     } catch (std::exception& e) {
0248       cms::Exception ex("FileCatalog");
0249       ex << "Can not open storage.json (" << filename_storage.string()
0250          << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
0251       ex.addContext("edm::FileLocator:init()");
0252       throw ex;
0253     }
0254     auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
0255       //get site name
0256       std::string siteName = site.second.get("site", kEmptyString);
0257       //get volume name
0258       std::string volName = site.second.get("volume", kEmptyString);
0259       return aCatalog.storageSite == siteName && aCatalog.volume == volName;
0260     });
0261 
0262     //let enforce that site-local-config.xml and storage.json contains valid catalogs in <data-access>, in which site defined in site-local-config.xml <data-access> should be found in storage.json
0263     if (found_site == json.end()) {
0264       cms::Exception ex("FileCatalog");
0265       ex << "Can not find storage site \"" << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
0266          << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
0267       ex.addContext("edm::FileLocator:init()");
0268       throw ex;
0269     }
0270 
0271     const pt::ptree& protocols = found_site->second.find("protocols")->second;
0272     auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
0273       std::string protName = protocol.second.get("protocol", kEmptyString);
0274       return aCatalog.protocol == protName;
0275     });
0276 
0277     //let enforce that site-local-config.xml and storage.json contains valid catalogs, in which protocol defined in site-local-config.xml <data-access> should be found in storage.json
0278     if (found_protocol == protocols.end()) {
0279       cms::Exception ex("FileCatalog");
0280       ex << "Can not find protocol \"" << aCatalog.protocol << "\" for the storage site \"" << aCatalog.storageSite
0281          << "\" and volume \"" << aCatalog.volume
0282          << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
0283       ex.addContext("edm::FileLocator:init()");
0284       throw ex;
0285     }
0286 
0287     std::string protName = found_protocol->second.get("protocol", kEmptyString);
0288     m_protocols.push_back(protName);
0289 
0290     //store all prefixes and rules to m_directRules. We need to do this so that "applyRules" can find the rule in case chaining is used
0291     //loop over protocols
0292     for (pt::ptree::value_type const& protocol : protocols) {
0293       std::string protName = protocol.second.get("protocol", kEmptyString);
0294       //loop over rules
0295       std::string prefixTmp = protocol.second.get("prefix", kEmptyString);
0296       if (prefixTmp == kEmptyString) {
0297         const pt::ptree& rules = protocol.second.find("rules")->second;
0298         for (pt::ptree::value_type const& storageRule : rules) {
0299           parseRule(storageRule, protName, m_directRules);
0300         }
0301       }
0302       //now convert prefix to a rule and save it
0303       else {
0304         Rule rule;
0305         rule.pathMatch.assign("/?(.*)");
0306         rule.destinationMatch.assign(".*");
0307         rule.result = prefixTmp + "/$1";
0308         rule.chain = kEmptyString;
0309         m_directRules[protName].emplace_back(std::move(rule));
0310       }
0311     }
0312   }
0313 
0314   std::string FileLocator::applyRules(ProtocolRules const& protocolRules,
0315                                       std::string const& protocol,
0316                                       std::string const& destination,
0317                                       bool direct,
0318                                       std::string name) const {
0319     ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
0320     if (rulesIterator == protocolRules.end()) {
0321       return "";
0322     }
0323 
0324     Rules const& rules = (*(rulesIterator)).second;
0325 
0326     std::smatch destinationMatches;
0327     std::smatch nameMatches;
0328 
0329     /* Look up for a matching rule*/
0330     for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
0331       if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
0332         continue;
0333       }
0334 
0335       if (!std::regex_match(name, i->pathMatch)) {
0336         continue;
0337       }
0338 
0339       // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
0340 
0341       std::string const chain = i->chain;
0342       if ((direct == true) && (!chain.empty())) {
0343         name = applyRules(protocolRules, chain, destination, direct, name);
0344         if (name.empty()) {
0345           return "";
0346         }
0347       }
0348 
0349       std::regex_match(name, nameMatches, i->pathMatch);
0350       name = replaceWithRegexp(nameMatches, i->result);
0351 
0352       if ((direct == false) && (!chain.empty())) {
0353         name = applyRules(protocolRules, chain, destination, direct, name);
0354       }
0355       return name;
0356     }
0357     return "";
0358   }
0359 }  // namespace edm