File indexing completed on 2024-04-06 12:11:54
0001 #include "FWCore/Catalog/interface/FileLocator.h"
0002 #include "FWCore/ServiceRegistry/interface/Service.h"
0003 #include "FWCore/Utilities/interface/Exception.h"
0004
0005 #include <boost/algorithm/string.hpp>
0006 #include <boost/algorithm/string/replace.hpp>
0007 #include <boost/property_tree/json_parser.hpp>
0008
0009 #include <filesystem>
0010 #include <cstdlib>
0011 #include <stdexcept>
0012 #include <fstream>
0013 #include <sstream>
0014
0015 namespace pt = boost::property_tree;
0016
0017 namespace {
0018
// Expand the positional placeholders "$1", "$2", ... in outputFormat using
// the capture groups stored in matches.
//
// matches       result of a std::regex_match; group 0 (the whole match) is
//               never substituted, only groups 1 .. matches.size() - 1.
// outputFormat  template text containing the "$<n>" placeholders.
//
// Returns a copy of outputFormat with every placeholder whose capture group
// is non-empty replaced by the captured text. Placeholders whose group is
// empty (or did not participate in the match) are left untouched.
std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
  std::string result = outputFormat;

  // Substitute from the highest group index down to 1. Going upwards would
  // rewrite the "$1" that is the leading part of "$10", "$11", ... before
  // those placeholders get their turn, corrupting any pattern with ten or
  // more capture groups.
  for (size_t i = matches.size(); i-- > 1;) {
    std::string const matchedString = matches[i].str();
    if (matchedString.empty())
      continue;

    std::string const placeholder = "$" + std::to_string(i);
    // Replace every occurrence; the search resumes after the inserted text
    // so the replacement itself is never rescanned for this placeholder.
    for (size_t pos = result.find(placeholder); pos != std::string::npos;
         pos = result.find(placeholder, pos + matchedString.size())) {
      result.replace(pos, placeholder.size(), matchedString);
    }
  }
  return result;
}
0032
// Empty C string used as the fallback / default value in this file.
constexpr const char* kEmptyString = "";
// Leading path component that convert() requires on its input.
constexpr const char* kLFNPrefix = "/store/";

// Map a possibly-null C string to a non-null one: a null pointer becomes
// kEmptyString, any other pointer is returned unchanged.
const char* safe(const char* iCheck) { return iCheck != nullptr ? iCheck : kEmptyString; }
0042
0043 }
0044
0045 namespace pt = boost::property_tree;
0046
0047 namespace edm {
0048
0049 FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
0050 init_trivialCatalog(catUrl, iCatalog);
0051 }
0052
0053 FileLocator::FileLocator(edm::CatalogAttributes const& catAttr,
0054 unsigned iCatalog,
0055 std::string const& storageDescriptionPath)
0056 : m_destination("any") {
0057 init(catAttr, iCatalog, storageDescriptionPath);
0058 }
0059
0060 FileLocator::~FileLocator() {}
0061
0062 std::string FileLocator::pfn(std::string const& ilfn, edm::CatalogType catType) const {
0063 if (catType == edm::CatalogType::TrivialCatalog)
0064 return convert(ilfn, m_directRules_trivialCatalog, true);
0065 return convert(ilfn, m_directRules, true);
0066 }
0067
0068 std::string FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
0069 std::string out = "";
0070
0071 if (input.compare(0, 7, kLFNPrefix) != 0)
0072 return out;
0073 for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
0074 out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
0075 if (!out.empty()) {
0076 return out;
0077 }
0078 }
0079 return out;
0080 }
0081
0082 void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
0083 if (!ruleElement) {
0084 throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
0085 }
0086
0087 auto const protocol = safe(ruleElement->Attribute("protocol"));
0088 auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
0089 if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
0090 destinationMatchRegexp = ".*";
0091 }
0092
0093 auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
0094 auto const result = safe(ruleElement->Attribute("result"));
0095 auto const chain = safe(ruleElement->Attribute("chain"));
0096
0097 Rule rule;
0098 rule.pathMatch.assign(pathMatchRegexp);
0099 rule.destinationMatch.assign(destinationMatchRegexp);
0100 rule.result = result;
0101 rule.chain = chain;
0102 rules[protocol].emplace_back(std::move(rule));
0103 }
0104
0105 void FileLocator::parseRule(pt::ptree::value_type const& storageRule,
0106 std::string const& protocol,
0107 ProtocolRules& rules) {
0108 if (storageRule.second.empty()) {
0109 throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
0110 }
0111 auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
0112 auto const result = storageRule.second.get<std::string>("pfn");
0113 auto const chain = storageRule.second.get("chain", kEmptyString);
0114 Rule rule;
0115 rule.pathMatch.assign(pathMatchRegexp);
0116 rule.destinationMatch.assign(".*");
0117 rule.result = result;
0118 rule.chain = chain;
0119 rules[protocol].emplace_back(std::move(rule));
0120 }
0121
0122 void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
0123 std::string url = catUrl;
0124 if (url.empty()) {
0125 Service<SiteLocalConfig> localconfservice;
0126 if (!localconfservice.isAvailable())
0127 throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
0128 if (iCatalog >= localconfservice->trivialDataCatalogs().size())
0129 throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
0130 url = localconfservice->trivialDataCatalogs()[iCatalog];
0131 }
0132
0133 if (url.find("file:") == std::string::npos) {
0134 throw cms::Exception("TrivialFileCatalog",
0135 "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0136 }
0137
0138 url = url.erase(0, url.find(':') + 1);
0139
0140 std::vector<std::string> tokens;
0141 boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
0142 m_filename = tokens[0];
0143
0144 if (tokens.size() == 2) {
0145 std::string const options = tokens[1];
0146 std::vector<std::string> optionTokens;
0147 boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
0148
0149 std::string const equalSign("=");
0150 std::string const comma(",");
0151
0152 for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
0153 std::string const option = optionTokens[oi];
0154 std::vector<std::string> argTokens;
0155 boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
0156
0157 if (argTokens.size() != 2) {
0158 throw cms::Exception("TrivialFileCatalog",
0159 "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
0160 }
0161
0162 if (argTokens[0] == "protocol") {
0163 boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
0164 } else if (argTokens[0] == "destination") {
0165 m_destination = argTokens[1];
0166 }
0167 }
0168 }
0169
0170 if (m_protocols.empty()) {
0171 throw cms::Exception("TrivialFileCatalog",
0172 "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
0173 }
0174
0175 std::ifstream configFile;
0176 configFile.open(m_filename.c_str());
0177
0178 if (!configFile.good() || !configFile.is_open()) {
0179 throw cms::Exception("TrivialFileCatalog",
0180 "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
0181 }
0182
0183 configFile.close();
0184
0185 tinyxml2::XMLDocument doc;
0186 auto loadErr = doc.LoadFile(m_filename.c_str());
0187 if (loadErr != tinyxml2::XML_SUCCESS) {
0188 throw cms::Exception("TrivialFileCatalog")
0189 << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
0190 }
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202 auto rootElement = doc.RootElement();
0203
0204 for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
0205 el = el->NextSiblingElement("lfn-to-pfn")) {
0206 parseRuleTrivialCatalog(el, m_directRules_trivialCatalog);
0207 }
0208
0209
0210 for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
0211 el = el->NextSiblingElement("pfn-to-lfn")) {
0212 parseRuleTrivialCatalog(el, m_inverseRules);
0213 }
0214 }
0215
0216 void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
0217 unsigned iCatalog,
0218 std::string const& storageDescriptionPath) {
0219 Service<SiteLocalConfig> localconfservice;
0220 edm::CatalogAttributes aCatalog = input_dataCatalog;
0221 if (input_dataCatalog.empty()) {
0222 if (!localconfservice.isAvailable()) {
0223 cms::Exception ex("FileCatalog");
0224 ex << "edm::SiteLocalConfigService is not available";
0225 ex.addContext("Calling edm::FileLocator::init()");
0226 throw ex;
0227 }
0228 if (iCatalog >= localconfservice->dataCatalogs().size()) {
0229 cms::Exception ex("FileCatalog");
0230 ex << "Request nonexistence data catalog";
0231 ex.addContext("Calling edm::FileLocator::init()");
0232 throw ex;
0233 }
0234 aCatalog = localconfservice->dataCatalogs()[iCatalog];
0235 }
0236
0237 std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
0238
0239
0240 if (!storageDescriptionPath.empty())
0241 filename_storage = storageDescriptionPath;
0242
0243
0244 pt::ptree json;
0245 try {
0246 boost::property_tree::read_json(filename_storage.string(), json);
0247 } catch (std::exception& e) {
0248 cms::Exception ex("FileCatalog");
0249 ex << "Can not open storage.json (" << filename_storage.string()
0250 << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
0251 ex.addContext("edm::FileLocator:init()");
0252 throw ex;
0253 }
0254 auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
0255
0256 std::string siteName = site.second.get("site", kEmptyString);
0257
0258 std::string volName = site.second.get("volume", kEmptyString);
0259 return aCatalog.storageSite == siteName && aCatalog.volume == volName;
0260 });
0261
0262
0263 if (found_site == json.end()) {
0264 cms::Exception ex("FileCatalog");
0265 ex << "Can not find storage site \"" << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
0266 << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
0267 ex.addContext("edm::FileLocator:init()");
0268 throw ex;
0269 }
0270
0271 const pt::ptree& protocols = found_site->second.find("protocols")->second;
0272 auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
0273 std::string protName = protocol.second.get("protocol", kEmptyString);
0274 return aCatalog.protocol == protName;
0275 });
0276
0277
0278 if (found_protocol == protocols.end()) {
0279 cms::Exception ex("FileCatalog");
0280 ex << "Can not find protocol \"" << aCatalog.protocol << "\" for the storage site \"" << aCatalog.storageSite
0281 << "\" and volume \"" << aCatalog.volume
0282 << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
0283 ex.addContext("edm::FileLocator:init()");
0284 throw ex;
0285 }
0286
0287 std::string protName = found_protocol->second.get("protocol", kEmptyString);
0288 m_protocols.push_back(protName);
0289
0290
0291
0292 for (pt::ptree::value_type const& protocol : protocols) {
0293 std::string protName = protocol.second.get("protocol", kEmptyString);
0294
0295 std::string prefixTmp = protocol.second.get("prefix", kEmptyString);
0296 if (prefixTmp == kEmptyString) {
0297 const pt::ptree& rules = protocol.second.find("rules")->second;
0298 for (pt::ptree::value_type const& storageRule : rules) {
0299 parseRule(storageRule, protName, m_directRules);
0300 }
0301 }
0302
0303 else {
0304 Rule rule;
0305 rule.pathMatch.assign("/?(.*)");
0306 rule.destinationMatch.assign(".*");
0307 rule.result = prefixTmp + "/$1";
0308 rule.chain = kEmptyString;
0309 m_directRules[protName].emplace_back(std::move(rule));
0310 }
0311 }
0312 }
0313
0314 std::string FileLocator::applyRules(ProtocolRules const& protocolRules,
0315 std::string const& protocol,
0316 std::string const& destination,
0317 bool direct,
0318 std::string name) const {
0319 ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
0320 if (rulesIterator == protocolRules.end()) {
0321 return "";
0322 }
0323
0324 Rules const& rules = (*(rulesIterator)).second;
0325
0326 std::smatch destinationMatches;
0327 std::smatch nameMatches;
0328
0329
0330 for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
0331 if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
0332 continue;
0333 }
0334
0335 if (!std::regex_match(name, i->pathMatch)) {
0336 continue;
0337 }
0338
0339
0340
0341 std::string const chain = i->chain;
0342 if ((direct == true) && (!chain.empty())) {
0343 name = applyRules(protocolRules, chain, destination, direct, name);
0344 if (name.empty()) {
0345 return "";
0346 }
0347 }
0348
0349 std::regex_match(name, nameMatches, i->pathMatch);
0350 name = replaceWithRegexp(nameMatches, i->result);
0351
0352 if ((direct == false) && (!chain.empty())) {
0353 name = applyRules(protocolRules, chain, destination, direct, name);
0354 }
0355 return name;
0356 }
0357 return "";
0358 }
0359 }