Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-31 23:01:47

0001 #include <algorithm>
0002 #include <iomanip>
0003 #include <iostream>
0004 #include <memory>
0005 
0006 #include <cstdio>
0007 #include <cstring>
0008 #include <fstream>
0009 #include <sstream>
0010 #include <string>
0011 #include <vector>
0012 
0013 #include <xercesc/sax2/Attributes.hpp>
0014 #include <xercesc/dom/DOM.hpp>
0015 
0016 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0017 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0018 #include "FWCore/Utilities/interface/Exception.h"
0019 #include "FWCore/Utilities/interface/TimeOfDay.h"
0020 
0021 #include "GeneratorInterface/LHEInterface/interface/LHEReader.h"
0022 #include "GeneratorInterface/LHEInterface/interface/LHERunInfo.h"
0023 #include "GeneratorInterface/LHEInterface/interface/LHEEvent.h"
0024 
0025 #include "Utilities/StorageFactory/interface/IOTypes.h"
0026 #include "Utilities/StorageFactory/interface/Storage.h"
0027 #include "Utilities/StorageFactory/interface/StorageFactory.h"
0028 
0029 #include "XMLUtils.h"
0030 
0031 XERCES_CPP_NAMESPACE_USE
0032 
0033 namespace lhef {
0034 
0035   static void logFileAction(char const *msg, std::string const &fileName) {
0036     edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
0037     edm::FlushMessageLog();
0038   }
0039 
0040   class LHEReader::Source {
0041   public:
0042     Source() {}
0043     virtual ~Source() {}
0044     virtual XMLDocument *createReader(XMLDocument::Handler &handler) = 0;
0045   };
0046 
0047   class LHEReader::FileSource : public LHEReader::Source {
0048   public:
0049     FileSource(const std::string &fileURL) {
0050       using namespace edm::storage;
0051       auto storage = StorageFactory::get()->open(fileURL, IOFlags::OpenRead);
0052 
0053       if (!storage)
0054         throw cms::Exception("FileOpenError")
0055             << "Could not open LHE file \"" << fileURL << "\" for reading" << std::endl;
0056 
0057       fileStream = std::make_unique<StorageWrap>(std::move(storage));
0058     }
0059 
0060     ~FileSource() override {}
0061 
0062     XMLDocument *createReader(XMLDocument::Handler &handler) override { return new XMLDocument(fileStream, handler); }
0063 
0064   private:
0065     std::unique_ptr<StorageWrap> fileStream;
0066   };
0067 
0068   class LHEReader::StringSource : public LHEReader::Source {
0069   public:
0070     StringSource(const std::string &inputs) {
0071       if (inputs.empty())
0072         throw cms::Exception("StreamOpenError") << "Empty LHE file string name \"" << std::endl;
0073 
0074       std::stringstream *tmpis = new std::stringstream(inputs);
0075       fileStream.reset(tmpis);
0076     }
0077 
0078     ~StringSource() override {}
0079 
0080     XMLDocument *createReader(XMLDocument::Handler &handler) override { return new XMLDocument(fileStream, handler); }
0081 
0082   private:
0083     std::unique_ptr<std::istream> fileStream;
0084   };
0085 
0086   class LHEReader::XMLHandler : public XMLDocument::Handler {
0087   public:
0088     typedef std::vector<std::pair<std::string, std::string> > wgt_info;
0089     XMLHandler()
0090         : impl(nullptr),
0091           gotObject(kNone),
0092           mode(kNone),
0093           xmlHeader(nullptr),
0094           xmlEvent(nullptr),
0095           headerOk(false),
0096           npLO(-99),
0097           npNLO(-99) {}
0098     ~XMLHandler() override {
0099       if (xmlHeader)
0100         xmlHeader->release();
0101       if (xmlEvent)
0102         xmlEvent->release();
0103     }
0104 
0105     enum Object { kNone = 0, kHeader, kInit, kComment, kEvent };
0106 
0107     void reset() {
0108       headerOk = false;
0109       weightsinevent.clear();
0110       gotObject = kNone;
0111       mode = kNone;
0112     }
0113 
0114     const wgt_info &weightInfo() const { return weightsinevent; }
0115 
0116   protected:
0117     void startElement(const XMLCh *const uri,
0118                       const XMLCh *const localname,
0119                       const XMLCh *const qname,
0120                       const Attributes &attributes) override;
0121 
0122     void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override;
0123 
0124     void characters(const XMLCh *const chars, const XMLSize_t length) override;
0125     void comment(const XMLCh *const chars, const XMLSize_t length) override;
0126 
0127   private:
0128     friend class LHEReader;
0129 
0130     bool skipEvent = false;
0131     std::unique_ptr<DOMImplementation> impl;
0132     std::string buffer;
0133     Object gotObject;
0134     Object mode;
0135     DOMDocument *xmlHeader;
0136     DOMDocument *xmlEvent;
0137     std::vector<DOMElement *> xmlNodes, xmlEventNodes;
0138     bool headerOk;
0139     std::vector<LHERunInfo::Header> headers;
0140     wgt_info weightsinevent;
0141     int npLO;
0142     int npNLO;
0143     std::vector<float> scales;
0144   };
0145 
0146   static void attributesToDom(DOMElement *dom, const Attributes &attributes) {
0147     for (unsigned int i = 0; i < attributes.getLength(); i++) {
0148       const XMLCh *name = attributes.getQName(i);
0149       const XMLCh *value = attributes.getValue(i);
0150 
0151       dom->setAttribute(name, value);
0152     }
0153   }
0154 
0155   static void fillHeader(LHERunInfo::Header &header, const char *data, int len = -1) {
0156     const char *end = len >= 0 ? (data + len) : nullptr;
0157     while (*data && (!end || data < end)) {
0158       std::size_t len = std::strcspn(data, "\r\n");
0159       if (end && data + len > end)
0160         len = end - data;
0161       if (data[len] == '\r' && data[len + 1] == '\n')
0162         len += 2;
0163       else if (data[len])
0164         len++;
0165       header.addLine(std::string(data, len));
0166       data += len;
0167     }
0168   }
0169 
0170   void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
0171                                            const XMLCh *const localname,
0172                                            const XMLCh *const qname,
0173                                            const Attributes &attributes) {
0174     std::string name((const char *)XMLSimpleStr(qname));
0175 
0176     if (!headerOk) {
0177       if (name != "LesHouchesEvents")
0178         throw cms::Exception("InvalidFormat") << "LHE file has invalid header" << std::endl;
0179       headerOk = true;
0180       return;
0181     }
0182 
0183     if (mode == kHeader) {
0184       DOMElement *elem = xmlHeader->createElement(qname);
0185       attributesToDom(elem, attributes);
0186       xmlNodes.back()->appendChild(elem);
0187       xmlNodes.push_back(elem);
0188       return;
0189     } else if (mode == kEvent) {
0190       if (skipEvent) {
0191         return;
0192       }
0193 
0194       DOMElement *elem = xmlEvent->createElement(qname);
0195       attributesToDom(elem, attributes);
0196 
0197       //TODO this is a hack (even more than the rest of this class)
0198       if (name == "rwgt") {
0199         xmlEventNodes[0]->appendChild(elem);
0200       } else if (name == "wgt") {
0201         xmlEventNodes[1]->appendChild(elem);
0202       } else if (name == "scales") {
0203         for (XMLSize_t iscale = 0; iscale < attributes.getLength(); ++iscale) {
0204           int ipart = 0;
0205           const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
0206           int nmatch = sscanf(scalename, "pt_clust_%d", &ipart);
0207 
0208           if (nmatch != 1) {
0209             edm::LogError("Generator|LHEInterface") << "invalid attribute in <scales> tag" << std::endl;
0210           }
0211 
0212           float scaleval;
0213           const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
0214           sscanf(scalevalstr, "%e", &scaleval);
0215 
0216           scales.push_back(scaleval);
0217         }
0218       }
0219       xmlEventNodes.push_back(elem);
0220       return;
0221     } else if (mode == kInit) {
0222       //skip unknown tags in init block as well
0223       return;
0224     } else if (mode != kNone) {
0225       throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
0226     }
0227 
0228     if (name == "header") {
0229       if (!impl)
0230         impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
0231 
0232       xmlHeader = impl->createDocument(nullptr, qname, nullptr);
0233       xmlNodes.resize(1);
0234       xmlNodes[0] = xmlHeader->getDocumentElement();
0235       mode = kHeader;
0236     }
0237     if (name == "init") {
0238       mode = kInit;
0239     } else if (name == "event") {
0240       if (!skipEvent) {
0241         if (!impl)
0242           impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
0243 
0244         if (xmlEvent)
0245           xmlEvent->release();
0246         xmlEvent = impl->createDocument(nullptr, qname, nullptr);
0247         weightsinevent.resize(0);
0248         scales.clear();
0249 
0250         npLO = -99;
0251         npNLO = -99;
0252         const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
0253         if (npLOval) {
0254           const char *npLOs = XMLSimpleStr(npLOval);
0255           sscanf(npLOs, "%d", &npLO);
0256         }
0257         const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
0258         if (npNLOval) {
0259           const char *npNLOs = XMLSimpleStr(npNLOval);
0260           sscanf(npNLOs, "%d", &npNLO);
0261         }
0262 
0263         xmlEventNodes.resize(1);
0264         xmlEventNodes[0] = xmlEvent->getDocumentElement();
0265       }
0266       mode = kEvent;
0267     }
0268 
0269     if (mode == kNone)
0270       throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
0271 
0272     buffer.clear();
0273   }
0274 
0275   void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
0276                                          const XMLCh *const localname,
0277                                          const XMLCh *const qname) {
0278     std::string name((const char *)XMLSimpleStr(qname));
0279 
0280     if (mode) {
0281       if (mode == kHeader && xmlNodes.size() > 1) {
0282         xmlNodes.resize(xmlNodes.size() - 1);
0283         return;
0284       } else if (mode == kHeader) {
0285         std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
0286         std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
0287         assert(outputDesc.get());
0288         outputDesc->setEncoding(XMLUniStr("UTF-8"));
0289 
0290         for (DOMNode *node = xmlNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
0291           XMLSimpleStr buffer(writer->writeToString(node));
0292 
0293           std::string type;
0294           const char *p, *q;
0295           DOMElement *elem;
0296 
0297           switch (node->getNodeType()) {
0298             case DOMNode::ELEMENT_NODE:
0299               elem = static_cast<DOMElement *>(node);
0300               type = (const char *)XMLSimpleStr(elem->getTagName());
0301               p = std::strchr((const char *)buffer, '>') + 1;
0302               q = std::strrchr(p, '<');
0303               break;
0304             case DOMNode::COMMENT_NODE:
0305               type = "";
0306               p = buffer + 4;
0307               q = buffer + strlen(buffer) - 3;
0308               break;
0309             default:
0310               type = "<>";
0311               p = buffer + std::strspn(buffer, " \t\r\n");
0312               if (!*p)
0313                 continue;
0314               q = p + strlen(p);
0315           }
0316           LHERunInfo::Header header(type);
0317           fillHeader(header, p, q - p);
0318           headers.push_back(header);
0319         }
0320 
0321         xmlHeader->release();
0322         xmlHeader = nullptr;
0323       } else if (name == "event" && mode == kEvent &&
0324                  (skipEvent || (!xmlEventNodes.empty()))) {  // handling of weights in LHE file
0325 
0326         if (skipEvent) {
0327           gotObject = mode;
0328           mode = kNone;
0329           return;
0330         }
0331 
0332         for (DOMNode *node = xmlEventNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
0333           switch (node->getNodeType()) {
0334             case DOMNode::ELEMENT_NODE:  // rwgt
0335               for (DOMNode *rwgt = xmlEventNodes[1]->getFirstChild(); rwgt; rwgt = rwgt->getNextSibling()) {
0336                 DOMNode *attr = rwgt->getAttributes()->item(0);
0337                 XMLSimpleStr atname(attr->getNodeValue());
0338                 XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
0339                 switch (rwgt->getNodeType()) {
0340                   case DOMNode::ELEMENT_NODE:
0341                     weightsinevent.push_back(std::make_pair((const char *)atname, (const char *)weight));
0342                     break;
0343                   default:
0344                     break;
0345                 }
0346               }
0347               break;
0348             case DOMNode::TEXT_NODE:  // event information
0349             {
0350               XMLSimpleStr data(node->getNodeValue());
0351               buffer.append(data);
0352             } break;
0353             default:
0354               break;
0355           }
0356         }
0357       } else if (mode == kEvent) {
0358         //skip unknown tags
0359         return;
0360       }
0361 
0362       if (gotObject != kNone)
0363         throw cms::Exception("InvalidState") << "Unexpected pileup in"
0364                                                 " LHEReader::XMLHandler::endElement"
0365                                              << std::endl;
0366 
0367       gotObject = mode;
0368       mode = kNone;
0369     }
0370   }
0371 
0372   void LHEReader::XMLHandler::characters(const XMLCh *const data_, const XMLSize_t length) {
0373     if (mode == kHeader) {
0374       DOMText *text = xmlHeader->createTextNode(data_);
0375       xmlNodes.back()->appendChild(text);
0376       return;
0377     }
0378 
0379     if (XMLSimpleStr::isAllSpaces(data_, length))
0380       return;
0381 
0382     unsigned int offset = 0;
0383     while (offset < length && XMLSimpleStr::isSpace(data_[offset]))
0384       offset++;
0385 
0386     if (mode == kEvent) {
0387       if (!skipEvent) {
0388         DOMText *text = xmlEvent->createTextNode(data_ + offset);
0389         xmlEventNodes.back()->appendChild(text);
0390       }
0391       return;
0392     }
0393 
0394     if (mode == kNone)
0395       throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
0396 
0397     XMLSimpleStr data(data_ + offset);
0398     buffer.append(data);
0399   }
0400 
0401   void LHEReader::XMLHandler::comment(const XMLCh *const data_, const XMLSize_t length) {
0402     if (mode == kHeader) {
0403       DOMComment *comment = xmlHeader->createComment(data_);
0404       xmlNodes.back()->appendChild(comment);
0405       return;
0406     }
0407 
0408     XMLSimpleStr data(data_);
0409 
0410     LHERunInfo::Header header;
0411     fillHeader(header, data);
0412     headers.push_back(header);
0413   }
0414 
0415   LHEReader::LHEReader(const edm::ParameterSet &params)
0416       : fileURLs(params.getUntrackedParameter<std::vector<std::string> >("fileNames")),
0417         strName(""),
0418         firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
0419         maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
0420         curIndex(0),
0421         handler(new XMLHandler()) {}
0422 
0423   LHEReader::LHEReader(const std::vector<std::string> &fileNames, unsigned int firstEvent)
0424       : fileURLs(fileNames),
0425         strName(""),
0426         firstEvent(firstEvent),
0427         maxEvents(-1),
0428         curIndex(0),
0429         handler(new XMLHandler()) {}
0430 
0431   LHEReader::LHEReader(const std::string &inputs, unsigned int firstEvent)
0432       : strName(inputs), firstEvent(firstEvent), maxEvents(-1), curIndex(0), handler(new XMLHandler()) {}
0433 
0434   LHEReader::~LHEReader() {
0435     // Explicitly release "orphaned" resources
0436     // that were created through DOM implementation
0437     // createXXXX factory method *before* last
0438     // XMLPlatformUtils::Terminate is called.
0439     handler.release();
0440     curDoc.release();
0441     curSource.release();
0442   }
0443 
0444   std::shared_ptr<LHEEvent> LHEReader::next(bool *newFileOpened) {
0445     while (curDoc.get() || curIndex < fileURLs.size() || (fileURLs.empty() && !strName.empty())) {
0446       if (!curDoc.get()) {
0447         if (!platform) {
0448           //If we read multiple files, the XercesPlatform must live longer than any one
0449           // XMLDocument.
0450           platform = XMLDocument::platformHandle();
0451         }
0452         if (!fileURLs.empty()) {
0453           logFileAction("  Initiating request to open LHE file ", fileURLs[curIndex]);
0454           curSource = std::make_unique<FileSource>(fileURLs[curIndex]);
0455           logFileAction("  Successfully opened LHE file ", fileURLs[curIndex]);
0456           if (newFileOpened != nullptr)
0457             *newFileOpened = true;
0458           ++curIndex;
0459         } else if (!strName.empty()) {
0460           curSource = std::make_unique<StringSource>(strName);
0461         }
0462         handler->reset();
0463         curDoc.reset(curSource->createReader(*handler));
0464         curRunInfo.reset();
0465       }
0466       handler->skipEvent = firstEvent > 0;
0467 
0468       XMLHandler::Object event = handler->gotObject;
0469       handler->gotObject = XMLHandler::kNone;
0470 
0471       switch (event) {
0472         case XMLHandler::kNone:
0473           if (!curDoc->parse()) {
0474             curDoc.reset();
0475             logFileAction("  Closed LHE file ", fileURLs[curIndex - 1]);
0476             return std::shared_ptr<LHEEvent>();
0477           }
0478           break;
0479 
0480         case XMLHandler::kHeader:
0481           break;
0482 
0483         case XMLHandler::kInit: {
0484           std::istringstream data;
0485           data.str(handler->buffer);
0486           handler->buffer.clear();
0487 
0488           curRunInfo.reset(new LHERunInfo(data));
0489 
0490           std::for_each(handler->headers.begin(),
0491                         handler->headers.end(),
0492                         std::bind(&LHERunInfo::addHeader, curRunInfo.get(), std::placeholders::_1));
0493           handler->headers.clear();
0494         } break;
0495 
0496         case XMLHandler::kComment:
0497           break;
0498 
0499         case XMLHandler::kEvent: {
0500           if (!curRunInfo.get())
0501             throw cms::Exception("InvalidState") << "Got LHE event without"
0502                                                     " initialization."
0503                                                  << std::endl;
0504 
0505           if (firstEvent > 0) {
0506             firstEvent--;
0507             continue;
0508           }
0509 
0510           if (maxEvents == 0)
0511             return std::shared_ptr<LHEEvent>();
0512           else if (maxEvents > 0)
0513             maxEvents--;
0514 
0515           std::istringstream data;
0516           data.str(handler->buffer);
0517           handler->buffer.clear();
0518 
0519           std::shared_ptr<LHEEvent> lheevent;
0520           lheevent.reset(new LHEEvent(curRunInfo, data));
0521           const XMLHandler::wgt_info &info = handler->weightsinevent;
0522           for (size_t i = 0; i < info.size(); ++i) {
0523             double num = -1.0;
0524             sscanf(info[i].second.c_str(), "%le", &num);
0525             lheevent->addWeight(gen::WeightsInfo(info[i].first, num));
0526           }
0527           lheevent->setNpLO(handler->npLO);
0528           lheevent->setNpNLO(handler->npNLO);
0529           //fill scales
0530           if (!handler->scales.empty()) {
0531             lheevent->setScales(handler->scales);
0532           }
0533           return lheevent;
0534         }
0535       }
0536     }
0537 
0538     return std::shared_ptr<LHEEvent>();
0539   }
0540 
0541 }  // namespace lhef