Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-05-23 02:05:08

0001 /*----------------------------------------------------------------------
0002 ----------------------------------------------------------------------*/
0003 #include "RootFile.h"
0004 #include "RootInputFileSequence.h"
0005 
0006 #include "DataFormats/Provenance/interface/BranchID.h"
0007 #include "DataFormats/Provenance/interface/IndexIntoFile.h"
0008 #include "DataFormats/Provenance/interface/ProductRegistry.h"
0009 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0010 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0011 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0012 #include "Utilities/StorageFactory/interface/StorageFactory.h"
0013 #include "Utilities/StorageFactory/interface/StatisticsSenderService.h"
0014 #include "FWCore/ServiceRegistry/interface/Service.h"
0015 
0016 #include "TSystem.h"
0017 
0018 namespace edm {
0019   class BranchIDListHelper;
0020   class EventPrincipal;
0021   class LuminosityBlockPrincipal;
0022   class RunPrincipal;
0023 
0024   RootInputFileSequence::RootInputFileSequence(ParameterSet const& pset, InputFileCatalog const& catalog)
0025       : catalog_(catalog),
0026         lfn_("unknown"),
0027         lfnHash_(0U),
0028         usedFallback_(false),
0029         findFileForSpecifiedID_(nullptr),
0030         fileIterBegin_(fileCatalogItems().begin()),
0031         fileIterEnd_(fileCatalogItems().end()),
0032         fileIter_(fileIterEnd_),
0033         fileIterLastOpened_(fileIterEnd_),
0034         rootFile_(),
0035         indexesIntoFiles_(fileCatalogItems().size()) {}
0036 
0037   std::vector<FileCatalogItem> const& RootInputFileSequence::fileCatalogItems() const {
0038     return catalog_.fileCatalogItems();
0039   }
0040 
0041   std::shared_ptr<ProductRegistry const> RootInputFileSequence::fileProductRegistry() const {
0042     assert(rootFile());
0043     return rootFile()->productRegistry();
0044   }
0045 
0046   std::shared_ptr<BranchIDListHelper const> RootInputFileSequence::fileBranchIDListHelper() const {
0047     assert(rootFile());
0048     return rootFile()->branchIDListHelper();
0049   }
0050 
0051   RootInputFileSequence::~RootInputFileSequence() {}
0052 
0053   std::shared_ptr<RunAuxiliary> RootInputFileSequence::readRunAuxiliary_() {
0054     assert(rootFile());
0055     return rootFile()->readRunAuxiliary_();
0056   }
0057 
0058   std::shared_ptr<LuminosityBlockAuxiliary> RootInputFileSequence::readLuminosityBlockAuxiliary_() {
0059     assert(rootFile());
0060     return rootFile()->readLuminosityBlockAuxiliary_();
0061   }
0062 
0063   bool RootInputFileSequence::readRun_(RunPrincipal& runPrincipal) {
0064     assert(rootFile());
0065     return rootFile()->readRun_(runPrincipal);
0066   }
0067 
0068   void RootInputFileSequence::fillProcessBlockHelper_() {
0069     assert(rootFile());
0070     return rootFile()->fillProcessBlockHelper_();
0071   }
0072 
0073   bool RootInputFileSequence::nextProcessBlock_(ProcessBlockPrincipal& processBlockPrincipal) {
0074     assert(rootFile());
0075     return rootFile()->nextProcessBlock_(processBlockPrincipal);
0076   }
0077 
0078   void RootInputFileSequence::readProcessBlock_(ProcessBlockPrincipal& processBlockPrincipal) {
0079     assert(rootFile());
0080     rootFile()->readProcessBlock_(processBlockPrincipal);
0081   }
0082 
0083   bool RootInputFileSequence::readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal) {
0084     assert(rootFile());
0085     return rootFile()->readLuminosityBlock_(lumiPrincipal);
0086   }
0087 
0088   // readEvent() is responsible for setting up the EventPrincipal.
0089   //
0090   //   1. fill an EventPrincipal with a unique EventID
0091   //   2. For each entry in the provenance, put in one ProductResolver,
0092   //      holding the Provenance for the corresponding EDProduct.
0093   //   3. set up the caches in the EventPrincipal to know about this
0094   //      ProductResolver.
0095   //
0096   // We do *not* create the EDProduct instance (the equivalent of reading
0097   // the branch containing this EDProduct. That will be done by the Delayed Reader,
0098   //  when it is asked to do so.
0099   //
0100 
0101   bool RootInputFileSequence::readEvent(EventPrincipal& eventPrincipal, bool readAllProducts) {
0102     assert(rootFile());
0103     return rootFile()->readEvent(eventPrincipal, readAllProducts);
0104   }
0105 
0106   bool RootInputFileSequence::containedInCurrentFile(RunNumber_t run,
0107                                                      LuminosityBlockNumber_t lumi,
0108                                                      EventNumber_t event) const {
0109     if (!rootFile())
0110       return false;
0111     return rootFile()->containsItem(run, lumi, event);
0112   }
0113 
0114   bool RootInputFileSequence::skipToItemInNewFile(RunNumber_t run,
0115                                                   LuminosityBlockNumber_t lumi,
0116                                                   EventNumber_t event,
0117                                                   size_t fileNameHash) {
0118     // Look for item in files not yet opened. We have a hash of the logical file name
0119     assert(fileNameHash != 0U);
0120     // If the lookup table is not yet filled in, fill it.
0121     if (!findFileForSpecifiedID_) {
0122       // We use a multimap because there may be hash collisions (Two different LFNs could have the same hash).
0123       // We map the hash of the LFN to the index into the list of files.
0124       findFileForSpecifiedID_ =
0125           std::make_unique<std::unordered_multimap<size_t, size_t>>();  // propagate_const<T> has no reset() function
0126       auto hasher = std::hash<std::string>();
0127       for (auto fileIter = fileIterBegin_; fileIter != fileIterEnd_; ++fileIter) {
0128         findFileForSpecifiedID_->insert(std::make_pair(hasher(fileIter->logicalFileName()), fileIter - fileIterBegin_));
0129       }
0130     }
0131     // Look up the logical file name in the table
0132     auto range = findFileForSpecifiedID_->equal_range(fileNameHash);
0133     for (auto iter = range.first; iter != range.second; ++iter) {
0134       // Don't look in files previously opened, because those have already been searched.
0135       if (!indexesIntoFiles_[iter->second]) {
0136         setAtFileSequenceNumber(iter->second);
0137         initFile_(false);
0138         assert(rootFile());
0139         bool found = rootFile()->setEntryAtItem(run, lumi, event);
0140         if (found) {
0141           return true;
0142         }
0143       }
0144     }
0145     // Not found
0146     return false;
0147   }
0148 
0149   bool RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) {
0150     // Look for item in files not yet opened.  We do not have a valid hash of the logical file name.
0151     for (auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
0152       if (!*it) {
0153         // File not yet opened.
0154         setAtFileSequenceNumber(it - indexesIntoFiles_.begin());
0155         initFile_(false);
0156         assert(rootFile());
0157         bool found = rootFile()->setEntryAtItem(run, lumi, event);
0158         if (found) {
0159           return true;
0160         }
0161       }
0162     }
0163     // Not found
0164     return false;
0165   }
0166 
0167   bool RootInputFileSequence::skipToItem(
0168       RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash, bool currentFileFirst) {
0169     // Attempt to find item in currently open input file.
0170     bool found = currentFileFirst && rootFile() && rootFile()->setEntryAtItem(run, lumi, event);
0171     if (!found) {
0172       // If only one input file, give up now, to save time.
0173       if (currentFileFirst && rootFile() && indexesIntoFiles_.size() == 1) {
0174         return false;
0175       }
0176       // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files.
0177       for (auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) {
0178         if (*it && (*it)->containsItem(run, lumi, event)) {
0179           // We found it. Close the currently open file, and open the correct one.
0180           std::vector<FileCatalogItem>::const_iterator currentIter = fileIter_;
0181           setAtFileSequenceNumber(it - indexesIntoFiles_.begin());
0182           if (fileIter_ != currentIter) {
0183             initFile(false);
0184           }
0185           // Now get the item from the correct file.
0186           assert(rootFile());
0187           found = rootFile()->setEntryAtItem(run, lumi, event);
0188           assert(found);
0189           return true;
0190         }
0191       }
0192       return (fileNameHash != 0U && skipToItemInNewFile(run, lumi, event, fileNameHash)) ||
0193              skipToItemInNewFile(run, lumi, event);
0194     }
0195     return true;
0196   }
0197 
0198   //Initiate the file using multiple data catalogs
0199   void RootInputFileSequence::initTheFile(
0200       bool skipBadFiles, bool deleteIndexIntoFile, InputSource* input, char const* inputTypeName, InputType inputType) {
0201     // We are really going to close the open file.
0202 
0203     if (fileIterLastOpened_ != fileIterEnd_) {
0204       size_t currentIndexIntoFile = fileIterLastOpened_ - fileIterBegin_;
0205       if (deleteIndexIntoFile) {
0206         indexesIntoFiles_[currentIndexIntoFile].reset();
0207       } else {
0208         if (indexesIntoFiles_[currentIndexIntoFile])
0209           indexesIntoFiles_[currentIndexIntoFile]->inputFileClosed();
0210       }
0211       fileIterLastOpened_ = fileIterEnd_;
0212     }
0213     closeFile();
0214 
0215     if (noMoreFiles()) {
0216       // No files specified
0217       return;
0218     }
0219 
0220     // Check if the logical file name was found.
0221     if (fileNames()[0].empty()) {
0222       // LFN not found in catalog.
0223       InputFile::reportSkippedFile(fileNames()[0], logicalFileName());
0224       if (!skipBadFiles) {
0225         throw cms::Exception("LogicalFileNameNotFound", "RootFileSequenceBase::initTheFile()\n")
0226             << "Logical file name '" << logicalFileName() << "' was not found in the file catalog.\n"
0227             << "If you wanted a local file, you forgot the 'file:' prefix\n"
0228             << "before the file name in your configuration file.\n";
0229       }
0230       LogWarning("") << "Input logical file: " << logicalFileName()
0231                      << " was not found in the catalog, and will be skipped.\n";
0232       return;
0233     }
0234 
0235     lfn_ = logicalFileName().empty() ? fileNames()[0] : logicalFileName();
0236     lfnHash_ = std::hash<std::string>()(lfn_);
0237     usedFallback_ = false;
0238 
0239     std::shared_ptr<InputFile> filePtr;
0240     std::list<std::string> originalInfo;
0241 
0242     std::vector<std::string> const& fNames = fileNames();
0243 
0244     //this tries to open the file using multiple PFNs corresponding to different data catalogs
0245     {
0246       std::list<std::string> exInfo;
0247       std::list<std::string> additionalMessage;
0248       std::unique_ptr<InputSource::FileOpenSentry> sentry(
0249           input ? std::make_unique<InputSource::FileOpenSentry>(*input, lfn_) : nullptr);
0250       edm::Service<edm::storage::StatisticsSenderService> service;
0251       if (service.isAvailable()) {
0252         service->openingFile(lfn(), inputType, -1);
0253       }
0254       for (std::vector<std::string>::const_iterator it = fNames.begin(); it != fNames.end(); ++it) {
0255         try {
0256           usedFallback_ = (it != fNames.begin());
0257           std::unique_ptr<char[]> name(gSystem->ExpandPathName(it->c_str()));
0258           filePtr = std::make_shared<InputFile>(name.get(), "  Initiating request to open file ", inputType);
0259           break;
0260         } catch (cms::Exception const& e) {
0261           if (!skipBadFiles && std::next(it) == fNames.end()) {
0262             InputFile::reportSkippedFile((*it), logicalFileName());
0263             errors::ErrorCodes errorCode = usedFallback_ ? errors::FallbackFileOpenError : errors::FileOpenError;
0264             Exception ex(errorCode, "", e);
0265             ex.addContext("Calling RootInputFileSequence::initTheFile()");
0266             std::ostringstream out;
0267             out << "Input file " << (*it) << " could not be opened.";
0268             ex.addAdditionalInfo(out.str());
0269             //report previous exceptions when use other names to open file
0270             for (auto const& s : exInfo)
0271               ex.addAdditionalInfo(s);
0272             //report more information of the earlier file open failures in a log message
0273             if (not additionalMessage.empty()) {
0274               edm::LogWarning l("RootInputFileSequence");
0275               for (auto const& msg : additionalMessage) {
0276                 l << msg << "\n";
0277               }
0278             }
0279             throw ex;
0280           } else {
0281             exInfo.push_back("Calling RootInputFileSequence::initTheFile(): fail to open the file with name " + (*it));
0282             additionalMessage.push_back(fmt::format(
0283                 "Input file {} could not be opened, and fallback was attempted.\nAdditional information:", *it));
0284             char c = 'a';
0285             for (auto const& ai : e.additionalInfo()) {
0286               additionalMessage.push_back(fmt::format("  [{}] {}", c, ai));
0287               ++c;
0288             }
0289           }
0290         }
0291       }
0292     }
0293     if (filePtr) {
0294       size_t currentIndexIntoFile = fileIter_ - fileIterBegin_;
0295       rootFile_ = makeRootFile(filePtr);
0296       assert(rootFile_);
0297       if (input) {
0298         rootFile_->setSignals(&(input->preEventReadFromSourceSignal_), &(input->postEventReadFromSourceSignal_));
0299       }
0300       fileIterLastOpened_ = fileIter_;
0301       setIndexIntoFile(currentIndexIntoFile);
0302       rootFile_->reportOpened(inputTypeName);
0303     } else {
0304       std::string fName = !fNames.empty() ? fNames[0] : "";
0305       InputFile::reportSkippedFile(fName, logicalFileName());  //0 cause exception?
0306       if (!skipBadFiles) {
0307         throw Exception(errors::FileOpenError) << "RootFileSequenceBase::initTheFile(): Input file " << fName
0308                                                << " was not found or could not be opened.\n";
0309       }
0310       LogWarning("RootInputFileSequence")
0311           << "Input file: " << fName << " was not found or could not be opened, and will be skipped.\n";
0312     }
0313   }
0314 
0315   void RootInputFileSequence::closeFile() {
0316     edm::Service<edm::storage::StatisticsSenderService> service;
0317     if (rootFile() and service.isAvailable()) {
0318       service->closedFile(lfn(), usedFallback());
0319     }
0320     closeFile_();
0321   }
0322 
0323   void RootInputFileSequence::setIndexIntoFile(size_t index) {
0324     indexesIntoFiles_[index] = rootFile()->indexIntoFileSharedPtr();
0325   }
0326 
0327 }  // namespace edm