Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:19:05

0001 
0002 #include "IOPool/Input/src/DuplicateChecker.h"
0003 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0004 #include "FWCore/Utilities/interface/Exception.h"
0005 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0006 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0007 
0008 #include <cassert>
0009 #include <algorithm>
0010 
0011 namespace edm {
0012 
0013   DuplicateChecker::DuplicateChecker(ParameterSet const& pset)
0014       : dataType_(unknown), itIsKnownTheFileHasNoDuplicates_(false), disabled_(false) {
0015     // The default value provided as the second argument to the getUntrackedParameter function call
0016     // is not used when the ParameterSet has been validated and the parameters are not optional
0017     // in the description.  This is currently true when PoolSource is the primary input source.
0018     // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function
0019     // yet, so the ParameterSet does not get validated yet.  As soon as all the modules with a SecSource
0020     // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can
0021     // and should be deleted from the code.
0022     std::string duplicateCheckMode =
0023         pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
0024 
0025     if (duplicateCheckMode == std::string("noDuplicateCheck"))
0026       duplicateCheckMode_ = noDuplicateCheck;
0027     else if (duplicateCheckMode == std::string("checkEachFile"))
0028       duplicateCheckMode_ = checkEachFile;
0029     else if (duplicateCheckMode == std::string("checkEachRealDataFile"))
0030       duplicateCheckMode_ = checkEachRealDataFile;
0031     else if (duplicateCheckMode == std::string("checkAllFilesOpened"))
0032       duplicateCheckMode_ = checkAllFilesOpened;
0033     else {
0034       throw cms::Exception("Configuration")
0035           << "Illegal configuration parameter value passed to PoolSource for\n"
0036           << "the \"duplicateCheckMode\" parameter, legal values are:\n"
0037           << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
0038     }
0039   }
0040 
0041   void DuplicateChecker::disable() {
0042     disabled_ = true;
0043     dataType_ = unknown;
0044     relevantPreviousEvents_.clear();
0045     itIsKnownTheFileHasNoDuplicates_ = false;
0046   }
0047 
0048   void DuplicateChecker::inputFileOpened(bool realData,
0049                                          IndexIntoFile const& indexIntoFile,
0050                                          std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
0051                                          std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
0052     dataType_ = realData ? isRealData : isSimulation;
0053     if (checkDisabled())
0054       return;
0055 
0056     relevantPreviousEvents_.clear();
0057     itIsKnownTheFileHasNoDuplicates_ = false;
0058 
0059     if (duplicateCheckMode_ == checkAllFilesOpened) {
0060       // Compares the current IndexIntoFile to all the previous ones and saves any duplicates.
0061       // One unintended thing, it also saves the duplicate runs and lumis.
0062       for (std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
0063         if (indexesIntoFiles[i].get() != nullptr) {
0064           indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
0065         }
0066       }
0067     }
0068     if (relevantPreviousEvents_.empty()) {
0069       if (!indexIntoFile.containsDuplicateEvents()) {
0070         itIsKnownTheFileHasNoDuplicates_ = true;
0071       }
0072     }
0073   }
0074 
0075   void DuplicateChecker::inputFileClosed() {
0076     dataType_ = unknown;
0077     relevantPreviousEvents_.clear();
0078     itIsKnownTheFileHasNoDuplicates_ = false;
0079   }
0080 
0081   bool DuplicateChecker::isDuplicateAndCheckActive(
0082       int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const& fileName) {
0083     if (itIsKnownTheFileHasNoDuplicates_)
0084       return false;
0085     if (checkDisabled())
0086       return false;
0087 
0088     IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
0089     bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
0090 
0091     if (duplicate) {
0092       if (duplicateCheckMode_ == checkAllFilesOpened) {
0093         LogWarning("DuplicateEvent") << "Duplicate Events found in entire set of input files.\n"
0094                                      << "Both events were from run " << run << " and luminosity block " << lumi
0095                                      << " with event number " << event << ".\n"
0096                                      << "The duplicate was from file " << fileName << ".\n"
0097                                      << "The duplicate will be skipped.\n";
0098       } else {
0099         LogWarning("DuplicateEvent") << "Duplicate Events found in file " << fileName << ".\n"
0100                                      << "Both events were from run " << run << " and luminosity block " << lumi
0101                                      << " with event number " << event << ".\n"
0102                                      << "The duplicate will be skipped.\n";
0103       }
0104       return true;
0105     }
0106     return false;
0107   }
0108 
0109   void DuplicateChecker::fillDescription(ParameterSetDescription& desc) {
0110     std::string defaultString("checkAllFilesOpened");
0111     desc.addUntracked<std::string>("duplicateCheckMode", defaultString)
0112         ->setComment(
0113             "'checkAllFilesOpened':   check across all input files\n"
0114             "'checkEachFile':         check each input file independently\n"
0115             "'checkEachRealDataFile': check each real data input file independently\n"
0116             "'noDuplicateCheck':      no duplicate checking\n");
0117   }
0118 }  // namespace edm