File indexing completed on 2024-04-06 12:19:05
0001
0002 #include "IOPool/Input/src/DuplicateChecker.h"
0003 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0004 #include "FWCore/Utilities/interface/Exception.h"
0005 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0006 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0007
0008 #include <cassert>
0009 #include <algorithm>
0010
0011 namespace edm {
0012
0013 DuplicateChecker::DuplicateChecker(ParameterSet const& pset)
0014 : dataType_(unknown), itIsKnownTheFileHasNoDuplicates_(false), disabled_(false) {
0015
0016
0017
0018
0019
0020
0021
0022 std::string duplicateCheckMode =
0023 pset.getUntrackedParameter<std::string>("duplicateCheckMode", std::string("checkAllFilesOpened"));
0024
0025 if (duplicateCheckMode == std::string("noDuplicateCheck"))
0026 duplicateCheckMode_ = noDuplicateCheck;
0027 else if (duplicateCheckMode == std::string("checkEachFile"))
0028 duplicateCheckMode_ = checkEachFile;
0029 else if (duplicateCheckMode == std::string("checkEachRealDataFile"))
0030 duplicateCheckMode_ = checkEachRealDataFile;
0031 else if (duplicateCheckMode == std::string("checkAllFilesOpened"))
0032 duplicateCheckMode_ = checkAllFilesOpened;
0033 else {
0034 throw cms::Exception("Configuration")
0035 << "Illegal configuration parameter value passed to PoolSource for\n"
0036 << "the \"duplicateCheckMode\" parameter, legal values are:\n"
0037 << "\"noDuplicateCheck\", \"checkEachFile\", \"checkEachRealDataFile\", \"checkAllFilesOpened\"\n";
0038 }
0039 }
0040
0041 void DuplicateChecker::disable() {
0042 disabled_ = true;
0043 dataType_ = unknown;
0044 relevantPreviousEvents_.clear();
0045 itIsKnownTheFileHasNoDuplicates_ = false;
0046 }
0047
0048 void DuplicateChecker::inputFileOpened(bool realData,
0049 IndexIntoFile const& indexIntoFile,
0050 std::vector<std::shared_ptr<IndexIntoFile> > const& indexesIntoFiles,
0051 std::vector<std::shared_ptr<IndexIntoFile> >::size_type currentIndexIntoFile) {
0052 dataType_ = realData ? isRealData : isSimulation;
0053 if (checkDisabled())
0054 return;
0055
0056 relevantPreviousEvents_.clear();
0057 itIsKnownTheFileHasNoDuplicates_ = false;
0058
0059 if (duplicateCheckMode_ == checkAllFilesOpened) {
0060
0061
0062 for (std::vector<std::shared_ptr<IndexIntoFile> >::size_type i = 0; i < currentIndexIntoFile; ++i) {
0063 if (indexesIntoFiles[i].get() != nullptr) {
0064 indexIntoFile.set_intersection(*indexesIntoFiles[i], relevantPreviousEvents_);
0065 }
0066 }
0067 }
0068 if (relevantPreviousEvents_.empty()) {
0069 if (!indexIntoFile.containsDuplicateEvents()) {
0070 itIsKnownTheFileHasNoDuplicates_ = true;
0071 }
0072 }
0073 }
0074
0075 void DuplicateChecker::inputFileClosed() {
0076 dataType_ = unknown;
0077 relevantPreviousEvents_.clear();
0078 itIsKnownTheFileHasNoDuplicates_ = false;
0079 }
0080
0081 bool DuplicateChecker::isDuplicateAndCheckActive(
0082 int index, RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, std::string const& fileName) {
0083 if (itIsKnownTheFileHasNoDuplicates_)
0084 return false;
0085 if (checkDisabled())
0086 return false;
0087
0088 IndexIntoFile::IndexRunLumiEventKey newEvent(index, run, lumi, event);
0089 bool duplicate = !relevantPreviousEvents_.insert(newEvent).second;
0090
0091 if (duplicate) {
0092 if (duplicateCheckMode_ == checkAllFilesOpened) {
0093 LogWarning("DuplicateEvent") << "Duplicate Events found in entire set of input files.\n"
0094 << "Both events were from run " << run << " and luminosity block " << lumi
0095 << " with event number " << event << ".\n"
0096 << "The duplicate was from file " << fileName << ".\n"
0097 << "The duplicate will be skipped.\n";
0098 } else {
0099 LogWarning("DuplicateEvent") << "Duplicate Events found in file " << fileName << ".\n"
0100 << "Both events were from run " << run << " and luminosity block " << lumi
0101 << " with event number " << event << ".\n"
0102 << "The duplicate will be skipped.\n";
0103 }
0104 return true;
0105 }
0106 return false;
0107 }
0108
0109 void DuplicateChecker::fillDescription(ParameterSetDescription& desc) {
0110 std::string defaultString("checkAllFilesOpened");
0111 desc.addUntracked<std::string>("duplicateCheckMode", defaultString)
0112 ->setComment(
0113 "'checkAllFilesOpened': check across all input files\n"
0114 "'checkEachFile': check each input file independently\n"
0115 "'checkEachRealDataFile': check each real data input file independently\n"
0116 "'noDuplicateCheck': no duplicate checking\n");
0117 }
0118 }