Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-04-23 07:14:34

0001 //----------------------------------------------------------------------
0002 // EdmFileUtil.cpp
0003 //
0004 
0005 #include <algorithm>
0006 #include <unistd.h>
0007 #include <exception>
0008 #include <iostream>
0009 #include <fstream>
0010 #include <string>
0011 #include <vector>
0012 #include <boost/program_options.hpp>
0013 #include "IOPool/Common/bin/CollUtil.h"
0014 #include "DataFormats/Provenance/interface/BranchType.h"
0015 #include "FWCore/Catalog/interface/InputFileCatalog.h"
0016 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
0017 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0018 #include "FWCore/PluginManager/interface/PluginManager.h"
0019 #include "FWCore/PluginManager/interface/standard.h"
0020 #include "FWCore/Services/interface/setupSiteLocalConfig.h"
0021 #include "FWCore/Utilities/interface/Adler32Calculator.h"
0022 #include "FWCore/Utilities/interface/Exception.h"
0023 #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
0024 
0025 #include "TFile.h"
0026 #include "TError.h"
0027 
0028 int main(int argc, char* argv[]) {
0029   gErrorIgnoreLevel = kError;
0030 
0031   // Add options here
0032 
0033   boost::program_options::options_description desc("Allowed options");
0034   desc.add_options()("help,h", "print help message")(
0035       "file,f", boost::program_options::value<std::vector<std::string> >(), "data file (-f or -F required)")(
0036       "Files,F",
0037       boost::program_options::value<std::string>(),
0038       "text file containing names of data files, one per line")(
0039       "catalog,c", boost::program_options::value<std::string>(), "catalog")("decodeLFN,d", "Convert LFN to PFN")(
0040       "uuid,u", "Print uuid")("adler32,a", "Print adler32 checksum.")("allowRecovery",
0041                                                                       "Allow root to auto-recover corrupted files")(
0042       "JSON,j", "JSON output format.  Any arguments listed below are ignored")("ls,l", "list file content")(
0043       "map,m", "Print TFile::Map(\"extended\"). The output can be HUGE.")("print,P", "Print all")(
0044       "verbose,v", "Verbose printout")("printBranchDetails,b", "Call Print()sc for all branches")(
0045       "tree,t", boost::program_options::value<std::string>(), "Select tree used with -P and -b options")(
0046       "events,e",
0047       "Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, "
0048       "and event number.  Also prints the entry numbers and whether it is possible to use fast copy with the file.")(
0049       "eventsInLumis", "Print how many Events are in each LuminosityBlock.");
0050 
0051   // What trees do we require for this to be a valid collection?
0052   std::vector<std::string> expectedTrees;
0053   expectedTrees.push_back(edm::poolNames::metaDataTreeName());
0054   expectedTrees.push_back(edm::poolNames::eventTreeName());
0055 
0056   boost::program_options::positional_options_description p;
0057   p.add("file", -1);
0058 
0059   boost::program_options::variables_map vm;
0060 
0061   try {
0062     boost::program_options::store(
0063         boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
0064   } catch (boost::program_options::error const& x) {
0065     std::cerr << "Option parsing failure:\n" << x.what() << "\n\n";
0066     std::cerr << desc << "\n";
0067     return 1;
0068   }
0069 
0070   boost::program_options::notify(vm);
0071 
0072   if (vm.count("help")) {
0073     std::cout << desc << "\n";
0074     return 1;
0075   }
0076 
0077   int rc = 0;
0078   try {
0079     auto operate = edm::setupSiteLocalConfig();
0080 
0081     std::vector<std::string> in =
0082         (vm.count("file") ? vm["file"].as<std::vector<std::string> >() : std::vector<std::string>());
0083     if (vm.count("Files")) {
0084       std::ifstream ifile(vm["Files"].as<std::string>().c_str());
0085       std::istream_iterator<std::string> beginItr(ifile);
0086       if (ifile.fail()) {
0087         std::cout << "File '" << vm["Files"].as<std::string>() << "' not found, not opened, or empty\n";
0088         return 1;
0089       }
0090       std::istream_iterator<std::string> endItr;
0091       copy(beginItr, endItr, back_inserter(in));
0092     }
0093     if (in.empty()) {
0094       std::cout << "Data file(s) not set.\n";
0095       std::cout << desc << "\n";
0096       return 1;
0097     }
0098     std::string catalogIn = (vm.count("catalog") ? vm["catalog"].as<std::string>() : std::string());
0099     bool decodeLFN = vm.count("decodeLFN");
0100     bool uuid = vm.count("uuid");
0101     bool adler32 = vm.count("adler32");
0102     bool allowRecovery = vm.count("allowRecovery");
0103     bool json = vm.count("JSON");
0104     bool more = !json;
0105     bool verbose = more && (vm.count("verbose") > 0 ? true : false);
0106     bool events = more && (vm.count("events") > 0 ? true : false);
0107     bool eventsInLumis = more && (vm.count("eventsInLumis") > 0 ? true : false);
0108     bool ls = more && (vm.count("ls") > 0 ? true : false);
0109     bool printMap = vm.count("map");
0110     bool tree = more && (vm.count("tree") > 0 ? true : false);
0111     bool print = more && (vm.count("print") > 0 ? true : false);
0112     bool printBranchDetails = more && (vm.count("printBranchDetails") > 0 ? true : false);
0113     bool onlyDecodeLFN =
0114         decodeLFN && !(uuid || adler32 || allowRecovery || json || events || tree || ls || print || printBranchDetails);
0115     std::string selectedTree = tree ? vm["tree"].as<std::string>() : edm::poolNames::eventTreeName();
0116 
0117     if (events || eventsInLumis) {
0118       try {
0119         edmplugin::PluginManager::configure(edmplugin::standard::config());
0120       } catch (std::exception& e) {
0121         std::cout << "exception caught in EdmFileUtil while configuring the PluginManager\n" << e.what();
0122         return 1;
0123       }
0124     }
0125 
0126     edm::InputFileCatalog catalog(in, catalogIn, true);
0127     std::vector<std::string> const& filesIn = catalog.fileNames(0);
0128 
0129     if (json) {
0130       std::cout << '[' << std::endl;
0131     }
0132 
0133     // now run..
0134     // Allow user to input multiple files
0135     for (unsigned int j = 0; j < in.size(); ++j) {
0136       // We _only_ want the LFN->PFN conversion. No need to open the file,
0137       // just check the catalog and move on
0138       if (onlyDecodeLFN) {
0139         std::cout << filesIn[j] << std::endl;
0140         continue;
0141       }
0142 
0143       // open a data file
0144       if (!json)
0145         std::cout << in[j] << "\n";
0146       std::string const& lfn = in[j];
0147       std::unique_ptr<TFile> tfile{edm::openFileHdl(filesIn[j])};
0148       if (tfile == nullptr)
0149         return 1;
0150 
0151       std::string const& pfn = filesIn[j];
0152 
0153       if (verbose)
0154         std::cout << "ECU:: Opened " << pfn << std::endl;
0155 
0156       std::string datafile = decodeLFN ? pfn : lfn;
0157 
0158       // First check that this file is not auto-recovered
0159       // Stop the job unless specified to do otherwise
0160 
0161       bool isRecovered = tfile->TestBit(TFile::kRecovered);
0162       if (isRecovered) {
0163         if (allowRecovery) {
0164           if (!json) {
0165             std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0166             std::cout << "Proceeding anyway\n";
0167           }
0168         } else {
0169           std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0170           std::cout << "Stopping. Use --allowRecovery to try ignoring this\n";
0171           return 1;
0172         }
0173       } else {
0174         if (verbose)
0175           std::cout << "ECU:: Collection not autorecovered. Continuing\n";
0176       }
0177 
0178       // Ok. Do we have the expected trees?
0179       for (unsigned int i = 0; i < expectedTrees.size(); ++i) {
0180         TTree* t = (TTree*)tfile->Get(expectedTrees[i].c_str());
0181         if (t == nullptr) {
0182           std::cout << "Tree " << expectedTrees[i] << " appears to be missing. Not a valid collection\n";
0183           std::cout << "Exiting\n";
0184           return 1;
0185         } else {
0186           if (verbose)
0187             std::cout << "ECU:: Found Tree " << expectedTrees[i] << std::endl;
0188         }
0189       }
0190 
0191       if (verbose)
0192         std::cout << "ECU:: Found all expected trees\n";
0193 
0194       std::ostringstream auout;
0195       if (adler32) {
0196         unsigned int const EDMFILEUTILADLERBUFSIZE = 10 * 1024 * 1024;  // 10MB buffer
0197         static char buffer[EDMFILEUTILADLERBUFSIZE];
0198         size_t bufToRead = EDMFILEUTILADLERBUFSIZE;
0199         uint32_t a = 1, b = 0;
0200         size_t fileSize = tfile->GetSize();
0201         tfile->Seek(0, TFile::kBeg);
0202 
0203         for (size_t offset = 0; offset < fileSize; offset += EDMFILEUTILADLERBUFSIZE) {
0204           // true on last loop
0205           if (fileSize - offset < EDMFILEUTILADLERBUFSIZE)
0206             bufToRead = fileSize - offset;
0207           tfile->ReadBuffer((char*)buffer, bufToRead);
0208           cms::Adler32(buffer, bufToRead, a, b);
0209         }
0210         uint32_t adler32sum = (b << 16) | a;
0211         if (json) {
0212           auout << ",\"adler32sum\":" << adler32sum;
0213         } else {
0214           auout << ", " << std::hex << adler32sum << " adler32sum";
0215         }
0216       }
0217 
0218       if (uuid) {
0219         TTree* paramsTree = (TTree*)tfile->Get(edm::poolNames::metaDataTreeName().c_str());
0220         if (json) {
0221           auout << ",\"uuid\":\"" << edm::getUuid(paramsTree) << '"';
0222         } else {
0223           auout << ", " << edm::getUuid(paramsTree) << " uuid";
0224         }
0225       }
0226 
0227       // Ok. How many events?
0228       int nruns = edm::numEntries(tfile.get(), edm::poolNames::runTreeName());
0229       int nlumis = edm::numEntries(tfile.get(), edm::poolNames::luminosityBlockTreeName());
0230       int nevents = edm::numEntries(tfile.get(), edm::poolNames::eventTreeName());
0231       if (json) {
0232         if (j > 0)
0233           std::cout << ',' << std::endl;
0234         std::cout << "{\"file\":\"" << datafile << '"' << ",\"runs\":" << nruns << ",\"lumis\":" << nlumis
0235                   << ",\"events\":" << nevents << ",\"bytes\":" << tfile->GetSize() << auout.str() << '}' << std::endl;
0236       } else {
0237         std::cout << datafile << " (" << nruns << " runs, " << nlumis << " lumis, " << nevents << " events, "
0238                   << tfile->GetSize() << " bytes" << auout.str() << ")" << std::endl;
0239       }
0240 
0241       if (json) {
0242         // Remainder of arguments not supported in JSON yet.
0243         continue;
0244       }
0245 
0246       // Look at the collection contents
0247       if (ls) {
0248         tfile->ls();
0249       }
0250 
0251       // Print Map()
0252       if (printMap) {
0253         tfile->Map("extended");
0254       }
0255 
0256       // Print out each tree
0257       if (print) {
0258         TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
0259         if (printTree == nullptr) {
0260           std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
0261           std::cout << "Exiting\n";
0262           return 1;
0263         }
0264         edm::printBranchNames(printTree);
0265       }
0266 
0267       if (printBranchDetails) {
0268         TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
0269         if (printTree == nullptr) {
0270           std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
0271           std::cout << "Exiting\n";
0272           return 1;
0273         }
0274         edm::longBranchPrint(printTree);
0275       }
0276 
0277       // Print out event lists
0278       if (events) {
0279         edm::printEventLists(tfile.get());
0280       }
0281 
0282       if (eventsInLumis) {
0283         edm::printEventsInLumis(tfile.get());
0284       }
0285 
0286       tfile->Close();
0287     }
0288     if (json) {
0289       std::cout << ']' << std::endl;
0290     }
0291   } catch (cms::Exception const& e) {
0292     std::cout << "cms::Exception caught in "
0293               << "EdmFileUtil" << '\n'
0294               << e.explainSelf();
0295     rc = 1;
0296   } catch (std::exception const& e) {
0297     std::cout << "Standard library exception caught in "
0298               << "EdmFileUtil" << '\n'
0299               << e.what();
0300     rc = 1;
0301   } catch (...) {
0302     std::cout << "Unknown exception caught in "
0303               << "EdmFileUtil";
0304     rc = 2;
0305   }
0306   return rc;
0307 }