Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:19:05

0001 //----------------------------------------------------------------------
0002 // EdmFileUtil.cpp
0003 //
0004 
0005 #include <algorithm>
0006 #include <unistd.h>
0007 #include <exception>
0008 #include <iostream>
0009 #include <fstream>
0010 #include <string>
0011 #include <vector>
0012 #include <boost/program_options.hpp>
0013 #include "IOPool/Common/bin/CollUtil.h"
0014 #include "DataFormats/Provenance/interface/BranchType.h"
0015 #include "FWCore/Catalog/interface/InputFileCatalog.h"
0016 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
0017 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0018 #include "FWCore/PluginManager/interface/PluginManager.h"
0019 #include "FWCore/PluginManager/interface/standard.h"
0020 #include "FWCore/Services/interface/setupSiteLocalConfig.h"
0021 #include "FWCore/Utilities/interface/Adler32Calculator.h"
0022 #include "FWCore/Utilities/interface/Exception.h"
0023 #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
0024 
0025 #include "TFile.h"
0026 #include "TError.h"
0027 
0028 int main(int argc, char* argv[]) {
0029   gErrorIgnoreLevel = kError;
0030 
0031   // Add options here
0032 
0033   boost::program_options::options_description desc("Allowed options");
0034   desc.add_options()("help,h", "print help message")(
0035       "file,f", boost::program_options::value<std::vector<std::string> >(), "data file (-f or -F required)")(
0036       "Files,F",
0037       boost::program_options::value<std::string>(),
0038       "text file containing names of data files, one per line")(
0039       "catalog,c", boost::program_options::value<std::string>(), "catalog")("decodeLFN,d", "Convert LFN to PFN")(
0040       "uuid,u", "Print uuid")("adler32,a", "Print adler32 checksum.")("allowRecovery",
0041                                                                       "Allow root to auto-recover corrupted files")(
0042       "JSON,j", "JSON output format.  Any arguments listed below are ignored")("ls,l", "list file content")(
0043       "print,P", "Print all")("verbose,v", "Verbose printout")("printBranchDetails,b",
0044                                                                "Call Print()sc for all branches")(
0045       "tree,t", boost::program_options::value<std::string>(), "Select tree used with -P and -b options")(
0046       "events,e",
0047       "Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, "
0048       "and event number.  Also prints the entry numbers and whether it is possible to use fast copy with the file.")(
0049       "eventsInLumis", "Print how many Events are in each LuminosityBlock.");
0050 
0051   // What trees do we require for this to be a valid collection?
0052   std::vector<std::string> expectedTrees;
0053   expectedTrees.push_back(edm::poolNames::metaDataTreeName());
0054   expectedTrees.push_back(edm::poolNames::eventTreeName());
0055 
0056   boost::program_options::positional_options_description p;
0057   p.add("file", -1);
0058 
0059   boost::program_options::variables_map vm;
0060 
0061   try {
0062     boost::program_options::store(
0063         boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
0064   } catch (boost::program_options::error const& x) {
0065     std::cerr << "Option parsing failure:\n" << x.what() << "\n\n";
0066     std::cerr << desc << "\n";
0067     return 1;
0068   }
0069 
0070   boost::program_options::notify(vm);
0071 
0072   if (vm.count("help")) {
0073     std::cout << desc << "\n";
0074     return 1;
0075   }
0076 
0077   int rc = 0;
0078   try {
0079     auto operate = edm::setupSiteLocalConfig();
0080 
0081     std::vector<std::string> in =
0082         (vm.count("file") ? vm["file"].as<std::vector<std::string> >() : std::vector<std::string>());
0083     if (vm.count("Files")) {
0084       std::ifstream ifile(vm["Files"].as<std::string>().c_str());
0085       std::istream_iterator<std::string> beginItr(ifile);
0086       if (ifile.fail()) {
0087         std::cout << "File '" << vm["Files"].as<std::string>() << "' not found, not opened, or empty\n";
0088         return 1;
0089       }
0090       std::istream_iterator<std::string> endItr;
0091       copy(beginItr, endItr, back_inserter(in));
0092     }
0093     if (in.empty()) {
0094       std::cout << "Data file(s) not set.\n";
0095       std::cout << desc << "\n";
0096       return 1;
0097     }
0098     std::string catalogIn = (vm.count("catalog") ? vm["catalog"].as<std::string>() : std::string());
0099     bool decodeLFN = vm.count("decodeLFN");
0100     bool uuid = vm.count("uuid");
0101     bool adler32 = vm.count("adler32");
0102     bool allowRecovery = vm.count("allowRecovery");
0103     bool json = vm.count("JSON");
0104     bool more = !json;
0105     bool verbose = more && (vm.count("verbose") > 0 ? true : false);
0106     bool events = more && (vm.count("events") > 0 ? true : false);
0107     bool eventsInLumis = more && (vm.count("eventsInLumis") > 0 ? true : false);
0108     bool ls = more && (vm.count("ls") > 0 ? true : false);
0109     bool tree = more && (vm.count("tree") > 0 ? true : false);
0110     bool print = more && (vm.count("print") > 0 ? true : false);
0111     bool printBranchDetails = more && (vm.count("printBranchDetails") > 0 ? true : false);
0112     bool onlyDecodeLFN =
0113         decodeLFN && !(uuid || adler32 || allowRecovery || json || events || tree || ls || print || printBranchDetails);
0114     std::string selectedTree = tree ? vm["tree"].as<std::string>() : edm::poolNames::eventTreeName();
0115 
0116     if (events || eventsInLumis) {
0117       try {
0118         edmplugin::PluginManager::configure(edmplugin::standard::config());
0119       } catch (std::exception& e) {
0120         std::cout << "exception caught in EdmFileUtil while configuring the PluginManager\n" << e.what();
0121         return 1;
0122       }
0123     }
0124 
0125     edm::InputFileCatalog catalog(in, catalogIn, true);
0126     std::vector<std::string> const& filesIn = catalog.fileNames(0);
0127 
0128     if (json) {
0129       std::cout << '[' << std::endl;
0130     }
0131 
0132     // now run..
0133     // Allow user to input multiple files
0134     for (unsigned int j = 0; j < in.size(); ++j) {
0135       // We _only_ want the LFN->PFN conversion. No need to open the file,
0136       // just check the catalog and move on
0137       if (onlyDecodeLFN) {
0138         std::cout << filesIn[j] << std::endl;
0139         continue;
0140       }
0141 
0142       // open a data file
0143       if (!json)
0144         std::cout << in[j] << "\n";
0145       std::string const& lfn = in[j];
0146       std::unique_ptr<TFile> tfile{edm::openFileHdl(filesIn[j])};
0147       if (tfile == nullptr)
0148         return 1;
0149 
0150       std::string const& pfn = filesIn[j];
0151 
0152       if (verbose)
0153         std::cout << "ECU:: Opened " << pfn << std::endl;
0154 
0155       std::string datafile = decodeLFN ? pfn : lfn;
0156 
0157       // First check that this file is not auto-recovered
0158       // Stop the job unless specified to do otherwise
0159 
0160       bool isRecovered = tfile->TestBit(TFile::kRecovered);
0161       if (isRecovered) {
0162         if (allowRecovery) {
0163           if (!json) {
0164             std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0165             std::cout << "Proceeding anyway\n";
0166           }
0167         } else {
0168           std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0169           std::cout << "Stopping. Use --allowRecovery to try ignoring this\n";
0170           return 1;
0171         }
0172       } else {
0173         if (verbose)
0174           std::cout << "ECU:: Collection not autorecovered. Continuing\n";
0175       }
0176 
0177       // Ok. Do we have the expected trees?
0178       for (unsigned int i = 0; i < expectedTrees.size(); ++i) {
0179         TTree* t = (TTree*)tfile->Get(expectedTrees[i].c_str());
0180         if (t == nullptr) {
0181           std::cout << "Tree " << expectedTrees[i] << " appears to be missing. Not a valid collection\n";
0182           std::cout << "Exiting\n";
0183           return 1;
0184         } else {
0185           if (verbose)
0186             std::cout << "ECU:: Found Tree " << expectedTrees[i] << std::endl;
0187         }
0188       }
0189 
0190       if (verbose)
0191         std::cout << "ECU:: Found all expected trees\n";
0192 
0193       std::ostringstream auout;
0194       if (adler32) {
0195         unsigned int const EDMFILEUTILADLERBUFSIZE = 10 * 1024 * 1024;  // 10MB buffer
0196         static char buffer[EDMFILEUTILADLERBUFSIZE];
0197         size_t bufToRead = EDMFILEUTILADLERBUFSIZE;
0198         uint32_t a = 1, b = 0;
0199         size_t fileSize = tfile->GetSize();
0200         tfile->Seek(0, TFile::kBeg);
0201 
0202         for (size_t offset = 0; offset < fileSize; offset += EDMFILEUTILADLERBUFSIZE) {
0203           // true on last loop
0204           if (fileSize - offset < EDMFILEUTILADLERBUFSIZE)
0205             bufToRead = fileSize - offset;
0206           tfile->ReadBuffer((char*)buffer, bufToRead);
0207           cms::Adler32(buffer, bufToRead, a, b);
0208         }
0209         uint32_t adler32sum = (b << 16) | a;
0210         if (json) {
0211           auout << ",\"adler32sum\":" << adler32sum;
0212         } else {
0213           auout << ", " << std::hex << adler32sum << " adler32sum";
0214         }
0215       }
0216 
0217       if (uuid) {
0218         TTree* paramsTree = (TTree*)tfile->Get(edm::poolNames::metaDataTreeName().c_str());
0219         if (json) {
0220           auout << ",\"uuid\":\"" << edm::getUuid(paramsTree) << '"';
0221         } else {
0222           auout << ", " << edm::getUuid(paramsTree) << " uuid";
0223         }
0224       }
0225 
0226       // Ok. How many events?
0227       int nruns = edm::numEntries(tfile.get(), edm::poolNames::runTreeName());
0228       int nlumis = edm::numEntries(tfile.get(), edm::poolNames::luminosityBlockTreeName());
0229       int nevents = edm::numEntries(tfile.get(), edm::poolNames::eventTreeName());
0230       if (json) {
0231         if (j > 0)
0232           std::cout << ',' << std::endl;
0233         std::cout << "{\"file\":\"" << datafile << '"' << ",\"runs\":" << nruns << ",\"lumis\":" << nlumis
0234                   << ",\"events\":" << nevents << ",\"bytes\":" << tfile->GetSize() << auout.str() << '}' << std::endl;
0235       } else {
0236         std::cout << datafile << " (" << nruns << " runs, " << nlumis << " lumis, " << nevents << " events, "
0237                   << tfile->GetSize() << " bytes" << auout.str() << ")" << std::endl;
0238       }
0239 
0240       if (json) {
0241         // Remainder of arguments not supported in JSON yet.
0242         continue;
0243       }
0244 
0245       // Look at the collection contents
0246       if (ls) {
0247         if (tfile != nullptr)
0248           tfile->ls();
0249       }
0250 
0251       // Print out each tree
0252       if (print) {
0253         TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
0254         if (printTree == nullptr) {
0255           std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
0256           std::cout << "Exiting\n";
0257           return 1;
0258         }
0259         edm::printBranchNames(printTree);
0260       }
0261 
0262       if (printBranchDetails) {
0263         TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
0264         if (printTree == nullptr) {
0265           std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
0266           std::cout << "Exiting\n";
0267           return 1;
0268         }
0269         edm::longBranchPrint(printTree);
0270       }
0271 
0272       // Print out event lists
0273       if (events) {
0274         edm::printEventLists(tfile.get());
0275       }
0276 
0277       if (eventsInLumis) {
0278         edm::printEventsInLumis(tfile.get());
0279       }
0280 
0281       tfile->Close();
0282     }
0283     if (json) {
0284       std::cout << ']' << std::endl;
0285     }
0286   } catch (cms::Exception const& e) {
0287     std::cout << "cms::Exception caught in "
0288               << "EdmFileUtil" << '\n'
0289               << e.explainSelf();
0290     rc = 1;
0291   } catch (std::exception const& e) {
0292     std::cout << "Standard library exception caught in "
0293               << "EdmFileUtil" << '\n'
0294               << e.what();
0295     rc = 1;
0296   } catch (...) {
0297     std::cout << "Unknown exception caught in "
0298               << "EdmFileUtil";
0299     rc = 2;
0300   }
0301   return rc;
0302 }