Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-06-03 00:12:08

0001 //----------------------------------------------------------------------
0002 // EdmFileUtil.cpp
0003 //
0004 
0005 #include <algorithm>
0006 #include <unistd.h>
0007 #include <exception>
0008 #include <iostream>
0009 #include <fstream>
0010 #include <string>
0011 #include <vector>
0012 #include <boost/program_options.hpp>
0013 #include "IOPool/Common/bin/CollUtil.h"
0014 #include "DataFormats/Provenance/interface/BranchType.h"
0015 #include "FWCore/Catalog/interface/InputFileCatalog.h"
0016 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
0017 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0018 #include "FWCore/PluginManager/interface/PluginManager.h"
0019 #include "FWCore/PluginManager/interface/standard.h"
0020 #include "FWCore/Services/interface/setupSiteLocalConfig.h"
0021 #include "FWCore/Utilities/interface/Adler32Calculator.h"
0022 #include "FWCore/Utilities/interface/Exception.h"
0023 #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
0024 
0025 #include "TFile.h"
0026 #include "TError.h"
0027 
0028 int main(int argc, char* argv[]) {
0029   gErrorIgnoreLevel = kError;
0030 
0031   // Add options here
0032 
0033   boost::program_options::options_description desc("Allowed options");
0034   desc.add_options()("help,h", "print help message")(
0035       "file,f", boost::program_options::value<std::vector<std::string> >(), "data file (-f or -F required)")(
0036       "Files,F",
0037       boost::program_options::value<std::string>(),
0038       "text file containing names of data files, one per line")(
0039       "catalog,c", boost::program_options::value<std::string>(), "catalog")("decodeLFN,d", "Convert LFN to PFN")(
0040       "uuid,u", "Print uuid")("adler32,a", "Print adler32 checksum.")("allowRecovery",
0041                                                                       "Allow root to auto-recover corrupted files")(
0042       "JSON,j", "JSON output format.  Any arguments listed below are ignored")("ls,l", "list file content")(
0043       "map,m", "Print TFile::Map(\"extended\"). The output can be HUGE.")("print,P", "Print all")(
0044       "verbose,v", "Verbose printout")("printBranchDetails,b", "Call Print()sc for all branches")(
0045       "printBaskets",
0046       boost::program_options::value<std::string>(),
0047       "Print detailed information about baskets and clusters for the given branch")(
0048       "printClusters", "Print detailed information about baskets and clusters for all branches")(
0049       "tree,t",
0050       boost::program_options::value<std::string>(),
0051       "Select tree used with -P, -b, --printClusters, and --printBaskets options")(
0052       "events,e",
0053       "Print list of all Events, Runs, and LuminosityBlocks in the file sorted by run number, luminosity block number, "
0054       "and event number.  Also prints the entry numbers and whether it is possible to use fast copy with the file.")(
0055       "eventsInLumis", "Print how many Events are in each LuminosityBlock.");
0056 
0057   // What trees do we require for this to be a valid collection?
0058   std::vector<std::string> expectedTrees;
0059   expectedTrees.push_back(edm::poolNames::metaDataTreeName());
0060   expectedTrees.push_back(edm::poolNames::eventTreeName());
0061 
0062   boost::program_options::positional_options_description p;
0063   p.add("file", -1);
0064 
0065   boost::program_options::variables_map vm;
0066 
0067   try {
0068     boost::program_options::store(
0069         boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
0070   } catch (boost::program_options::error const& x) {
0071     std::cerr << "Option parsing failure:\n" << x.what() << "\n\n";
0072     std::cerr << desc << "\n";
0073     return 1;
0074   }
0075 
0076   boost::program_options::notify(vm);
0077 
0078   if (vm.count("help")) {
0079     std::cout << desc << "\n";
0080     return 1;
0081   }
0082 
0083   int rc = 0;
0084   try {
0085     auto operate = edm::setupSiteLocalConfig();
0086 
0087     std::vector<std::string> in =
0088         (vm.count("file") ? vm["file"].as<std::vector<std::string> >() : std::vector<std::string>());
0089     if (vm.count("Files")) {
0090       std::ifstream ifile(vm["Files"].as<std::string>().c_str());
0091       std::istream_iterator<std::string> beginItr(ifile);
0092       if (ifile.fail()) {
0093         std::cout << "File '" << vm["Files"].as<std::string>() << "' not found, not opened, or empty\n";
0094         return 1;
0095       }
0096       std::istream_iterator<std::string> endItr;
0097       copy(beginItr, endItr, back_inserter(in));
0098     }
0099     if (in.empty()) {
0100       std::cout << "Data file(s) not set.\n";
0101       std::cout << desc << "\n";
0102       return 1;
0103     }
0104     std::string catalogIn = (vm.count("catalog") ? vm["catalog"].as<std::string>() : std::string());
0105     bool decodeLFN = vm.count("decodeLFN");
0106     bool uuid = vm.count("uuid");
0107     bool adler32 = vm.count("adler32");
0108     bool allowRecovery = vm.count("allowRecovery");
0109     bool json = vm.count("JSON");
0110     bool more = !json;
0111     bool verbose = more && (vm.count("verbose") > 0 ? true : false);
0112     bool events = more && (vm.count("events") > 0 ? true : false);
0113     bool eventsInLumis = more && (vm.count("eventsInLumis") > 0 ? true : false);
0114     bool ls = more && (vm.count("ls") > 0 ? true : false);
0115     bool printMap = vm.count("map");
0116     bool tree = more && (vm.count("tree") > 0 ? true : false);
0117     bool print = more && (vm.count("print") > 0 ? true : false);
0118     bool printBranchDetails = more && (vm.count("printBranchDetails") > 0 ? true : false);
0119     bool printClusters = more && (vm.count("printClusters") > 0 ? true : false);
0120     std::string printBaskets = (vm.count("printBaskets") ? vm["printBaskets"].as<std::string>() : std::string());
0121     bool onlyDecodeLFN =
0122         decodeLFN && !(uuid || adler32 || allowRecovery || json || events || tree || ls || print || printBranchDetails);
0123     std::string selectedTree = tree ? vm["tree"].as<std::string>() : edm::poolNames::eventTreeName();
0124 
0125     if (events || eventsInLumis) {
0126       try {
0127         edmplugin::PluginManager::configure(edmplugin::standard::config());
0128       } catch (std::exception& e) {
0129         std::cout << "exception caught in EdmFileUtil while configuring the PluginManager\n" << e.what();
0130         return 1;
0131       }
0132     }
0133 
0134     edm::InputFileCatalog catalog(in, catalogIn, true);
0135     std::vector<std::string> const& filesIn = catalog.fileNames(0);
0136 
0137     if (json) {
0138       std::cout << '[' << std::endl;
0139     }
0140 
0141     // now run..
0142     // Allow user to input multiple files
0143     for (unsigned int j = 0; j < in.size(); ++j) {
0144       // We _only_ want the LFN->PFN conversion. No need to open the file,
0145       // just check the catalog and move on
0146       if (onlyDecodeLFN) {
0147         std::cout << filesIn[j] << std::endl;
0148         continue;
0149       }
0150 
0151       // open a data file
0152       if (!json)
0153         std::cout << in[j] << "\n";
0154       std::string const& lfn = in[j];
0155       std::unique_ptr<TFile> tfile{edm::openFileHdl(filesIn[j])};
0156       if (tfile == nullptr)
0157         return 1;
0158 
0159       std::string const& pfn = filesIn[j];
0160 
0161       if (verbose)
0162         std::cout << "ECU:: Opened " << pfn << std::endl;
0163 
0164       std::string datafile = decodeLFN ? pfn : lfn;
0165 
0166       // First check that this file is not auto-recovered
0167       // Stop the job unless specified to do otherwise
0168 
0169       bool isRecovered = tfile->TestBit(TFile::kRecovered);
0170       if (isRecovered) {
0171         if (allowRecovery) {
0172           if (!json) {
0173             std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0174             std::cout << "Proceeding anyway\n";
0175           }
0176         } else {
0177           std::cout << pfn << " appears not to have been closed correctly and has been autorecovered \n";
0178           std::cout << "Stopping. Use --allowRecovery to try ignoring this\n";
0179           return 1;
0180         }
0181       } else {
0182         if (verbose)
0183           std::cout << "ECU:: Collection not autorecovered. Continuing\n";
0184       }
0185 
0186       // Ok. Do we have the expected trees?
0187       for (unsigned int i = 0; i < expectedTrees.size(); ++i) {
0188         TTree* t = (TTree*)tfile->Get(expectedTrees[i].c_str());
0189         if (t == nullptr) {
0190           std::cout << "Tree " << expectedTrees[i] << " appears to be missing. Not a valid collection\n";
0191           std::cout << "Exiting\n";
0192           return 1;
0193         } else {
0194           if (verbose)
0195             std::cout << "ECU:: Found Tree " << expectedTrees[i] << std::endl;
0196         }
0197       }
0198 
0199       if (verbose)
0200         std::cout << "ECU:: Found all expected trees\n";
0201 
0202       std::ostringstream auout;
0203       if (adler32) {
0204         unsigned int const EDMFILEUTILADLERBUFSIZE = 10 * 1024 * 1024;  // 10MB buffer
0205         static char buffer[EDMFILEUTILADLERBUFSIZE];
0206         size_t bufToRead = EDMFILEUTILADLERBUFSIZE;
0207         uint32_t a = 1, b = 0;
0208         size_t fileSize = tfile->GetSize();
0209         tfile->Seek(0, TFile::kBeg);
0210 
0211         for (size_t offset = 0; offset < fileSize; offset += EDMFILEUTILADLERBUFSIZE) {
0212           // true on last loop
0213           if (fileSize - offset < EDMFILEUTILADLERBUFSIZE)
0214             bufToRead = fileSize - offset;
0215           tfile->ReadBuffer((char*)buffer, bufToRead);
0216           cms::Adler32(buffer, bufToRead, a, b);
0217         }
0218         uint32_t adler32sum = (b << 16) | a;
0219         if (json) {
0220           auout << ",\"adler32sum\":" << adler32sum;
0221         } else {
0222           auout << ", " << std::hex << adler32sum << " adler32sum";
0223         }
0224       }
0225 
0226       if (uuid) {
0227         TTree* paramsTree = (TTree*)tfile->Get(edm::poolNames::metaDataTreeName().c_str());
0228         if (json) {
0229           auout << ",\"uuid\":\"" << edm::getUuid(paramsTree) << '"';
0230         } else {
0231           auout << ", " << edm::getUuid(paramsTree) << " uuid";
0232         }
0233       }
0234 
0235       // Ok. How many events?
0236       int nruns = edm::numEntries(tfile.get(), edm::poolNames::runTreeName());
0237       int nlumis = edm::numEntries(tfile.get(), edm::poolNames::luminosityBlockTreeName());
0238       int nevents = edm::numEntries(tfile.get(), edm::poolNames::eventTreeName());
0239       if (json) {
0240         if (j > 0)
0241           std::cout << ',' << std::endl;
0242         std::cout << "{\"file\":\"" << datafile << '"' << ",\"runs\":" << nruns << ",\"lumis\":" << nlumis
0243                   << ",\"events\":" << nevents << ",\"bytes\":" << tfile->GetSize() << auout.str() << '}' << std::endl;
0244       } else {
0245         std::cout << datafile << " (" << nruns << " runs, " << nlumis << " lumis, " << nevents << " events, "
0246                   << tfile->GetSize() << " bytes" << auout.str() << ")" << std::endl;
0247       }
0248 
0249       if (json) {
0250         // Remainder of arguments not supported in JSON yet.
0251         continue;
0252       }
0253 
0254       // Look at the collection contents
0255       if (ls) {
0256         tfile->ls();
0257       }
0258 
0259       // Print Map()
0260       if (printMap) {
0261         tfile->Map("extended");
0262       }
0263 
0264       if (print or printBranchDetails or printClusters or not printBaskets.empty()) {
0265         TTree* printTree = (TTree*)tfile->Get(selectedTree.c_str());
0266         if (printTree == nullptr) {
0267           std::cout << "Tree " << selectedTree << " appears to be missing. Could not find it in the file.\n";
0268           std::cout << "Exiting\n";
0269           return 1;
0270         }
0271         // Print out each tree
0272         if (print) {
0273           edm::printBranchNames(printTree);
0274         }
0275 
0276         if (printBranchDetails) {
0277           edm::longBranchPrint(printTree);
0278         }
0279 
0280         if (printClusters) {
0281           edm::clusterPrint(printTree);
0282         }
0283 
0284         if (not printBaskets.empty()) {
0285           edm::basketPrint(printTree, printBaskets);
0286         }
0287       }
0288 
0289       // Print out event lists
0290       if (events) {
0291         edm::printEventLists(tfile.get());
0292       }
0293 
0294       if (eventsInLumis) {
0295         edm::printEventsInLumis(tfile.get());
0296       }
0297 
0298       tfile->Close();
0299     }
0300     if (json) {
0301       std::cout << ']' << std::endl;
0302     }
0303   } catch (cms::Exception const& e) {
0304     std::cout << "cms::Exception caught in "
0305               << "EdmFileUtil" << '\n'
0306               << e.explainSelf();
0307     rc = 1;
0308   } catch (std::exception const& e) {
0309     std::cout << "Standard library exception caught in "
0310               << "EdmFileUtil" << '\n'
0311               << e.what();
0312     rc = 1;
0313   } catch (...) {
0314     std::cout << "Unknown exception caught in "
0315               << "EdmFileUtil";
0316     rc = 2;
0317   }
0318   return rc;
0319 }