Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:21

0001 /** measure branch sizes
0002  *
0003  * author Luca Lista
0004  *
0005  */
0006 
#include <algorithm>
#include <cassert>
#include <cfloat>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <boost/program_options.hpp>

#include <TROOT.h>
#include <TFile.h>
#include <TTree.h>
#include <TSystem.h>
#include <TStyle.h>
#include <TObjArray.h>
#include <TBranch.h>
#include <TH1.h>
#include <TCanvas.h>
#include "TBufferFile.h"

#include "FWCore/FWLite/interface/FWLiteEnabler.h"
0026 
0027 using namespace std;
0028 
// Command-line option names.
//
// Every option comes as a pair: the plain long name, used to query the parsed
// variables map, and the "long,short" spec string, used when the option is
// registered with boost::program_options.
namespace {

  // --help / -h
  const char *const kHelpOpt = "help";
  const char *const kHelpCommandOpt = "help,h";

  // --data-file / -d
  const char *const kDataFileOpt = "data-file";
  const char *const kDataFileCommandOpt = "data-file,d";

  // --auto-loader / -a
  const char *const kAutoLoadOpt = "auto-loader";
  const char *const kAutoLoadCommandOpt = "auto-loader,a";

  // --plot / -p
  const char *const kPlotOpt = "plot";
  const char *const kPlotCommandOpt = "plot,p";

  // --save-plot / -s
  const char *const kSavePlotOpt = "save-plot";
  const char *const kSavePlotCommandOpt = "save-plot,s";

  // --plot-top / -t
  const char *const kPlotTopOpt = "plot-top";
  const char *const kPlotTopCommandOpt = "plot-top,t";

  // --verbose / -v
  const char *const kVerboseOpt = "verbose";
  const char *const kVerboseCommandOpt = "verbose,v";

  // --alphabetic-order / -A
  const char *const kAlphabeticOrderOpt = "alphabetic-order";
  const char *const kAlphabeticOrderCommandOpt = "alphabetic-order,A";

}  // namespace
0045 
/// Pair of byte counts: `first` is the uncompressed size, `second` the
/// compressed size (this is how main() labels and plots the two members).
using size_type = std::pair<std::size_t, std::size_t>;

/// A branch name together with its measured sizes.
using BranchRecord = std::pair<std::string, size_type>;

/// The list of measured branches that main() sorts and reports.
using BranchVector = std::vector<BranchRecord>;
0051 
0052 ostream &operator<<(ostream &out, const size_type &s) {
0053   out << s.first << '/' << s.second << " bytes";
0054   if (s.second != 0)
0055     out << " (compr: " << double(s.first) / double(s.second) << ")";
0056   return out;
0057 }
0058 
0059 size_type &operator+=(size_type &s1, const size_type &s2) {
0060   s1.first += s2.first;
0061   s1.second += s2.second;
0062   return s1;
0063 }
0064 
0065 size_type GetTotalSize(TBranch *, bool verbose);
0066 
0067 size_type GetBasketSize(TBranch *, bool verbose);
0068 
0069 size_type GetBasketSize(TObjArray *branches, bool verbose) {
0070   size_type result = make_pair(0, 0);
0071   size_t n = branches->GetEntries();
0072   for (size_t i = 0; i < n; ++i) {
0073     TBranch *b = dynamic_cast<TBranch *>(branches->At(i));
0074     assert(b != 0);
0075     result += GetBasketSize(b, verbose);
0076   }
0077   return result;
0078 }
0079 
0080 size_type GetBasketSize(TBranch *b, bool verbose) {
0081   size_type result = make_pair(0, 0);
0082   if (b != 0) {
0083     if (b->GetZipBytes() > 0) {
0084       result = make_pair(b->GetTotBytes(), b->GetZipBytes());
0085     } else {
0086       result = make_pair(b->GetTotalSize(), b->GetTotalSize());
0087     }
0088     if (verbose)
0089       cout << " branch: " << b->GetName() << ", size:" << result.first << "/" << result.second << endl;
0090     result += GetBasketSize(b->GetListOfBranches(), verbose);
0091   }
0092   return result;
0093 }
0094 
0095 size_type GetTotalSize(TBranch *br, bool verbose) {
0096   TBufferFile buf(TBuffer::kWrite, 10000);
0097   TBranch::Class()->WriteBuffer(buf, br);
0098   size_type size = GetBasketSize(br, verbose);
0099   if (br->GetZipBytes() > 0)
0100     size.first += buf.Length();
0101   if (verbose)
0102     cout << ">>> total branch size: " << br->GetName() << ":" << size.first << "/" << size.second << endl;
0103   return size;
0104 }
0105 
0106 size_type GetTotalSize(TObjArray *branches, bool verbose) {
0107   size_type result = make_pair(0, 0);
0108   size_t n = branches->GetEntries();
0109   for (size_t i = 0; i < n; ++i) {
0110     result += GetTotalSize(dynamic_cast<TBranch *>(branches->At(i)), verbose);
0111   }
0112   return result;
0113 }
0114 
0115 size_type GetTotalSize(TTree *t) {
0116   size_t total = t->GetTotBytes();
0117   TBufferFile b(TBuffer::kWrite, 10000);
0118   TTree::Class()->WriteBuffer(b, t);
0119   total += b.Length();
0120   return make_pair(total, t->GetZipBytes());
0121 }
0122 
0123 size_type GetTotalBranchSize(TTree *t, bool verbose) { return GetTotalSize(t->GetListOfBranches(), verbose); }
0124 
0125 struct sortByCompressedSize {
0126   bool operator()(const BranchRecord &t1, const BranchRecord &t2) const {
0127     size_t s1 = t1.second.second, s2 = t2.second.second;
0128     if (s1 == 0 && s2 == 0) {
0129       s1 = t1.second.first;
0130       s2 = t2.second.first;
0131     }
0132     return s1 > s2;
0133   }
0134 };
0135 
0136 struct sortByName {
0137   bool operator()(const BranchRecord &t1, const BranchRecord &t2) const { return t1.first < t2.first; }
0138 };
0139 
0140 int main(int argc, char *argv[]) {
0141   using namespace boost::program_options;
0142   using namespace std;
0143 
0144   string programName(argv[0]);
0145   string descString(programName);
0146   descString += " [options] ";
0147   descString += "data_file \nAllowed options";
0148   options_description desc(descString);
0149 
0150   // clang-format off
0151   desc.add_options()(kHelpCommandOpt, "produce help message")(kAutoLoadCommandOpt,
0152                                                               "automatic library loading (avoid root warnings)")(
0153       kDataFileCommandOpt, value<string>(), "data file")(kAlphabeticOrderCommandOpt,
0154                                                          "sort by alphabetic order (default: sort by size)")(
0155       kPlotCommandOpt, value<string>(), "produce a summary plot")(
0156       kPlotTopCommandOpt, value<int>(), "plot only the <arg> top size branches")(
0157       kSavePlotCommandOpt, value<string>(), "save plot into root file <arg>")(kVerboseCommandOpt, "verbose printout");
0158   // clang-format on
0159 
0160   positional_options_description p;
0161 
0162   p.add(kDataFileOpt, -1);
0163 
0164   variables_map vm;
0165   try {
0166     store(command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
0167     notify(vm);
0168   } catch (const error &) {
0169     return 7000;
0170   }
0171 
0172   if (vm.count(kHelpOpt)) {
0173     cout << desc << std::endl;
0174     return 0;
0175   }
0176 
0177   if (!vm.count(kDataFileOpt)) {
0178     string shortDesc("ConfigFileNotFound");
0179     cerr << programName << ": no data file given" << endl;
0180     return 7001;
0181   }
0182 
0183   gROOT->SetBatch();
0184 
0185   if (vm.count(kAutoLoadOpt) != 0) {
0186     gSystem->Load("libFWCoreFWLite");
0187     FWLiteEnabler::enable();
0188   }
0189 
0190   string fileName = vm[kDataFileOpt].as<string>();
0191   TFile file(fileName.c_str());
0192   if (!file.IsOpen()) {
0193     cerr << programName << ": unable to open data file " << fileName << endl;
0194     return 7002;
0195   }
0196 
0197   TObject *o = file.Get("Events");
0198   if (o == 0) {
0199     cerr << programName << ": no object \"Events\" found in file: " << fileName << endl;
0200     return 7003;
0201   }
0202 
0203   TTree *events = dynamic_cast<TTree *>(o);
0204   if (events == 0) {
0205     cerr << programName << ": object \"Events\" is not a TTree in file: " << fileName << endl;
0206     return 7004;
0207   }
0208 
0209   TObjArray *branches = events->GetListOfBranches();
0210   if (branches == 0) {
0211     cerr << programName << ": tree \"Events\" in file " << fileName << " contains no branches" << endl;
0212     return 7004;
0213   }
0214 
0215   bool verbose = vm.count(kVerboseOpt) > 0;
0216 
0217   BranchVector v;
0218   const size_t n = branches->GetEntries();
0219   cout << fileName << " has " << n << " branches" << endl;
0220   for (size_t i = 0; i < n; ++i) {
0221     TBranch *b = dynamic_cast<TBranch *>(branches->At(i));
0222     assert(b != 0);
0223     string name(b->GetName());
0224     if (name == "EventAux")
0225       continue;
0226     size_type s = GetTotalSize(b, verbose);
0227     v.push_back(make_pair(b->GetName(), s));
0228   }
0229   if (vm.count(kAlphabeticOrderOpt)) {
0230     sort(v.begin(), v.end(), sortByName());
0231   } else {
0232     sort(v.begin(), v.end(), sortByCompressedSize());
0233   }
0234   bool plot = (vm.count(kPlotOpt) > 0);
0235   bool save = (vm.count(kSavePlotOpt) > 0);
0236   int top = n;
0237   if (vm.count(kPlotTopOpt) > 0)
0238     top = vm[kPlotTopOpt].as<int>();
0239   TH1F uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top);
0240   TH1F compressed("compressed", "branch sizes", top, -0.5, -0.5 + top);
0241   int x = 0;
0242   TAxis *cxAxis = compressed.GetXaxis();
0243   TAxis *uxAxis = uncompressed.GetXaxis();
0244 
0245   for (BranchVector::const_iterator b = v.begin(); b != v.end(); ++b) {
0246     const string &name = b->first;
0247     size_type size = b->second;
0248     cout << size << " " << name << endl;
0249     if (x < top) {
0250       cxAxis->SetBinLabel(x + 1, name.c_str());
0251       uxAxis->SetBinLabel(x + 1, name.c_str());
0252       compressed.Fill(x, size.second);
0253       uncompressed.Fill(x, size.first);
0254       x++;
0255     }
0256   }
0257   //  size_type branchSize = GetTotalBranchSize( events );
0258   //  cout << "total branches size: " << branchSize.first << " bytes (uncompressed), "
0259   //       << branchSize.second << " bytes (compressed)"<< endl;
0260   size_type totalSize = GetTotalSize(events);
0261   cout << "total tree size: " << totalSize.first << " bytes (uncompressed), " << totalSize.second
0262        << " bytes (compressed)" << endl;
0263   double mn = DBL_MAX;
0264   for (int i = 1; i <= top; ++i) {
0265     double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0266     if (cm > 0 && cm < mn)
0267       mn = cm;
0268     if (um > 0 && um < mn)
0269       mn = um;
0270   }
0271   mn *= 0.8;
0272   double mx = max(compressed.GetMaximum(), uncompressed.GetMaximum());
0273   mx *= 1.2;
0274   uncompressed.SetMinimum(mn);
0275   uncompressed.SetMaximum(mx);
0276   compressed.SetMinimum(mn);
0277   //  compressed.SetMaximum( mx );
0278   cxAxis->SetLabelOffset(-0.32);
0279   cxAxis->LabelsOption("v");
0280   cxAxis->SetLabelSize(0.03);
0281   uxAxis->SetLabelOffset(-0.32);
0282   uxAxis->LabelsOption("v");
0283   uxAxis->SetLabelSize(0.03);
0284   compressed.GetYaxis()->SetTitle("Bytes");
0285   compressed.SetFillColor(kBlue);
0286   compressed.SetLineWidth(2);
0287   uncompressed.GetYaxis()->SetTitle("Bytes");
0288   uncompressed.SetFillColor(kRed);
0289   uncompressed.SetLineWidth(2);
0290   if (plot) {
0291     string plotName = vm[kPlotOpt].as<string>();
0292     gROOT->SetStyle("Plain");
0293     gStyle->SetOptStat(kFALSE);
0294     gStyle->SetOptLogy();
0295     TCanvas c;
0296     uncompressed.Draw();
0297     compressed.Draw("same");
0298     c.SaveAs(plotName.c_str());
0299   }
0300   if (save) {
0301     string fileName = vm[kSavePlotOpt].as<string>();
0302     TFile f(fileName.c_str(), "RECREATE");
0303     compressed.Write();
0304     uncompressed.Write();
0305     f.Close();
0306   }
0307   return 0;
0308 }