File indexing completed on 2024-04-06 12:23:21
0001
0002
0003
0004
0005
0006
0007 #include <boost/program_options.hpp>
0008 #include <string>
0009 #include <iostream>
0010 #include <vector>
0011 #include <utility>
0012 #include <cassert>
0013 #include <TROOT.h>
0014 #include <TFile.h>
0015 #include <TTree.h>
0016 #include <TSystem.h>
0017 #include <TStyle.h>
0018 #include <TObjArray.h>
0019 #include <TBranch.h>
0020 #include <TH1.h>
0021 #include <TCanvas.h>
0022 #include "FWCore/FWLite/interface/FWLiteEnabler.h"
0023 #include <utility>
0024
0025 #include "TBufferFile.h"
0026
0027 using namespace std;
0028
// Command-line option names.
//
// Each option comes in two spellings: the plain long name, used to query the
// parsed variables map, and the "long,short" form used when registering the
// option with its one-letter alias.
namespace {
  // --help / -h
  const char *const kHelpOpt = "help";
  const char *const kHelpCommandOpt = "help,h";

  // --data-file / -d
  const char *const kDataFileOpt = "data-file";
  const char *const kDataFileCommandOpt = "data-file,d";

  // --auto-loader / -a
  const char *const kAutoLoadOpt = "auto-loader";
  const char *const kAutoLoadCommandOpt = "auto-loader,a";

  // --plot / -p
  const char *const kPlotOpt = "plot";
  const char *const kPlotCommandOpt = "plot,p";

  // --save-plot / -s
  const char *const kSavePlotOpt = "save-plot";
  const char *const kSavePlotCommandOpt = "save-plot,s";

  // --plot-top / -t
  const char *const kPlotTopOpt = "plot-top";
  const char *const kPlotTopCommandOpt = "plot-top,t";

  // --verbose / -v
  const char *const kVerboseOpt = "verbose";
  const char *const kVerboseCommandOpt = "verbose,v";

  // --alphabetic-order / -A
  const char *const kAlphabeticOrderOpt = "alphabetic-order";
  const char *const kAlphabeticOrderCommandOpt = "alphabetic-order,A";
}  // namespace
0045
// A pair of byte counts. Throughout this file `first` carries the
// uncompressed size and `second` the compressed size.
typedef std::pair<std::size_t, std::size_t> size_type;

// A branch name together with its size pair.
typedef std::pair<std::string, size_type> BranchRecord;

// The list of per-branch records that gets sorted and printed.
typedef std::vector<BranchRecord> BranchVector;
0051
0052 ostream &operator<<(ostream &out, const size_type &s) {
0053 out << s.first << '/' << s.second << " bytes";
0054 if (s.second != 0)
0055 out << " (compr: " << double(s.first) / double(s.second) << ")";
0056 return out;
0057 }
0058
0059 size_type &operator+=(size_type &s1, const size_type &s2) {
0060 s1.first += s2.first;
0061 s1.second += s2.second;
0062 return s1;
0063 }
0064
0065 size_type GetTotalSize(TBranch *, bool verbose);
0066
0067 size_type GetBasketSize(TBranch *, bool verbose);
0068
0069 size_type GetBasketSize(TObjArray *branches, bool verbose) {
0070 size_type result = make_pair(0, 0);
0071 size_t n = branches->GetEntries();
0072 for (size_t i = 0; i < n; ++i) {
0073 TBranch *b = dynamic_cast<TBranch *>(branches->At(i));
0074 assert(b != 0);
0075 result += GetBasketSize(b, verbose);
0076 }
0077 return result;
0078 }
0079
0080 size_type GetBasketSize(TBranch *b, bool verbose) {
0081 size_type result = make_pair(0, 0);
0082 if (b != 0) {
0083 if (b->GetZipBytes() > 0) {
0084 result = make_pair(b->GetTotBytes(), b->GetZipBytes());
0085 } else {
0086 result = make_pair(b->GetTotalSize(), b->GetTotalSize());
0087 }
0088 if (verbose)
0089 cout << " branch: " << b->GetName() << ", size:" << result.first << "/" << result.second << endl;
0090 result += GetBasketSize(b->GetListOfBranches(), verbose);
0091 }
0092 return result;
0093 }
0094
0095 size_type GetTotalSize(TBranch *br, bool verbose) {
0096 TBufferFile buf(TBuffer::kWrite, 10000);
0097 TBranch::Class()->WriteBuffer(buf, br);
0098 size_type size = GetBasketSize(br, verbose);
0099 if (br->GetZipBytes() > 0)
0100 size.first += buf.Length();
0101 if (verbose)
0102 cout << ">>> total branch size: " << br->GetName() << ":" << size.first << "/" << size.second << endl;
0103 return size;
0104 }
0105
0106 size_type GetTotalSize(TObjArray *branches, bool verbose) {
0107 size_type result = make_pair(0, 0);
0108 size_t n = branches->GetEntries();
0109 for (size_t i = 0; i < n; ++i) {
0110 result += GetTotalSize(dynamic_cast<TBranch *>(branches->At(i)), verbose);
0111 }
0112 return result;
0113 }
0114
0115 size_type GetTotalSize(TTree *t) {
0116 size_t total = t->GetTotBytes();
0117 TBufferFile b(TBuffer::kWrite, 10000);
0118 TTree::Class()->WriteBuffer(b, t);
0119 total += b.Length();
0120 return make_pair(total, t->GetZipBytes());
0121 }
0122
0123 size_type GetTotalBranchSize(TTree *t, bool verbose) { return GetTotalSize(t->GetListOfBranches(), verbose); }
0124
0125 struct sortByCompressedSize {
0126 bool operator()(const BranchRecord &t1, const BranchRecord &t2) const {
0127 size_t s1 = t1.second.second, s2 = t2.second.second;
0128 if (s1 == 0 && s2 == 0) {
0129 s1 = t1.second.first;
0130 s2 = t2.second.first;
0131 }
0132 return s1 > s2;
0133 }
0134 };
0135
0136 struct sortByName {
0137 bool operator()(const BranchRecord &t1, const BranchRecord &t2) const { return t1.first < t2.first; }
0138 };
0139
0140 int main(int argc, char *argv[]) {
0141 using namespace boost::program_options;
0142 using namespace std;
0143
0144 string programName(argv[0]);
0145 string descString(programName);
0146 descString += " [options] ";
0147 descString += "data_file \nAllowed options";
0148 options_description desc(descString);
0149
0150
0151 desc.add_options()(kHelpCommandOpt, "produce help message")(kAutoLoadCommandOpt,
0152 "automatic library loading (avoid root warnings)")(
0153 kDataFileCommandOpt, value<string>(), "data file")(kAlphabeticOrderCommandOpt,
0154 "sort by alphabetic order (default: sort by size)")(
0155 kPlotCommandOpt, value<string>(), "produce a summary plot")(
0156 kPlotTopCommandOpt, value<int>(), "plot only the <arg> top size branches")(
0157 kSavePlotCommandOpt, value<string>(), "save plot into root file <arg>")(kVerboseCommandOpt, "verbose printout");
0158
0159
0160 positional_options_description p;
0161
0162 p.add(kDataFileOpt, -1);
0163
0164 variables_map vm;
0165 try {
0166 store(command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
0167 notify(vm);
0168 } catch (const error &) {
0169 return 7000;
0170 }
0171
0172 if (vm.count(kHelpOpt)) {
0173 cout << desc << std::endl;
0174 return 0;
0175 }
0176
0177 if (!vm.count(kDataFileOpt)) {
0178 string shortDesc("ConfigFileNotFound");
0179 cerr << programName << ": no data file given" << endl;
0180 return 7001;
0181 }
0182
0183 gROOT->SetBatch();
0184
0185 if (vm.count(kAutoLoadOpt) != 0) {
0186 gSystem->Load("libFWCoreFWLite");
0187 FWLiteEnabler::enable();
0188 }
0189
0190 string fileName = vm[kDataFileOpt].as<string>();
0191 TFile file(fileName.c_str());
0192 if (!file.IsOpen()) {
0193 cerr << programName << ": unable to open data file " << fileName << endl;
0194 return 7002;
0195 }
0196
0197 TObject *o = file.Get("Events");
0198 if (o == 0) {
0199 cerr << programName << ": no object \"Events\" found in file: " << fileName << endl;
0200 return 7003;
0201 }
0202
0203 TTree *events = dynamic_cast<TTree *>(o);
0204 if (events == 0) {
0205 cerr << programName << ": object \"Events\" is not a TTree in file: " << fileName << endl;
0206 return 7004;
0207 }
0208
0209 TObjArray *branches = events->GetListOfBranches();
0210 if (branches == 0) {
0211 cerr << programName << ": tree \"Events\" in file " << fileName << " contains no branches" << endl;
0212 return 7004;
0213 }
0214
0215 bool verbose = vm.count(kVerboseOpt) > 0;
0216
0217 BranchVector v;
0218 const size_t n = branches->GetEntries();
0219 cout << fileName << " has " << n << " branches" << endl;
0220 for (size_t i = 0; i < n; ++i) {
0221 TBranch *b = dynamic_cast<TBranch *>(branches->At(i));
0222 assert(b != 0);
0223 string name(b->GetName());
0224 if (name == "EventAux")
0225 continue;
0226 size_type s = GetTotalSize(b, verbose);
0227 v.push_back(make_pair(b->GetName(), s));
0228 }
0229 if (vm.count(kAlphabeticOrderOpt)) {
0230 sort(v.begin(), v.end(), sortByName());
0231 } else {
0232 sort(v.begin(), v.end(), sortByCompressedSize());
0233 }
0234 bool plot = (vm.count(kPlotOpt) > 0);
0235 bool save = (vm.count(kSavePlotOpt) > 0);
0236 int top = n;
0237 if (vm.count(kPlotTopOpt) > 0)
0238 top = vm[kPlotTopOpt].as<int>();
0239 TH1F uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top);
0240 TH1F compressed("compressed", "branch sizes", top, -0.5, -0.5 + top);
0241 int x = 0;
0242 TAxis *cxAxis = compressed.GetXaxis();
0243 TAxis *uxAxis = uncompressed.GetXaxis();
0244
0245 for (BranchVector::const_iterator b = v.begin(); b != v.end(); ++b) {
0246 const string &name = b->first;
0247 size_type size = b->second;
0248 cout << size << " " << name << endl;
0249 if (x < top) {
0250 cxAxis->SetBinLabel(x + 1, name.c_str());
0251 uxAxis->SetBinLabel(x + 1, name.c_str());
0252 compressed.Fill(x, size.second);
0253 uncompressed.Fill(x, size.first);
0254 x++;
0255 }
0256 }
0257
0258
0259
0260 size_type totalSize = GetTotalSize(events);
0261 cout << "total tree size: " << totalSize.first << " bytes (uncompressed), " << totalSize.second
0262 << " bytes (compressed)" << endl;
0263 double mn = DBL_MAX;
0264 for (int i = 1; i <= top; ++i) {
0265 double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0266 if (cm > 0 && cm < mn)
0267 mn = cm;
0268 if (um > 0 && um < mn)
0269 mn = um;
0270 }
0271 mn *= 0.8;
0272 double mx = max(compressed.GetMaximum(), uncompressed.GetMaximum());
0273 mx *= 1.2;
0274 uncompressed.SetMinimum(mn);
0275 uncompressed.SetMaximum(mx);
0276 compressed.SetMinimum(mn);
0277
0278 cxAxis->SetLabelOffset(-0.32);
0279 cxAxis->LabelsOption("v");
0280 cxAxis->SetLabelSize(0.03);
0281 uxAxis->SetLabelOffset(-0.32);
0282 uxAxis->LabelsOption("v");
0283 uxAxis->SetLabelSize(0.03);
0284 compressed.GetYaxis()->SetTitle("Bytes");
0285 compressed.SetFillColor(kBlue);
0286 compressed.SetLineWidth(2);
0287 uncompressed.GetYaxis()->SetTitle("Bytes");
0288 uncompressed.SetFillColor(kRed);
0289 uncompressed.SetLineWidth(2);
0290 if (plot) {
0291 string plotName = vm[kPlotOpt].as<string>();
0292 gROOT->SetStyle("Plain");
0293 gStyle->SetOptStat(kFALSE);
0294 gStyle->SetOptLogy();
0295 TCanvas c;
0296 uncompressed.Draw();
0297 compressed.Draw("same");
0298 c.SaveAs(plotName.c_str());
0299 }
0300 if (save) {
0301 string fileName = vm[kSavePlotOpt].as<string>();
0302 TFile f(fileName.c_str(), "RECREATE");
0303 compressed.Write();
0304 uncompressed.Write();
0305 f.Close();
0306 }
0307 return 0;
0308 }