File indexing completed on 2025-03-14 23:36:33
0001
0002
0003
0004
0005
#include "PerfTools/EdmEvent/interface/EdmEventSize.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>
#include <ostream>
#include <valarray>

#include "Rtypes.h"
#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TStyle.h"
#include "TObjArray.h"
#include "TBranch.h"
#include "TH1.h"
#include "TCanvas.h"
#include "Riostream.h"
#include "TDataMember.h"
#include "TLeaf.h"

#include "TBufferFile.h"
0029
0030 namespace perftools {
0031
0032 enum Indices { kUncompressed, kCompressed };
0033
0034 typedef std::valarray<Long64_t> size_type;
0035
0036 size_type getBasketSize(TBranch*);
0037
0038 size_type getBasketSize(TObjArray* branches) {
0039 size_type result(static_cast<Long64_t>(0), 2);
0040 size_t n = branches->GetEntries();
0041 for (size_t i = 0; i < n; ++i) {
0042 TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0043 assert(b != nullptr);
0044 result += getBasketSize(b);
0045 }
0046 return result;
0047 }
0048
0049 size_type getBasketSize(TBranch* b) {
0050 size_type result(static_cast<Long64_t>(0), 2);
0051 if (b != nullptr) {
0052 if (b->GetZipBytes() > 0) {
0053 result[kUncompressed] = b->GetTotBytes();
0054 result[kCompressed] = b->GetZipBytes();
0055 } else {
0056 result[kUncompressed] = b->GetTotalSize();
0057 result[kCompressed] = b->GetTotalSize();
0058 }
0059 result += getBasketSize(b->GetListOfBranches());
0060 }
0061 return result;
0062 }
0063
0064 size_type getTotalSize(TBranch* br) {
0065 TBufferFile buf(TBuffer::kWrite, 10000);
0066 TBranch::Class()->WriteBuffer(buf, br);
0067 size_type size = getBasketSize(br);
0068 if (br->GetZipBytes() > 0)
0069 size[kUncompressed] += buf.Length();
0070 return size;
0071 }
0072
0073 template <EdmEventMode M>
0074 using Record = EdmEventSize<M>::Record;
0075
0076 template <EdmEventMode M>
0077 EdmEventSize<M>::EdmEventSize() : m_nEvents(0) {}
0078
0079 template <EdmEventMode M>
0080 EdmEventSize<M>::EdmEventSize(std::string const& fileName, std::string const& treeName) : m_nEvents(0) {
0081 parseFile(fileName, treeName);
0082 }
0083
0084 template <EdmEventMode M>
0085 typename EdmEventSize<M>::Records getLeaves(TBranch* b) {
0086 typename EdmEventSize<M>::Records new_leaves;
0087 auto subBranches = b->GetListOfBranches();
0088 const size_t nl = subBranches->GetEntries();
0089 if (nl == 0) {
0090 TLeaf* l = dynamic_cast<TLeaf*>(b->GetListOfLeaves()->At(0));
0091 if (l == nullptr)
0092 return new_leaves;
0093
0094 std::string const leaf_name = l->GetName();
0095 std::string const leaf_type = l->GetTypeName();
0096 size_t compressed_size = l->GetBranch()->GetZipBytes();
0097 size_t uncompressed_size = l->GetBranch()->GetTotBytes();
0098 std::string full_name = leaf_name + '|' + leaf_type;
0099 full_name.erase(std::remove(full_name.begin(), full_name.end(), ' '), full_name.end());
0100 size_t nEvents = l->GetBranch()->GetEntries();
0101 new_leaves.push_back(Record<M>(full_name, nEvents, compressed_size, uncompressed_size));
0102 } else {
0103 for (size_t j = 0; j < nl; ++j) {
0104 TBranch* subBranch = dynamic_cast<TBranch*>(subBranches->At(j));
0105 if (subBranch == nullptr)
0106 continue;
0107 auto leaves = getLeaves<M>(subBranch);
0108 new_leaves.insert(new_leaves.end(), leaves.begin(), leaves.end());
0109 }
0110 }
0111 return new_leaves;
0112 }
0113
0114 template <EdmEventMode M>
0115 void EdmEventSize<M>::parseFile(std::string const& fileName, std::string const& treeName) {
0116 m_fileName = fileName;
0117 m_records.clear();
0118
0119 TFile* file = TFile::Open(fileName.c_str());
0120 if (file == nullptr || (!(*file).IsOpen()))
0121 throw Error("unable to open data file " + fileName, 7002);
0122
0123 TObject* o = file->Get(treeName.c_str());
0124 if (o == nullptr)
0125 throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
0126
0127 TTree* events = dynamic_cast<TTree*>(o);
0128 if (events == nullptr)
0129 throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
0130
0131 m_nEvents = events->GetEntries();
0132 if (m_nEvents == 0)
0133 throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
0134
0135 TObjArray* branches = events->GetListOfBranches();
0136 if (branches == nullptr)
0137 throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
0138
0139 const size_t n = branches->GetEntries();
0140 m_records.reserve(n);
0141 for (size_t i = 0; i < n; ++i) {
0142 TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0143 if (b == nullptr)
0144 continue;
0145 std::string const name(b->GetName());
0146 if (name == "EventAux")
0147 continue;
0148 size_type s = getTotalSize(b);
0149 size_t compressed_size = s[kCompressed];
0150 size_t uncompressed_size = s[kUncompressed];
0151 if constexpr (M == EdmEventMode::Branches) {
0152 m_records.push_back(Record(name, m_nEvents, compressed_size, uncompressed_size));
0153 } else if constexpr (M == EdmEventMode::Leaves) {
0154 Records new_leaves = getLeaves<M>(b);
0155 m_records.insert(m_records.end(), new_leaves.begin(), new_leaves.end());
0156
0157 auto new_leaves_compressed =
0158 std::accumulate(new_leaves.begin(), new_leaves.end(), 0, [](size_t sum, Record const& leaf) {
0159 return sum + leaf.compr_size;
0160 });
0161 auto new_leaves_uncompressed =
0162 std::accumulate(new_leaves.begin(), new_leaves.end(), 0, [](size_t sum, Record const& leaf) {
0163 return sum + leaf.uncompr_size;
0164 });
0165 size_t overehead_compressed = compressed_size - new_leaves_compressed;
0166 size_t overehead_uncompressed = uncompressed_size - new_leaves_uncompressed;
0167 m_records.push_back(Record(name + "overhead", m_nEvents, overehead_compressed, overehead_uncompressed));
0168 } else {
0169 throw Error("Unsupported mode", 7007);
0170 }
0171 }
0172 std::sort(m_records.begin(),
0173 m_records.end(),
0174 std::bind(std::greater<size_t>(),
0175 std::bind(&Record::compr_size, std::placeholders::_1),
0176 std::bind(&Record::compr_size, std::placeholders::_2)));
0177 }
0178
0179 template <EdmEventMode M>
0180 void EdmEventSize<M>::sortAlpha() {
0181 std::sort(m_records.begin(),
0182 m_records.end(),
0183 std::bind(std::less<std::string>(),
0184 std::bind(&Record::name, std::placeholders::_1),
0185 std::bind(&Record::name, std::placeholders::_2)));
0186 }
0187
0188 namespace detail {
0189
0190 template <EdmEventMode M>
0191 void shorterName(Record<M>& record) {
0192 if constexpr (M == EdmEventMode::Branches) {
0193 std::string const& fullName = record.name;
0194 size_t b = fullName.find('_');
0195 size_t e = fullName.rfind('_');
0196 if (b == e)
0197 record.name = fullName;
0198 else {
0199
0200 record.name = fullName.substr(b + 1, e - b - 1);
0201
0202 e = record.name.rfind('_');
0203 if (e != std::string::npos)
0204 record.name.replace(e, 1, ":");
0205
0206 record.name.append(" (" + fullName.substr(0, b) + ")");
0207 }
0208 } else if constexpr (M == EdmEventMode::Leaves) {
0209 size_t b = record.type.find('_');
0210 size_t e = record.type.rfind('_');
0211 if (b == e)
0212 record.name = record.type;
0213 else {
0214
0215 record.name = record.type.substr(b + 1, e - b - 1);
0216
0217 e = record.name.rfind('_');
0218 if (e != std::string::npos)
0219 record.name.replace(e, 1, ":");
0220
0221 record.name.append(" (" + record.type.substr(0, b) + ")");
0222 }
0223 if (!record.label.empty()) {
0224
0225 e = record.label.find('|');
0226 if (e != std::string::npos) {
0227 std::string obj = record.label.substr(0, e);
0228 std::string objType = record.label.substr(e + 1);
0229 record.name.append(" " + obj + " (" + objType + ")");
0230 } else {
0231 record.name.append(" " + record.label);
0232 }
0233 }
0234 } else {
0235 throw EdmEventSize<M>::Error("Unsupported mode", 7007);
0236 }
0237 }
0238
0239 }
0240
0241 template <EdmEventMode M>
0242 void EdmEventSize<M>::formatNames() {
0243 std::for_each(m_records.begin(), m_records.end(), std::bind(detail::shorterName<M>, std::placeholders::_1));
0244 }
0245
0246 namespace detail {
0247
0248 template <EdmEventMode M>
0249 void dump(std::ostream& co, Record<M> const& record) {
0250 co << record.name << " " << static_cast<double>(record.uncompr_size) / static_cast<double>(record.nEvents) << " "
0251 << static_cast<double>(record.compr_size) / static_cast<double>(record.nEvents) << "\n";
0252 }
0253
/// Fixed JSON fragment describing the two measured quantities. It is a
/// fragment, not a complete document: it ends with a trailing comma so that
/// another member can follow it inside the enclosing JSON object.
const std::string RESOURCES_JSON{R"("resources": [
{
"name": "size_uncompressed",
"description" : "uncompressed size",
"unit" : "B",
"title" : "Data Size"
},
{
"name":"size_compressed",
"description": "compressed size",
"unit" : "B",
"title" : "Data Size"
}
],
)"};
0269
0270 template <EdmEventMode M>
0271 void dumpJson(std::ostream& co, Record<M> const& record, bool isLast = false) {
0272 co << "{\n";
0273 co << "\"events\": " << record.nEvents << ",\n";
0274 co << "\"type\": \"" << record.type << "\",\n";
0275 co << "\"label\": \"" << record.label << "\",\n";
0276 co << "\"size_compressed\": " << record.compr_size << ",\n";
0277 co << "\"size_uncompressed\": " << record.uncompr_size << ",\n";
0278 co << "\"ratio\": "
0279 << (record.uncompr_size == 0
0280 ? 0.0
0281 : static_cast<double>(record.compr_size) / static_cast<double>(record.uncompr_size));
0282 co << (isLast ? "}\n" : "},\n");
0283 }
0284
0285 }
0286
0287 template <EdmEventMode M>
0288 void EdmEventSize<M>::dump(std::ostream& co, bool header) const {
0289 if (header) {
0290 co << "File " << m_fileName << " Events " << m_nEvents << "\n";
0291 if constexpr (M == EdmEventMode::Branches) {
0292 co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0293 } else if constexpr (M == EdmEventMode::Leaves) {
0294 co << "Leaf Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0295 } else {
0296 throw Error("Unsupported mode", 7007);
0297 }
0298 }
0299
0300 std::for_each(m_records.begin(), m_records.end(), std::bind(detail::dump<M>, std::ref(co), std::placeholders::_1));
0301 }
0302
0303 template <EdmEventMode M>
0304 void EdmEventSize<M>::dumpJson(std::ostream& co) const {
0305
0306 co << "{\n";
0307 co << "\"modules\": [\n";
0308
0309 std::for_each(
0310 m_records.begin(), m_records.end() - 1, [&co](const Record& record) { detail::dumpJson<M>(co, record); });
0311 detail::dumpJson<M>(co, m_records.back(), true);
0312
0313 co << "],\n";
0314
0315
0316 co << detail::RESOURCES_JSON;
0317
0318
0319 co << "\"total\": {\n";
0320 co << "\"events\": " << m_nEvents << ",\n";
0321 auto [total_uncompressed, total_compressed] = std::accumulate(
0322 m_records.begin(), m_records.end(), std::make_pair<size_t, size_t>(0, 0), [](auto sum, Record const& leaf) {
0323 return std::make_pair(sum.first + leaf.uncompr_size, sum.second + leaf.compr_size);
0324 });
0325 co << "\"size_uncompressed\": " << total_uncompressed << ",\n";
0326 co << "\"size_compressed\": " << total_compressed << ",\n";
0327 co << "\"ratio\": "
0328 << (total_uncompressed == 0 ? 0.0
0329 : static_cast<double>(total_compressed) / static_cast<double>(total_uncompressed))
0330 << "\n";
0331 co << "}\n}\n";
0332 }
0333
0334 namespace detail {
0335 struct Hist {
0336 explicit Hist(int itop)
0337 : top(itop),
0338 uncompressed("uncompressed", "sizes", top, -0.5, -0.5 + top),
0339 compressed("compressed", "sizes", top, -0.5, -0.5 + top),
0340 cxAxis(compressed.GetXaxis()),
0341 uxAxis(uncompressed.GetXaxis()),
0342 x(0) {}
0343
0344 template <EdmEventMode M>
0345 void fill(Record<M> const& record) {
0346 if (x < top) {
0347 cxAxis->SetBinLabel(x + 1, record.name.c_str());
0348 uxAxis->SetBinLabel(x + 1, record.name.c_str());
0349 compressed.Fill(x, record.compr_size);
0350 uncompressed.Fill(x, record.uncompr_size);
0351 x++;
0352 }
0353 }
0354
0355 void finalize() {
0356 double mn = std::numeric_limits<double>::max();
0357 for (int i = 1; i <= top; ++i) {
0358 double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0359 if (cm > 0 && cm < mn)
0360 mn = cm;
0361 if (um > 0 && um < mn)
0362 mn = um;
0363 }
0364 mn *= 0.8;
0365 double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
0366 mx *= 1.2;
0367 uncompressed.SetMinimum(mn);
0368 uncompressed.SetMaximum(mx);
0369 compressed.SetMinimum(mn);
0370
0371 cxAxis->SetLabelOffset(-0.32);
0372 cxAxis->LabelsOption("v");
0373 cxAxis->SetLabelSize(0.03);
0374 uxAxis->SetLabelOffset(-0.32);
0375 uxAxis->LabelsOption("v");
0376 uxAxis->SetLabelSize(0.03);
0377 compressed.GetYaxis()->SetTitle("Bytes");
0378 compressed.SetFillColor(kBlue);
0379 compressed.SetLineWidth(2);
0380 uncompressed.GetYaxis()->SetTitle("Bytes");
0381 uncompressed.SetFillColor(kRed);
0382 uncompressed.SetLineWidth(2);
0383 }
0384
0385 int top;
0386 TH1F uncompressed;
0387 TH1F compressed;
0388 TAxis* cxAxis;
0389 TAxis* uxAxis;
0390 int x;
0391 };
0392
0393 }
0394
0395 template <EdmEventMode M>
0396 void EdmEventSize<M>::produceHistos(std::string const& plot, std::string const& file, int top) const {
0397 if (top == 0)
0398 top = m_records.size();
0399
0400 detail::Hist h(top);
0401 if constexpr (M == EdmEventMode::Leaves) {
0402 h.uncompressed.SetTitle("Leaf sizes");
0403 h.compressed.SetTitle("Leaf sizes");
0404 }
0405 std::for_each(
0406 m_records.begin(), m_records.end(), std::bind(&detail::Hist::fill<M>, std::ref(h), std::placeholders::_1));
0407
0408 h.finalize();
0409 if (!plot.empty()) {
0410 gROOT->SetStyle("Plain");
0411 gStyle->SetOptStat(kFALSE);
0412 gStyle->SetOptLogy();
0413 TCanvas c;
0414 h.uncompressed.Draw();
0415 h.compressed.Draw("same");
0416 c.SaveAs(plot.c_str());
0417 }
0418 if (!file.empty()) {
0419 TFile f(file.c_str(), "RECREATE");
0420 h.compressed.Write();
0421 h.uncompressed.Write();
0422 f.Close();
0423 }
0424 }
0425
0426 template class perftools::EdmEventSize<perftools::EdmEventMode::Leaves>;
0427 template class perftools::EdmEventSize<perftools::EdmEventMode::Branches>;
0428 }