Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2025-03-14 23:36:33

0001 /** \file PerfTools/EdmEvent/interface/EdmEventSize.cc
0002  *
0003  *  \author Vincenzo Innocente
0004  *  \author Simone Rossi Tisbeni
0005  */
#include "PerfTools/EdmEvent/interface/EdmEventSize.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>
#include <ostream>
#include <valarray>

#include "Rtypes.h"
#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TStyle.h"
#include "TObjArray.h"
#include "TBranch.h"
#include "TH1.h"
#include "TCanvas.h"
#include "Riostream.h"
#include "TDataMember.h"
#include "TLeaf.h"

#include "TBufferFile.h"
0029 
0030 namespace perftools {
0031 
0032   enum Indices { kUncompressed, kCompressed };
0033 
0034   typedef std::valarray<Long64_t> size_type;
0035 
0036   size_type getBasketSize(TBranch*);
0037 
0038   size_type getBasketSize(TObjArray* branches) {
0039     size_type result(static_cast<Long64_t>(0), 2);
0040     size_t n = branches->GetEntries();
0041     for (size_t i = 0; i < n; ++i) {
0042       TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0043       assert(b != nullptr);
0044       result += getBasketSize(b);
0045     }
0046     return result;
0047   }
0048 
0049   size_type getBasketSize(TBranch* b) {
0050     size_type result(static_cast<Long64_t>(0), 2);
0051     if (b != nullptr) {
0052       if (b->GetZipBytes() > 0) {
0053         result[kUncompressed] = b->GetTotBytes();
0054         result[kCompressed] = b->GetZipBytes();
0055       } else {
0056         result[kUncompressed] = b->GetTotalSize();
0057         result[kCompressed] = b->GetTotalSize();
0058       }
0059       result += getBasketSize(b->GetListOfBranches());
0060     }
0061     return result;
0062   }
0063 
0064   size_type getTotalSize(TBranch* br) {
0065     TBufferFile buf(TBuffer::kWrite, 10000);
0066     TBranch::Class()->WriteBuffer(buf, br);
0067     size_type size = getBasketSize(br);
0068     if (br->GetZipBytes() > 0)
0069       size[kUncompressed] += buf.Length();
0070     return size;
0071   }
0072 
0073   template <EdmEventMode M>
0074   using Record = EdmEventSize<M>::Record;
0075 
0076   template <EdmEventMode M>
0077   EdmEventSize<M>::EdmEventSize() : m_nEvents(0) {}
0078 
0079   template <EdmEventMode M>
0080   EdmEventSize<M>::EdmEventSize(std::string const& fileName, std::string const& treeName) : m_nEvents(0) {
0081     parseFile(fileName, treeName);
0082   }
0083 
0084   template <EdmEventMode M>
0085   typename EdmEventSize<M>::Records getLeaves(TBranch* b) {
0086     typename EdmEventSize<M>::Records new_leaves;
0087     auto subBranches = b->GetListOfBranches();
0088     const size_t nl = subBranches->GetEntries();
0089     if (nl == 0) {
0090       TLeaf* l = dynamic_cast<TLeaf*>(b->GetListOfLeaves()->At(0));
0091       if (l == nullptr)
0092         return new_leaves;
0093 
0094       std::string const leaf_name = l->GetName();
0095       std::string const leaf_type = l->GetTypeName();
0096       size_t compressed_size = l->GetBranch()->GetZipBytes();
0097       size_t uncompressed_size = l->GetBranch()->GetTotBytes();
0098       std::string full_name = leaf_name + '|' + leaf_type;
0099       full_name.erase(std::remove(full_name.begin(), full_name.end(), ' '), full_name.end());
0100       size_t nEvents = l->GetBranch()->GetEntries();
0101       new_leaves.push_back(Record<M>(full_name, nEvents, compressed_size, uncompressed_size));
0102     } else {
0103       for (size_t j = 0; j < nl; ++j) {
0104         TBranch* subBranch = dynamic_cast<TBranch*>(subBranches->At(j));
0105         if (subBranch == nullptr)
0106           continue;
0107         auto leaves = getLeaves<M>(subBranch);
0108         new_leaves.insert(new_leaves.end(), leaves.begin(), leaves.end());
0109       }
0110     }
0111     return new_leaves;
0112   }
0113 
0114   template <EdmEventMode M>
0115   void EdmEventSize<M>::parseFile(std::string const& fileName, std::string const& treeName) {
0116     m_fileName = fileName;
0117     m_records.clear();
0118 
0119     TFile* file = TFile::Open(fileName.c_str());
0120     if (file == nullptr || (!(*file).IsOpen()))
0121       throw Error("unable to open data file " + fileName, 7002);
0122 
0123     TObject* o = file->Get(treeName.c_str());
0124     if (o == nullptr)
0125       throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
0126 
0127     TTree* events = dynamic_cast<TTree*>(o);
0128     if (events == nullptr)
0129       throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
0130 
0131     m_nEvents = events->GetEntries();
0132     if (m_nEvents == 0)
0133       throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
0134 
0135     TObjArray* branches = events->GetListOfBranches();
0136     if (branches == nullptr)
0137       throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
0138 
0139     const size_t n = branches->GetEntries();
0140     m_records.reserve(n);
0141     for (size_t i = 0; i < n; ++i) {
0142       TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0143       if (b == nullptr)
0144         continue;
0145       std::string const name(b->GetName());
0146       if (name == "EventAux")
0147         continue;
0148       size_type s = getTotalSize(b);
0149       size_t compressed_size = s[kCompressed];
0150       size_t uncompressed_size = s[kUncompressed];
0151       if constexpr (M == EdmEventMode::Branches) {
0152         m_records.push_back(Record(name, m_nEvents, compressed_size, uncompressed_size));
0153       } else if constexpr (M == EdmEventMode::Leaves) {
0154         Records new_leaves = getLeaves<M>(b);
0155         m_records.insert(m_records.end(), new_leaves.begin(), new_leaves.end());
0156 
0157         auto new_leaves_compressed =
0158             std::accumulate(new_leaves.begin(), new_leaves.end(), 0, [](size_t sum, Record const& leaf) {
0159               return sum + leaf.compr_size;
0160             });
0161         auto new_leaves_uncompressed =
0162             std::accumulate(new_leaves.begin(), new_leaves.end(), 0, [](size_t sum, Record const& leaf) {
0163               return sum + leaf.uncompr_size;
0164             });
0165         size_t overehead_compressed = compressed_size - new_leaves_compressed;
0166         size_t overehead_uncompressed = uncompressed_size - new_leaves_uncompressed;
0167         m_records.push_back(Record(name + "overhead", m_nEvents, overehead_compressed, overehead_uncompressed));
0168       } else {
0169         throw Error("Unsupported mode", 7007);
0170       }
0171     }
0172     std::sort(m_records.begin(),
0173               m_records.end(),
0174               std::bind(std::greater<size_t>(),
0175                         std::bind(&Record::compr_size, std::placeholders::_1),
0176                         std::bind(&Record::compr_size, std::placeholders::_2)));
0177   }
0178 
0179   template <EdmEventMode M>
0180   void EdmEventSize<M>::sortAlpha() {
0181     std::sort(m_records.begin(),
0182               m_records.end(),
0183               std::bind(std::less<std::string>(),
0184                         std::bind(&Record::name, std::placeholders::_1),
0185                         std::bind(&Record::name, std::placeholders::_2)));
0186   }
0187 
0188   namespace detail {
0189     // format as product:label (type)
0190     template <EdmEventMode M>
0191     void shorterName(Record<M>& record) {
0192       if constexpr (M == EdmEventMode::Branches) {
0193         std::string const& fullName = record.name;
0194         size_t b = fullName.find('_');
0195         size_t e = fullName.rfind('_');
0196         if (b == e)
0197           record.name = fullName;
0198         else {
0199           // remove type and process
0200           record.name = fullName.substr(b + 1, e - b - 1);
0201           // change label separator in :
0202           e = record.name.rfind('_');
0203           if (e != std::string::npos)
0204             record.name.replace(e, 1, ":");
0205           // add the type name
0206           record.name.append(" (" + fullName.substr(0, b) + ")");
0207         }
0208       } else if constexpr (M == EdmEventMode::Leaves) {
0209         size_t b = record.type.find('_');
0210         size_t e = record.type.rfind('_');
0211         if (b == e)
0212           record.name = record.type;
0213         else {
0214           // remove type and process
0215           record.name = record.type.substr(b + 1, e - b - 1);
0216           // change label separator in :
0217           e = record.name.rfind('_');
0218           if (e != std::string::npos)
0219             record.name.replace(e, 1, ":");
0220           // add the type name
0221           record.name.append(" (" + record.type.substr(0, b) + ")");
0222         }
0223         if (!record.label.empty()) {
0224           // object is objectName_objectType. Transform in objectName (objectType) and add to name
0225           e = record.label.find('|');
0226           if (e != std::string::npos) {
0227             std::string obj = record.label.substr(0, e);
0228             std::string objType = record.label.substr(e + 1);
0229             record.name.append(" " + obj + " (" + objType + ")");
0230           } else {
0231             record.name.append(" " + record.label);
0232           }
0233         }
0234       } else {
0235         throw EdmEventSize<M>::Error("Unsupported mode", 7007);
0236       }
0237     }
0238 
0239   }  // namespace detail
0240 
0241   template <EdmEventMode M>
0242   void EdmEventSize<M>::formatNames() {
0243     std::for_each(m_records.begin(), m_records.end(), std::bind(detail::shorterName<M>, std::placeholders::_1));
0244   }
0245 
0246   namespace detail {
0247 
0248     template <EdmEventMode M>
0249     void dump(std::ostream& co, Record<M> const& record) {
0250       co << record.name << " " << static_cast<double>(record.uncompr_size) / static_cast<double>(record.nEvents) << " "
0251          << static_cast<double>(record.compr_size) / static_cast<double>(record.nEvents) << "\n";
0252     }
0253 
0254     const std::string RESOURCES_JSON = R"("resources": [
0255 {
0256 "name": "size_uncompressed",
0257 "description" : "uncompressed size",
0258 "unit" : "B",
0259 "title" : "Data Size"
0260 },
0261 {
0262 "name":"size_compressed",
0263 "description": "compressed size",
0264 "unit" : "B",
0265 "title" : "Data Size"
0266 }
0267 ],
0268 )";
0269 
0270     template <EdmEventMode M>
0271     void dumpJson(std::ostream& co, Record<M> const& record, bool isLast = false) {
0272       co << "{\n";
0273       co << "\"events\": " << record.nEvents << ",\n";
0274       co << "\"type\": \"" << record.type << "\",\n";
0275       co << "\"label\": \"" << record.label << "\",\n";
0276       co << "\"size_compressed\": " << record.compr_size << ",\n";
0277       co << "\"size_uncompressed\": " << record.uncompr_size << ",\n";
0278       co << "\"ratio\": "
0279          << (record.uncompr_size == 0
0280                  ? 0.0
0281                  : static_cast<double>(record.compr_size) / static_cast<double>(record.uncompr_size));
0282       co << (isLast ? "}\n" : "},\n");
0283     }
0284 
0285   }  // namespace detail
0286 
0287   template <EdmEventMode M>
0288   void EdmEventSize<M>::dump(std::ostream& co, bool header) const {
0289     if (header) {
0290       co << "File " << m_fileName << " Events " << m_nEvents << "\n";
0291       if constexpr (M == EdmEventMode::Branches) {
0292         co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0293       } else if constexpr (M == EdmEventMode::Leaves) {
0294         co << "Leaf Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0295       } else {
0296         throw Error("Unsupported mode", 7007);
0297       }
0298     }
0299 
0300     std::for_each(m_records.begin(), m_records.end(), std::bind(detail::dump<M>, std::ref(co), std::placeholders::_1));
0301   }
0302 
0303   template <EdmEventMode M>
0304   void EdmEventSize<M>::dumpJson(std::ostream& co) const {
0305     // Modules json
0306     co << "{\n";
0307     co << "\"modules\": [\n";
0308 
0309     std::for_each(
0310         m_records.begin(), m_records.end() - 1, [&co](const Record& record) { detail::dumpJson<M>(co, record); });
0311     detail::dumpJson<M>(co, m_records.back(), true);
0312 
0313     co << "],\n";
0314 
0315     // Resources json
0316     co << detail::RESOURCES_JSON;
0317 
0318     // Total json
0319     co << "\"total\": {\n";
0320     co << "\"events\": " << m_nEvents << ",\n";
0321     auto [total_uncompressed, total_compressed] = std::accumulate(
0322         m_records.begin(), m_records.end(), std::make_pair<size_t, size_t>(0, 0), [](auto sum, Record const& leaf) {
0323           return std::make_pair(sum.first + leaf.uncompr_size, sum.second + leaf.compr_size);
0324         });
0325     co << "\"size_uncompressed\": " << total_uncompressed << ",\n";
0326     co << "\"size_compressed\": " << total_compressed << ",\n";
0327     co << "\"ratio\": "
0328        << (total_uncompressed == 0 ? 0.0
0329                                    : static_cast<double>(total_compressed) / static_cast<double>(total_uncompressed))
0330        << "\n";
0331     co << "}\n}\n";
0332   }
0333 
0334   namespace detail {
0335     struct Hist {
0336       explicit Hist(int itop)
0337           : top(itop),
0338             uncompressed("uncompressed", "sizes", top, -0.5, -0.5 + top),
0339             compressed("compressed", "sizes", top, -0.5, -0.5 + top),
0340             cxAxis(compressed.GetXaxis()),
0341             uxAxis(uncompressed.GetXaxis()),
0342             x(0) {}
0343 
0344       template <EdmEventMode M>
0345       void fill(Record<M> const& record) {
0346         if (x < top) {
0347           cxAxis->SetBinLabel(x + 1, record.name.c_str());
0348           uxAxis->SetBinLabel(x + 1, record.name.c_str());
0349           compressed.Fill(x, record.compr_size);
0350           uncompressed.Fill(x, record.uncompr_size);
0351           x++;
0352         }
0353       }
0354 
0355       void finalize() {
0356         double mn = std::numeric_limits<double>::max();
0357         for (int i = 1; i <= top; ++i) {
0358           double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0359           if (cm > 0 && cm < mn)
0360             mn = cm;
0361           if (um > 0 && um < mn)
0362             mn = um;
0363         }
0364         mn *= 0.8;
0365         double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
0366         mx *= 1.2;
0367         uncompressed.SetMinimum(mn);
0368         uncompressed.SetMaximum(mx);
0369         compressed.SetMinimum(mn);
0370         //  compressed.SetMaximum( mx );
0371         cxAxis->SetLabelOffset(-0.32);
0372         cxAxis->LabelsOption("v");
0373         cxAxis->SetLabelSize(0.03);
0374         uxAxis->SetLabelOffset(-0.32);
0375         uxAxis->LabelsOption("v");
0376         uxAxis->SetLabelSize(0.03);
0377         compressed.GetYaxis()->SetTitle("Bytes");
0378         compressed.SetFillColor(kBlue);
0379         compressed.SetLineWidth(2);
0380         uncompressed.GetYaxis()->SetTitle("Bytes");
0381         uncompressed.SetFillColor(kRed);
0382         uncompressed.SetLineWidth(2);
0383       }
0384 
0385       int top;
0386       TH1F uncompressed;
0387       TH1F compressed;
0388       TAxis* cxAxis;
0389       TAxis* uxAxis;
0390       int x;
0391     };
0392 
0393   }  // namespace detail
0394 
0395   template <EdmEventMode M>
0396   void EdmEventSize<M>::produceHistos(std::string const& plot, std::string const& file, int top) const {
0397     if (top == 0)
0398       top = m_records.size();
0399 
0400     detail::Hist h(top);
0401     if constexpr (M == EdmEventMode::Leaves) {
0402       h.uncompressed.SetTitle("Leaf sizes");
0403       h.compressed.SetTitle("Leaf sizes");
0404     }
0405     std::for_each(
0406         m_records.begin(), m_records.end(), std::bind(&detail::Hist::fill<M>, std::ref(h), std::placeholders::_1));
0407 
0408     h.finalize();
0409     if (!plot.empty()) {
0410       gROOT->SetStyle("Plain");
0411       gStyle->SetOptStat(kFALSE);
0412       gStyle->SetOptLogy();
0413       TCanvas c;
0414       h.uncompressed.Draw();
0415       h.compressed.Draw("same");
0416       c.SaveAs(plot.c_str());
0417     }
0418     if (!file.empty()) {
0419       TFile f(file.c_str(), "RECREATE");
0420       h.compressed.Write();
0421       h.uncompressed.Write();
0422       f.Close();
0423     }
0424   }
0425 
0426   template class perftools::EdmEventSize<perftools::EdmEventMode::Leaves>;
0427   template class perftools::EdmEventSize<perftools::EdmEventMode::Branches>;
0428 }  // namespace perftools