Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:23:21

0001 /** \file PerfTools/EdmEvent/interface/EdmEventSize.cc
0002  *
0003  *  \author Vincenzo Innocente
0004  */
#include "PerfTools/EdmEvent/interface/EdmEventSize.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <valarray>

#include "Rtypes.h"
#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TStyle.h"
#include "TObjArray.h"
#include "TBranch.h"
#include "TH1.h"
#include "TCanvas.h"
#include "Riostream.h"

#include "TBufferFile.h"
0025 
0026 namespace {
0027 
0028   enum Indices { kUncompressed, kCompressed };
0029 
0030   typedef std::valarray<Long64_t> size_type;
0031 
0032   size_type getBasketSize(TBranch*);
0033 
0034   size_type getBasketSize(TObjArray* branches) {
0035     size_type result(static_cast<Long64_t>(0), 2);
0036     size_t n = branches->GetEntries();
0037     for (size_t i = 0; i < n; ++i) {
0038       TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0039       assert(b != nullptr);
0040       result += getBasketSize(b);
0041     }
0042     return result;
0043   }
0044 
0045   size_type getBasketSize(TBranch* b) {
0046     size_type result(static_cast<Long64_t>(0), 2);
0047     if (b != nullptr) {
0048       if (b->GetZipBytes() > 0) {
0049         result[kUncompressed] = b->GetTotBytes();
0050         result[kCompressed] = b->GetZipBytes();
0051       } else {
0052         result[kUncompressed] = b->GetTotalSize();
0053         result[kCompressed] = b->GetTotalSize();
0054       }
0055       result += getBasketSize(b->GetListOfBranches());
0056     }
0057     return result;
0058   }
0059 
0060   size_type getTotalSize(TBranch* br) {
0061     TBufferFile buf(TBuffer::kWrite, 10000);
0062     TBranch::Class()->WriteBuffer(buf, br);
0063     size_type size = getBasketSize(br);
0064     if (br->GetZipBytes() > 0)
0065       size[kUncompressed] += buf.Length();
0066     return size;
0067   }
0068 }  // namespace
0069 
0070 namespace perftools {
0071 
0072   EdmEventSize::EdmEventSize() : m_nEvents(0) {}
0073 
0074   EdmEventSize::EdmEventSize(std::string const& fileName, std::string const& treeName) : m_nEvents(0) {
0075     parseFile(fileName);
0076   }
0077 
0078   void EdmEventSize::parseFile(std::string const& fileName, std::string const& treeName) {
0079     m_fileName = fileName;
0080     m_branches.clear();
0081 
0082     TFile* file = TFile::Open(fileName.c_str());
0083     if (file == nullptr || (!(*file).IsOpen()))
0084       throw Error("unable to open data file " + fileName, 7002);
0085 
0086     TObject* o = file->Get(treeName.c_str());
0087     if (o == nullptr)
0088       throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
0089 
0090     TTree* events = dynamic_cast<TTree*>(o);
0091     if (events == nullptr)
0092       throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
0093 
0094     m_nEvents = events->GetEntries();
0095     if (m_nEvents == 0)
0096       throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
0097 
0098     TObjArray* branches = events->GetListOfBranches();
0099     if (branches == nullptr)
0100       throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
0101 
0102     const size_t n = branches->GetEntries();
0103     m_branches.reserve(n);
0104     for (size_t i = 0; i < n; ++i) {
0105       TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0106       if (b == nullptr)
0107         continue;
0108       std::string const name(b->GetName());
0109       if (name == "EventAux")
0110         continue;
0111       size_type s = getTotalSize(b);
0112       m_branches.push_back(
0113           BranchRecord(name, double(s[kCompressed]) / double(m_nEvents), double(s[kUncompressed]) / double(m_nEvents)));
0114     }
0115     std::sort(m_branches.begin(),
0116               m_branches.end(),
0117               std::bind(std::greater<double>(),
0118                         std::bind(&BranchRecord::compr_size, std::placeholders::_1),
0119                         std::bind(&BranchRecord::compr_size, std::placeholders::_2)));
0120   }
0121 
0122   void EdmEventSize::sortAlpha() {
0123     std::sort(m_branches.begin(),
0124               m_branches.end(),
0125               std::bind(std::less<std::string>(),
0126                         std::bind(&BranchRecord::name, std::placeholders::_1),
0127                         std::bind(&BranchRecord::name, std::placeholders::_2)));
0128   }
0129 
0130   namespace detail {
0131     // format as product:label (type)
0132     void shorterName(EdmEventSize::BranchRecord& br) {
0133       size_t b = br.fullName.find('_');
0134       size_t e = br.fullName.rfind('_');
0135       if (b == e)
0136         br.name = br.fullName;
0137       else {
0138         // remove type and process
0139         br.name = br.fullName.substr(b + 1, e - b - 1);
0140         // change label separator in :
0141         e = br.name.rfind('_');
0142         if (e != std::string::npos)
0143           br.name.replace(e, 1, ":");
0144         // add the type name
0145         br.name.append(" (" + br.fullName.substr(0, b) + ")");
0146       }
0147     }
0148 
0149   }  // namespace detail
0150 
0151   void EdmEventSize::formatNames() { std::for_each(m_branches.begin(), m_branches.end(), &detail::shorterName); }
0152 
0153   namespace detail {
0154 
0155     void dump(std::ostream& co, EdmEventSize::BranchRecord const& br) {
0156       co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
0157     }
0158   }  // namespace detail
0159 
0160   void EdmEventSize::dump(std::ostream& co, bool header) const {
0161     if (header) {
0162       co << "File " << m_fileName << " Events " << m_nEvents << "\n";
0163       co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0164     }
0165     std::for_each(m_branches.begin(), m_branches.end(), std::bind(detail::dump, std::ref(co), std::placeholders::_1));
0166   }
0167 
0168   namespace detail {
0169 
0170     struct Hist {
0171       explicit Hist(int itop)
0172           : top(itop),
0173             uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top),
0174             compressed("compressed", "branch sizes", top, -0.5, -0.5 + top),
0175             cxAxis(compressed.GetXaxis()),
0176             uxAxis(uncompressed.GetXaxis()),
0177             x(0) {}
0178 
0179       void fill(EdmEventSize::BranchRecord const& br) {
0180         if (x < top) {
0181           cxAxis->SetBinLabel(x + 1, br.name.c_str());
0182           uxAxis->SetBinLabel(x + 1, br.name.c_str());
0183           compressed.Fill(x, br.compr_size);
0184           uncompressed.Fill(x, br.uncompr_size);
0185           x++;
0186         }
0187       }
0188 
0189       void finalize() {
0190         double mn = std::numeric_limits<double>::max();
0191         for (int i = 1; i <= top; ++i) {
0192           double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0193           if (cm > 0 && cm < mn)
0194             mn = cm;
0195           if (um > 0 && um < mn)
0196             mn = um;
0197         }
0198         mn *= 0.8;
0199         double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
0200         mx *= 1.2;
0201         uncompressed.SetMinimum(mn);
0202         uncompressed.SetMaximum(mx);
0203         compressed.SetMinimum(mn);
0204         //  compressed.SetMaximum( mx );
0205         cxAxis->SetLabelOffset(-0.32);
0206         cxAxis->LabelsOption("v");
0207         cxAxis->SetLabelSize(0.03);
0208         uxAxis->SetLabelOffset(-0.32);
0209         uxAxis->LabelsOption("v");
0210         uxAxis->SetLabelSize(0.03);
0211         compressed.GetYaxis()->SetTitle("Bytes");
0212         compressed.SetFillColor(kBlue);
0213         compressed.SetLineWidth(2);
0214         uncompressed.GetYaxis()->SetTitle("Bytes");
0215         uncompressed.SetFillColor(kRed);
0216         uncompressed.SetLineWidth(2);
0217       }
0218 
0219       int top;
0220       TH1F uncompressed;
0221       TH1F compressed;
0222       TAxis* cxAxis;
0223       TAxis* uxAxis;
0224 
0225       int x;
0226     };
0227 
0228   }  // namespace detail
0229 
0230   void EdmEventSize::produceHistos(std::string const& plot, std::string const& file, int top) const {
0231     if (top == 0)
0232       top = m_branches.size();
0233     detail::Hist h(top);
0234     std::for_each(
0235         m_branches.begin(), m_branches.end(), std::bind(&detail::Hist::fill, std::ref(h), std::placeholders::_1));
0236     h.finalize();
0237     if (!plot.empty()) {
0238       gROOT->SetStyle("Plain");
0239       gStyle->SetOptStat(kFALSE);
0240       gStyle->SetOptLogy();
0241       TCanvas c;
0242       h.uncompressed.Draw();
0243       h.compressed.Draw("same");
0244       c.SaveAs(plot.c_str());
0245     }
0246     if (!file.empty()) {
0247       TFile f(file.c_str(), "RECREATE");
0248       h.compressed.Write();
0249       h.uncompressed.Write();
0250       f.Close();
0251     }
0252   }
0253 
0254 }  // namespace perftools