File indexing completed on 2024-04-06 12:23:21
0001
0002
0003
0004
#include "PerfTools/EdmEvent/interface/EdmEventSize.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <valarray>

#include "Rtypes.h"
#include "TROOT.h"
#include "TFile.h"
#include "TTree.h"
#include "TStyle.h"
#include "TObjArray.h"
#include "TBranch.h"
#include "TH1.h"
#include "TCanvas.h"
#include "Riostream.h"

#include "TBufferFile.h"
0025
0026 namespace {
0027
0028 enum Indices { kUncompressed, kCompressed };
0029
0030 typedef std::valarray<Long64_t> size_type;
0031
0032 size_type getBasketSize(TBranch*);
0033
0034 size_type getBasketSize(TObjArray* branches) {
0035 size_type result(static_cast<Long64_t>(0), 2);
0036 size_t n = branches->GetEntries();
0037 for (size_t i = 0; i < n; ++i) {
0038 TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0039 assert(b != nullptr);
0040 result += getBasketSize(b);
0041 }
0042 return result;
0043 }
0044
0045 size_type getBasketSize(TBranch* b) {
0046 size_type result(static_cast<Long64_t>(0), 2);
0047 if (b != nullptr) {
0048 if (b->GetZipBytes() > 0) {
0049 result[kUncompressed] = b->GetTotBytes();
0050 result[kCompressed] = b->GetZipBytes();
0051 } else {
0052 result[kUncompressed] = b->GetTotalSize();
0053 result[kCompressed] = b->GetTotalSize();
0054 }
0055 result += getBasketSize(b->GetListOfBranches());
0056 }
0057 return result;
0058 }
0059
0060 size_type getTotalSize(TBranch* br) {
0061 TBufferFile buf(TBuffer::kWrite, 10000);
0062 TBranch::Class()->WriteBuffer(buf, br);
0063 size_type size = getBasketSize(br);
0064 if (br->GetZipBytes() > 0)
0065 size[kUncompressed] += buf.Length();
0066 return size;
0067 }
0068 }
0069
0070 namespace perftools {
0071
0072 EdmEventSize::EdmEventSize() : m_nEvents(0) {}
0073
0074 EdmEventSize::EdmEventSize(std::string const& fileName, std::string const& treeName) : m_nEvents(0) {
0075 parseFile(fileName);
0076 }
0077
0078 void EdmEventSize::parseFile(std::string const& fileName, std::string const& treeName) {
0079 m_fileName = fileName;
0080 m_branches.clear();
0081
0082 TFile* file = TFile::Open(fileName.c_str());
0083 if (file == nullptr || (!(*file).IsOpen()))
0084 throw Error("unable to open data file " + fileName, 7002);
0085
0086 TObject* o = file->Get(treeName.c_str());
0087 if (o == nullptr)
0088 throw Error("no object \"" + treeName + "\" found in file: " + fileName, 7003);
0089
0090 TTree* events = dynamic_cast<TTree*>(o);
0091 if (events == nullptr)
0092 throw Error("object \"" + treeName + "\" is not a TTree in file: " + fileName, 7004);
0093
0094 m_nEvents = events->GetEntries();
0095 if (m_nEvents == 0)
0096 throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no Events", 7005);
0097
0098 TObjArray* branches = events->GetListOfBranches();
0099 if (branches == nullptr)
0100 throw Error("tree \"" + treeName + "\" in file " + fileName + " contains no branches", 7006);
0101
0102 const size_t n = branches->GetEntries();
0103 m_branches.reserve(n);
0104 for (size_t i = 0; i < n; ++i) {
0105 TBranch* b = dynamic_cast<TBranch*>(branches->At(i));
0106 if (b == nullptr)
0107 continue;
0108 std::string const name(b->GetName());
0109 if (name == "EventAux")
0110 continue;
0111 size_type s = getTotalSize(b);
0112 m_branches.push_back(
0113 BranchRecord(name, double(s[kCompressed]) / double(m_nEvents), double(s[kUncompressed]) / double(m_nEvents)));
0114 }
0115 std::sort(m_branches.begin(),
0116 m_branches.end(),
0117 std::bind(std::greater<double>(),
0118 std::bind(&BranchRecord::compr_size, std::placeholders::_1),
0119 std::bind(&BranchRecord::compr_size, std::placeholders::_2)));
0120 }
0121
0122 void EdmEventSize::sortAlpha() {
0123 std::sort(m_branches.begin(),
0124 m_branches.end(),
0125 std::bind(std::less<std::string>(),
0126 std::bind(&BranchRecord::name, std::placeholders::_1),
0127 std::bind(&BranchRecord::name, std::placeholders::_2)));
0128 }
0129
0130 namespace detail {
0131
0132 void shorterName(EdmEventSize::BranchRecord& br) {
0133 size_t b = br.fullName.find('_');
0134 size_t e = br.fullName.rfind('_');
0135 if (b == e)
0136 br.name = br.fullName;
0137 else {
0138
0139 br.name = br.fullName.substr(b + 1, e - b - 1);
0140
0141 e = br.name.rfind('_');
0142 if (e != std::string::npos)
0143 br.name.replace(e, 1, ":");
0144
0145 br.name.append(" (" + br.fullName.substr(0, b) + ")");
0146 }
0147 }
0148
0149 }
0150
0151 void EdmEventSize::formatNames() { std::for_each(m_branches.begin(), m_branches.end(), &detail::shorterName); }
0152
0153 namespace detail {
0154
0155 void dump(std::ostream& co, EdmEventSize::BranchRecord const& br) {
0156 co << br.name << " " << br.uncompr_size << " " << br.compr_size << "\n";
0157 }
0158 }
0159
0160 void EdmEventSize::dump(std::ostream& co, bool header) const {
0161 if (header) {
0162 co << "File " << m_fileName << " Events " << m_nEvents << "\n";
0163 co << "Branch Name | Average Uncompressed Size (Bytes/Event) | Average Compressed Size (Bytes/Event) \n";
0164 }
0165 std::for_each(m_branches.begin(), m_branches.end(), std::bind(detail::dump, std::ref(co), std::placeholders::_1));
0166 }
0167
0168 namespace detail {
0169
0170 struct Hist {
0171 explicit Hist(int itop)
0172 : top(itop),
0173 uncompressed("uncompressed", "branch sizes", top, -0.5, -0.5 + top),
0174 compressed("compressed", "branch sizes", top, -0.5, -0.5 + top),
0175 cxAxis(compressed.GetXaxis()),
0176 uxAxis(uncompressed.GetXaxis()),
0177 x(0) {}
0178
0179 void fill(EdmEventSize::BranchRecord const& br) {
0180 if (x < top) {
0181 cxAxis->SetBinLabel(x + 1, br.name.c_str());
0182 uxAxis->SetBinLabel(x + 1, br.name.c_str());
0183 compressed.Fill(x, br.compr_size);
0184 uncompressed.Fill(x, br.uncompr_size);
0185 x++;
0186 }
0187 }
0188
0189 void finalize() {
0190 double mn = std::numeric_limits<double>::max();
0191 for (int i = 1; i <= top; ++i) {
0192 double cm = compressed.GetMinimum(i), um = uncompressed.GetMinimum(i);
0193 if (cm > 0 && cm < mn)
0194 mn = cm;
0195 if (um > 0 && um < mn)
0196 mn = um;
0197 }
0198 mn *= 0.8;
0199 double mx = std::max(compressed.GetMaximum(), uncompressed.GetMaximum());
0200 mx *= 1.2;
0201 uncompressed.SetMinimum(mn);
0202 uncompressed.SetMaximum(mx);
0203 compressed.SetMinimum(mn);
0204
0205 cxAxis->SetLabelOffset(-0.32);
0206 cxAxis->LabelsOption("v");
0207 cxAxis->SetLabelSize(0.03);
0208 uxAxis->SetLabelOffset(-0.32);
0209 uxAxis->LabelsOption("v");
0210 uxAxis->SetLabelSize(0.03);
0211 compressed.GetYaxis()->SetTitle("Bytes");
0212 compressed.SetFillColor(kBlue);
0213 compressed.SetLineWidth(2);
0214 uncompressed.GetYaxis()->SetTitle("Bytes");
0215 uncompressed.SetFillColor(kRed);
0216 uncompressed.SetLineWidth(2);
0217 }
0218
0219 int top;
0220 TH1F uncompressed;
0221 TH1F compressed;
0222 TAxis* cxAxis;
0223 TAxis* uxAxis;
0224
0225 int x;
0226 };
0227
0228 }
0229
0230 void EdmEventSize::produceHistos(std::string const& plot, std::string const& file, int top) const {
0231 if (top == 0)
0232 top = m_branches.size();
0233 detail::Hist h(top);
0234 std::for_each(
0235 m_branches.begin(), m_branches.end(), std::bind(&detail::Hist::fill, std::ref(h), std::placeholders::_1));
0236 h.finalize();
0237 if (!plot.empty()) {
0238 gROOT->SetStyle("Plain");
0239 gStyle->SetOptStat(kFALSE);
0240 gStyle->SetOptLogy();
0241 TCanvas c;
0242 h.uncompressed.Draw();
0243 h.compressed.Draw("same");
0244 c.SaveAs(plot.c_str());
0245 }
0246 if (!file.empty()) {
0247 TFile f(file.c_str(), "RECREATE");
0248 h.compressed.Write();
0249 h.uncompressed.Write();
0250 f.Close();
0251 }
0252 }
0253
0254 }