File indexing completed on 2023-03-17 10:58:57
0001
0002 from __future__ import print_function
0003 import re
0004 import json
0005 import ROOT
0006 import sqlite3
0007 import argparse
0008 import subprocess
0009 import multiprocessing
0010 import fnmatch
0011
# Prefix prepended to every file name returned by DAS before the file is
# opened through ROOT (see harvestfile).
ROOTPREFIX = "root://cms-xrd-global.cern.ch/"


# Command-line interface. Arguments are parsed at import time and the result
# is stored in the module-level `args` used by the rest of the script.
parser = argparse.ArgumentParser(
    description="Collect MEs from DQMIO data, with maximum possible granularity")

parser.add_argument(
    'dataset',
    help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
parser.add_argument(
    '-o', '--output', help='SQLite file to write', default='dqmio.sqlite')
# The value is handed to multiprocessing.Pool(processes=...), i.e. these are
# worker processes, not threads.
parser.add_argument(
    '-j', '--njobs', help='Number of worker processes used to read files',
    type=int, default=1)
parser.add_argument(
    '-l', '--limit', help='Only load up to LIMIT files', type=int, default=-1)
args = parser.parse_args()
0022
0023
0024
# Names of the DQMIO trees (see treenames) whose entries are looked at;
# index entries of any other type are skipped in harvestfile.
interesting_types = {"TH1Ds", "TH1Fs", "TH2Fs"}

# Shell-style (fnmatch) patterns; a monitor element is stored only when its
# full name matches at least one of them (see check_interesting).
interesting_mes = ["PixelPhase1/Phase1_MechanicalView/PXBarrel/adc_PXLayer*"]
0037
# Match a bare `inf` / `nan` token in the JSON text produced by ROOT: the
# token must be preceded by '-', a space or '[' and followed by ',', '}' or
# ']'. Both delimiters are captured so a substitution can keep them.
# Raw strings are used so that `\[` and `\]` reach the regex engine verbatim
# instead of being (invalid) Python string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0040
def check_interesting(mename, patterns=None):
    """Return True if `mename` matches at least one shell-style pattern.

    Args:
        mename: full monitor element name to test.
        patterns: iterable of fnmatch-style patterns. When omitted, the
            module-level `interesting_mes` list is used.

    Returns:
        True when any pattern matches, False otherwise (including when the
        pattern list is empty).
    """
    if patterns is None:
        patterns = interesting_mes
    # fnmatchcase: ME names are not filesystem paths, so the OS-dependent
    # case/separator normalisation done by fnmatch.fnmatch is not wanted.
    return any(fnmatch.fnmatchcase(mename, pattern) for pattern in patterns)
0046
# Text/blob constructors that work on both Python 2 and Python 3: the
# builtins `unicode` and `buffer` only exist on Python 2.
try:
    _text_type = unicode  # Python 2
    _blob_type = buffer
except NameError:
    _text_type = str  # Python 3
    _blob_type = bytes


def tosqlite(x):
    """Convert a value read from a DQMIO tree into something SQLite can store.

    Args:
        x: a Python int/float, a ROOT.string, or any other ROOT object.

    Returns:
        * ints and floats unchanged;
        * ROOT.string as text, or as a binary blob if the text conversion
          fails;
        * anything else as a JSON string obtained through
          ROOT.TBufferJSON.ConvertToJSON, with `inf` replaced by 1e38 and
          `nan` by 0 so that the result is strict JSON. If that conversion
          fails, a JSON object with the keys "root2sqlite_error" and
          "root2sqlite_object" describing the failure is returned instead of
          raising.
    """
    # Plain numbers need no conversion; checking them first also avoids any
    # ROOT attribute lookup for the common scalar case.
    if isinstance(x, (int, float)):
        return x

    if isinstance(x, ROOT.string):
        try:
            return _text_type(x.data())
        except Exception:
            # Not representable as text: store the raw data instead.
            # NOTE(review): on Python 3 this relies on x.data() yielding a
            # bytes-like object in this failure case -- confirm with PyROOT.
            return _blob_type(x.data())

    try:
        rootobj = _text_type(ROOT.TBufferJSON.ConvertToJSON(x))
        # ROOT may emit bare inf/nan tokens; rewrite them to finite numbers
        # (1e38 and 0) so that allow_nan=False below cannot fail on them.
        clean = nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', rootobj))
        obj = json.loads(clean)
        return json.dumps(obj, allow_nan=False)
    except Exception as e:
        # Best effort: record the problem in the row rather than aborting
        # the whole harvest.
        return json.dumps({"root2sqlite_error": repr(e), "root2sqlite_object": repr(x)})
0069
def dasquery(dataset):
    """Return the list of file names that DAS reports for `dataset`.

    Runs `dasgoclient` with a `file dataset=...` query and returns one entry
    per non-empty output line.

    Args:
        dataset: dataset name; must end with "DQMIO".

    Returns:
        List of file names as text strings.

    Raises:
        ValueError: if the dataset name does not end with "DQMIO".
        subprocess.CalledProcessError: if dasgoclient exits with a non-zero
            status.
    """
    if not dataset.endswith("DQMIO"):
        raise ValueError("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    command = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % command)
    # universal_newlines=True makes check_output return text on Python 3 as
    # well, so the names can be concatenated with ROOTPREFIX later on.
    output = subprocess.check_output(command, universal_newlines=True)
    # Drop blank lines so that they are not mistaken for file names.
    files = [line.strip() for line in output.splitlines() if line.strip()]
    print("Got %d files." % len(files))
    return files
0079
0080
# Maps the numeric `Type` read from the "Indices" tree to the name of the
# tree in the same file that holds the monitor elements of that kind
# (harvestfile does getattr(f, treenames[metype])). The position in the
# tuple below is the type code.
treenames = dict(enumerate((
    "Ints",
    "Floats",
    "Strings",
    "TH1Fs",
    "TH1Ss",
    "TH1Ds",
    "TH2Fs",
    "TH2Ss",
    "TH2Ds",
    "TH3Fs",
    "TProfiles",
    "TProfile2Ds",
)))
0095
# Schema: one row per monitor element and lumi range. `value` holds whatever
# tosqlite() produced for the element.
maketable = """
  CREATE TABLE IF NOT EXISTS monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ); """
# IF NOT EXISTS keeps this consistent with maketable, so the script can be
# re-run against an already existing output file without failing here.
makeindex = """
  CREATE INDEX IF NOT EXISTS runorder ON monitorelements(fromrun, fromlumi);
"""
# Parameter order must match the tuples built in harvestfile.
insertinto = """
  INSERT INTO monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ) VALUES (
    ?, ?, ?, ?, ?, ?, ?
  ); """
# Dump of all stored elements ordered by run and first lumi.
dumpmes = """
  SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;
"""
0118
# Open (or create) the output database named on the command line and run the
# schema statements, table first, then index.
db = sqlite3.connect(args.output)
for schema_statement in (maketable, makeindex):
    db.execute(schema_statement)
0122
def _is_candidate_name(mename):
    """Cheap substring pre-filter applied before the pattern-based check."""
    if "AlCaReco" in mename or "Isolated" in mename or "HLT" in mename:
        return False
    return ("SiStrip" in mename
            or "OfflinePV" in mename
            or "PixelPhase1" in mename
            or "Tracking" in mename)


def harvestfile(fname):
    """Read one DQMIO file and return the rows to insert into SQLite.

    The file is opened as ROOTPREFIX + fname. Its "Indices" tree is walked
    entry by entry; for every entry whose type is listed in
    `interesting_types`, the monitor elements in the index range
    [FirstIndex, LastIndex] of the corresponding tree are inspected and those
    passing the name filters are converted with tosqlite().

    Args:
        fname: file name as returned by dasquery().

    Returns:
        List of tuples (name, fromrun, fromlumi, torun, tolumi, metype,
        value), in the column order expected by `insertinto`.

    Raises:
        IOError: if the file cannot be opened.
    """
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    # NOTE(review): assumes PyROOT yields a falsy object when Open() fails.
    if not f or f.IsZombie():
        raise IOError("Could not open %s%s" % (ROOTPREFIX, fname))

    try:
        idxtree = getattr(f, "Indices")

        knownlumis = set()
        mes_to_store = []

        for i in range(idxtree.GetEntries()):
            idxtree.GetEntry(i)
            run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
            if lumi != 0:
                knownlumis.add(lumi)

            if treenames[metype] not in interesting_types:
                continue

            endrun = run
            if lumi == 0:
                # Entry without a lumi number: attribute it to the range of
                # lumis seen so far in this file. If none has been seen yet,
                # keep 0 for both ends instead of failing on an empty set.
                if knownlumis:
                    endlumi = max(knownlumis)
                    lumi = min(knownlumis)
                else:
                    endlumi = 0
            else:
                endlumi = lumi

            firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
            metree = getattr(f, treenames[metype])
            metree.GetEntry(0)
            # Only the name branch is enabled while scanning.
            metree.SetBranchStatus("*", 0)
            metree.SetBranchStatus("FullName", 1)

            for x in range(firstidx, lastidx + 1):
                metree.GetEntry(x)
                mename = str(metree.FullName)

                if not _is_candidate_name(mename):
                    continue

                if check_interesting(mename):
                    # NOTE(review): the second argument presumably forces all
                    # branches (including Value) to be read for this entry
                    # despite the branch status set above -- confirm.
                    metree.GetEntry(x, 1)
                    value = metree.Value

                    mes_to_store.append((
                        mename,
                        run, lumi, endrun, endlumi,
                        metype,
                        tosqlite(value),
                    ))

        return mes_to_store
    finally:
        # Values were already converted by tosqlite() above, so the file can
        # be released before the rows are returned.
        f.Close()
0186
# Main driver: look up the files of the dataset, harvest them in a pool of
# worker processes and insert the returned rows into the SQLite database.
files = dasquery(args.dataset)
if args.limit > 0:
    files = files[:args.limit]

pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
try:
    # Results arrive in completion order; each file's rows are committed as
    # soon as they are available.
    for mes_to_store in pool.imap_unordered(harvestfile, files):
        db.executemany(insertinto, mes_to_store)
        db.commit()
        ctr += 1
        print("Processed %d files of %d, got %d MEs...\r" % (ctr, len(files), len(mes_to_store)), end='')
finally:
    # Release the workers and the database connection on both the normal and
    # the error path. All results have been consumed when the loop finishes,
    # so terminating the pool does not discard any work.
    pool.terminate()
    pool.join()
    db.close()
print("\nDone.")
0199
# Source text of a ROOT (C++) macro that reads the `monitorelements` table
# back from SQLite and fills a TTree with one entry per row. It is kept here
# as a plain string; nothing in the visible part of this script writes it to
# disk or executes it.
# NOTE(review): the input/output paths inside the macro are hard-coded, and
# the macro contains no explicit Write() call on the tree or the output
# file -- confirm that the tree is actually persisted before relying on it.
sqlite2tree = """
// Convert the sqlite format saved above back into a TTree.
// Saving TTrees with objects (TH1's) seems to be close to impossible in Python,
// so we do the roundtrip via SQLite and JSON in a ROOT macro.
// This needs a ROOT with TBufferJSON::FromJSON, which the 6.12 in CMSSW for
// for now does not have. We can load a newer version from SFT (on lxplus6,
// in (!) a cmsenv):
// source /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.16.00-f8770/x86_64-slc6-gcc8-opt/bin/thisroot.sh
// root sqlite2tree.C
// It is rather slow, but the root file is a lot more compact.

int run;
int fromlumi;
int tolumi;
TString* name;
TH2F* value;

int sqlite2tree() {

auto sql = TSQLiteServer("sqlite:///dev/shm/schneiml/CMSSW_10_5_0_pre1/src/dqmio.sqlite");
auto query = "SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;";
auto res = sql.Query(query);

TFile outfile("/dev/shm/dqmio.root", "RECREATE");
auto outtree = new TTree("MEs", "MonitorElements by run and lumisection");
auto nameb = outtree->Branch("name", &name);
auto valueb = outtree->Branch("value", &value,128*1024);
auto runb = outtree->Branch("run", &run);
auto fromlumib = outtree->Branch("fromlumi",&fromlumi);
auto tolumib = outtree->Branch("tolumi", &tolumi);


while (auto row = res->Next()) {
fromlumi = atoi(row->GetField(0));
tolumi = atoi(row->GetField(1));
run = atoi(row->GetField(2));
name = new TString(row->GetField(3));
value = nullptr;
TBufferJSON::FromJSON(value, row->GetField(4));
outtree->Fill();
}
return 0;
}
"""
0244
0245