File indexing completed on 2024-11-26 02:34:12
0001
0002 import re
0003 import json
0004 import ROOT
0005 import sqlite3
0006 import argparse
0007 import subprocess
0008 import multiprocessing
0009 import fnmatch
0010
# URL prefix (root:// protocol) that is prepended to every file name
# before the file is opened.
ROOTPREFIX = "root://cms-xrd-global.cern.ch/"

# Command-line interface: one positional dataset name plus options for the
# output file, the number of parallel readers and a cap on the file count.
parser = argparse.ArgumentParser(
    description="Collect a MEs from DQMIO data, with maximum possible granularity"
)
parser.add_argument(
    "dataset",
    help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"',
)
parser.add_argument(
    "-o", "--output",
    default="dqmio.sqlite",
    help="SQLite file to write",
)
parser.add_argument(
    "-j", "--njobs",
    type=int,
    default=1,
    help="Number of threads to read files",
)
parser.add_argument(
    "-l", "--limit",
    type=int,
    default=-1,
    help="Only load up to LIMIT files",
)

# Parsed once at start-up; the rest of the script reads args.dataset,
# args.output, args.njobs and args.limit.
args = parser.parse_args()
0021
0022
0023
# Tree names (monitor element kinds) that are harvested; index entries whose
# tree name is not in this set are skipped.
interesting_types = {"TH1Fs", "TH1Ds", "TH2Fs"}

# fnmatch-style patterns; a monitor element is stored only when its full
# name matches at least one of them.
interesting_mes = [
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/adc_PXLayer*",
]
0036
# Match the bare tokens `inf` / `nan` in ROOT's JSON output so they can be
# replaced by finite numbers before the text is handed to the json module.
# Group 1 is the character in front of the token ('-', ' ' or '['), group 2
# the one after it (',', '}' or ']'); both are re-inserted by the caller.
# Raw strings keep the regex escapes `\[` and `\]` from being treated as
# (invalid) Python string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0039
def check_interesting(mename):
    """Return True if *mename* matches at least one pattern in interesting_mes.

    Matching is done with fnmatch.fnmatch, i.e. shell-style wildcards.
    """
    return any(fnmatch.fnmatch(mename, wanted) for wanted in interesting_mes)
0045
def tosqlite(x):
    """Convert a value read from a ROOT tree into something sqlite3 can bind.

    * ROOT.string  -> Python text; if the payload arrives as bytes that are
      not valid UTF-8, the raw bytes are returned instead (sqlite3 stores
      bytes as a BLOB).
    * int / float  -> returned unchanged.
    * anything else -> serialised with ROOT.TBufferJSON.ConvertToJSON and
      re-encoded as strict JSON text.

    The conversion of other objects never raises: on any failure a JSON
    object with the keys "root2sqlite_error" and "root2sqlite_object" is
    returned so that one bad object does not abort the whole harvest.
    """
    if isinstance(x, ROOT.string):
        data = x.data()
        if isinstance(data, bytes):
            try:
                return data.decode("utf-8")
            except UnicodeDecodeError:
                # Not text: keep the bytes, sqlite3 will store a BLOB.
                return data
        return str(data)

    if isinstance(x, (int, float)):
        return x

    try:
        rootobj = str(ROOT.TBufferJSON.ConvertToJSON(x))
        # `inf` and `nan` are not valid JSON; replace them with finite
        # numbers (1e38 and 0) before parsing.
        clean = nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', rootobj))
        obj = json.loads(clean)
        # allow_nan=False makes dumps() raise if a non-finite value is
        # still present, which lands in the error record below.
        return json.dumps(obj, allow_nan=False)
    except Exception as e:
        return json.dumps({"root2sqlite_error": repr(e), "root2sqlite_object": repr(x)})
0068
def dasquery(dataset):
    """Return the list of file names that `dasgoclient` reports for *dataset*.

    Parameters:
        dataset: dataset name; must end with "DQMIO".

    Returns:
        A list of text strings, one per non-empty line printed by
        `dasgoclient -query="file dataset=<dataset>"`.

    Raises:
        ValueError: if the dataset name does not end with "DQMIO".
        subprocess.CalledProcessError: if dasgoclient exits with an error.
    """
    if not dataset.endswith("DQMIO"):
        raise ValueError("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    query = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % query)
    # universal_newlines=True makes check_output return text instead of
    # bytes, so the names can be concatenated with the str URL prefix later.
    output = subprocess.check_output(query, universal_newlines=True)
    files = [line.strip() for line in output.splitlines() if line.strip()]
    print("Got %d files." % len(files))
    return files
0078
0079
# Maps the numeric type code found in the index tree to the name of the
# tree holding monitor elements of that type. The position in the tuple is
# the type code.
treenames = dict(enumerate((
    "Ints",
    "Floats",
    "Strings",
    "TH1Fs",
    "TH1Ss",
    "TH1Ds",
    "TH2Fs",
    "TH2Ss",
    "TH2Ds",
    "TH3Fs",
    "TProfiles",
    "TProfile2Ds",
)))
0094
# SQL used by this script. Both schema statements carry IF NOT EXISTS so
# the script can be pointed at an already existing output file without
# failing on the second run.

# One row per stored monitor element and run/lumi range.
maketable = """
  CREATE TABLE IF NOT EXISTS monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ); """

# Index supporting the ORDER BY fromrun, fromlumi used when reading back.
makeindex = """
  CREATE INDEX IF NOT EXISTS runorder ON monitorelements(fromrun, fromlumi);
"""

# Parameterised insert; takes a 7-tuple in column order.
insertinto = """
  INSERT INTO monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ) VALUES (
    ?, ?, ?, ?, ?, ?, ?
  ); """

# Reads everything back ordered by run and first lumisection.
dumpmes = """
  SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;
"""
0117
# Open (or create) the output database and make sure the table and its
# index are set up before any file is processed.
db = sqlite3.connect(args.output)
for schema_statement in (maketable, makeindex):
    db.execute(schema_statement)
0121
def _wanted_by_name(mename):
    """Return True if *mename* passes the substring filters and check_interesting."""
    if any(tag in mename for tag in ("AlCaReco", "Isolated", "HLT")):
        return False
    if not any(tag in mename for tag in ("SiStrip", "OfflinePV", "PixelPhase1", "Tracking")):
        return False
    return check_interesting(mename)


def harvestfile(fname):
    """Read one file and return the rows to insert into the database.

    Parameters:
        fname: file name as returned by dasquery(); ROOTPREFIX is prepended
            before the file is opened.

    Returns:
        A list of tuples
        (name, fromrun, fromlumi, torun, tolumi, metype, value)
        in the column order expected by the insert statement, where value
        has already been converted with tosqlite().

    Raises:
        IOError: if the file cannot be opened.
    """
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    if not f or f.IsZombie():
        raise IOError("Could not open %s%s" % (ROOTPREFIX, fname))

    try:
        idxtree = getattr(f, "Indices")

        # Lumisections seen so far in the index. Entries with Lumi == 0 are
        # not tied to a single lumisection (presumably per-run / end-of-job
        # MEs -- confirm); they are assigned the range of lumis seen so far.
        knownlumis = set()
        mes_to_store = []

        for i in range(idxtree.GetEntries()):
            idxtree.GetEntry(i)
            run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
            if lumi != 0:
                knownlumis.add(lumi)

            if treenames[metype] not in interesting_types:
                continue

            endrun = run
            if lumi == 0:
                if knownlumis:
                    lumi, endlumi = min(knownlumis), max(knownlumis)
                else:
                    # No per-lumi entry seen yet: min()/max() of an empty
                    # set would raise, so store the range as 0..0 instead.
                    endlumi = 0
            else:
                endlumi = lumi

            # FirstIndex..LastIndex is used as an inclusive range below.
            firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
            metree = getattr(f, treenames[metype])
            metree.GetEntry(0)
            # Only read the name while scanning; the value is fetched
            # explicitly for the entries that are kept.
            metree.SetBranchStatus("*", 0)
            metree.SetBranchStatus("FullName", 1)

            for x in range(firstidx, lastidx + 1):
                metree.GetEntry(x)
                mename = str(metree.FullName)

                if not _wanted_by_name(mename):
                    continue

                # Second argument 1 (getall): read every branch of this
                # entry even though only FullName is enabled.
                metree.GetEntry(x, 1)
                value = metree.Value

                mes_to_store.append((
                    mename,
                    run, lumi, endrun, endlumi,
                    metype,
                    tosqlite(value),
                ))

        return mes_to_store
    finally:
        # Values were already serialised by tosqlite(), so nothing returned
        # refers to the file any more.
        f.Close()
0185
# Main driver: list the files of the dataset, harvest them in parallel and
# write the rows of each finished file to the database.
files = dasquery(args.dataset)
if args.limit > 0:
    files = files[:args.limit]

ctr = 0
# The context manager shuts the worker processes down when the loop ends,
# also when it is left through an exception.
with multiprocessing.Pool(processes=args.njobs) as pool:
    # Results arrive in completion order; all database writes happen here
    # in the parent process, one commit per file.
    for mes_to_store in pool.imap_unordered(harvestfile, files):
        db.executemany(insertinto, mes_to_store)
        db.commit()
        ctr += 1
        # No newline is printed, so flush explicitly to make the progress
        # line visible while the job is running.
        print("Processed %d files of %d, got %d MEs...\r" % (ctr, len(files), len(mes_to_store)), end='', flush=True)
print("\nDone.")
0198
# Text of a ROOT (C++) macro, kept here as a plain string. According to its
# own header comment it converts the SQLite file written by this script back
# into a TTree and is meant to be saved as sqlite2tree.C and run with ROOT.
# The macro hard-codes both the input SQLite path and the output ROOT path.
# NOTE(review): no Python code visible in this file references this string,
# and the macro has no explicit Write() call on the tree or file -- confirm
# the output is actually persisted before relying on it.
sqlite2tree = """
// Convert the sqlite format saved above back into a TTree.
// Saving TTrees with objects (TH1's) seems to be close to impossible in Python,
// so we do the roundtrip via SQLite and JSON in a ROOT macro.
// This needs a ROOT with TBufferJSON::FromJSON, which the 6.12 in CMSSW for
// for now does not have. We can load a newer version from SFT (on lxplus6,
// in (!) a cmsenv):
// source /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.16.00-f8770/x86_64-slc6-gcc8-opt/bin/thisroot.sh
// root sqlite2tree.C
// It is rather slow, but the root file is a lot more compact.

int run;
int fromlumi;
int tolumi;
TString* name;
TH2F* value;

int sqlite2tree() {

auto sql = TSQLiteServer("sqlite:///dev/shm/schneiml/CMSSW_10_5_0_pre1/src/dqmio.sqlite");
auto query = "SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;";
auto res = sql.Query(query);

TFile outfile("/dev/shm/dqmio.root", "RECREATE");
auto outtree = new TTree("MEs", "MonitorElements by run and lumisection");
auto nameb = outtree->Branch("name", &name);
auto valueb = outtree->Branch("value", &value,128*1024);
auto runb = outtree->Branch("run", &run);
auto fromlumib = outtree->Branch("fromlumi",&fromlumi);
auto tolumib = outtree->Branch("tolumi", &tolumi);


while (auto row = res->Next()) {
fromlumi = atoi(row->GetField(0));
tolumi = atoi(row->GetField(1));
run = atoi(row->GetField(2));
name = new TString(row->GetField(3));
value = nullptr;
TBufferJSON::FromJSON(value, row->GetField(4));
outtree->Fill();
}
return 0;
}
"""
0243
0244