Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:12

0001 #!/usr/bin/env python3
0002 import re
0003 import json
0004 import ROOT
0005 import sqlite3
0006 import argparse
0007 import subprocess
0008 import multiprocessing
0009 import fnmatch
0010 
# URL prefix prepended to every file name before it is handed to ROOT.TFile.Open.
ROOTPREFIX = "root://cms-xrd-global.cern.ch/"
#ROOTPREFIX = "root://eoscms//eos/cms" # for more local files

# Command line interface: one positional dataset name plus output/parallelism/limit options.
parser = argparse.ArgumentParser(
    description="Collect a MEs from DQMIO data, with maximum possible granularity",
)
parser.add_argument(
    'dataset',
    help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"',
)
parser.add_argument(
    '-o', '--output',
    default='dqmio.sqlite',
    help='SQLite file to write',
)
parser.add_argument(
    '-j', '--njobs',
    type=int,
    default=1,
    help='Number of threads to read files',
)
parser.add_argument(
    '-l', '--limit',
    type=int,
    default=-1,
    help='Only load up to LIMIT files',
)
# Parsed at import time; the rest of the script reads its settings from `args`.
args = parser.parse_args()
0021 
0022 
# Only trees of these types are scanned. Restricting the set saves a lot of
# time when all interesting MEs are known to be of these types.
interesting_types = {"TH1Fs", "TH1Ds", "TH2Fs"}

# Full ME names to collect; entries are fnmatch-style patterns, so wild cards
# are usable.
interesting_mes = [
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/adc_PXLayer*",
]
0036 
# Patterns matching bare `inf` / `nan` tokens in ROOT's JSON output (which is
# not valid JSON for those values). Group 1 is the preceding '-', ' ' or '[';
# group 2 is the following ',', '}' or ']', so both can be kept when the token
# is substituted. Raw strings keep the regex escapes from being read as
# (invalid) Python string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0039 
def check_interesting(mename):
    """Return True if `mename` matches at least one pattern in `interesting_mes`.

    Matching uses fnmatch.fnmatch, so the patterns may contain shell-style
    wild cards.
    """
    return any(fnmatch.fnmatch(mename, wanted) for wanted in interesting_mes)
0045 
def tosqlite(x):
    """Convert a value read from a DQMIO tree into something sqlite3 can store.

    Args:
        x: a Python number or string, a ROOT.string, or any other ROOT object.

    Returns:
        - ints, floats and Python strings unchanged;
        - for a ROOT.string, its content as text (or as raw bytes if it
          cannot be decoded as UTF-8);
        - for anything else, a JSON string produced via ROOT.TBufferJSON.
          If that conversion fails, a JSON object with the keys
          "root2sqlite_error" and "root2sqlite_object" is returned instead of
          raising, so one broken object does not abort the whole harvest.
    """
    # Plain Python scalars go first: they need no conversion and never have
    # to touch ROOT.
    if isinstance(x, (int, float, str)):
        return x

    if isinstance(x, ROOT.string):
        data = x.data()
        # NOTE(review): depending on the PyROOT version, data() may presumably
        # hand back either str or bytes -- both are handled here.
        if isinstance(data, bytes):
            try:
                return data.decode("utf-8")
            except UnicodeDecodeError:
                # sqlite3 stores bytes objects as BLOBs.
                return data
        return str(data)

    try:
        rootobj = str(ROOT.TBufferJSON.ConvertToJSON(x))
        # turns out ROOT does not generate valid JSON for NaN/inf:
        # inf becomes 1e38 and nan becomes 0 before parsing.
        clean = nan.sub(r'\g<1>0\g<2>', inf.sub(r'\g<1>1e38\g<2>', rootobj))
        # Round-trip through the json module to guarantee the stored text is
        # strictly valid JSON (allow_nan=False rejects any leftovers).
        return json.dumps(json.loads(clean), allow_nan=False)
    except Exception as e:
        return json.dumps({"root2sqlite_error": repr(e), "root2sqlite_object": repr(x)})
0068 
def dasquery(dataset):
    """Ask DAS (via the `dasgoclient` command) for the files of a dataset.

    Args:
        dataset: dataset name; it must end with "DQMIO".

    Returns:
        A list of file names as text strings, one per non-empty output line.

    Raises:
        ValueError: if the dataset name does not end with "DQMIO".
        subprocess.CalledProcessError: if dasgoclient exits with an error.
    """
    if not dataset.endswith("DQMIO"):
        raise ValueError("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    command = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % command)
    # universal_newlines=True makes check_output return text instead of bytes,
    # so the file names can be concatenated with a str prefix later on.
    output = subprocess.check_output(command, universal_newlines=True)
    files = [line.strip() for line in output.splitlines() if line.strip()]
    print("Got %d files." % len(files))
    return files
0078 
0079 
# Maps the integer type code stored in the index tree to the name of the tree
# holding MEs of that type. The position in the tuple is the type code.
treenames = dict(enumerate((
    "Ints",
    "Floats",
    "Strings",
    "TH1Fs",
    "TH1Ss",
    "TH1Ds",
    "TH2Fs",
    "TH2Ss",
    "TH2Ds",
    "TH3Fs",
    "TProfiles",
    "TProfile2Ds",
)))
0094 
# Schema of the output database: one row per monitor element and lumi range.
maketable = """
  CREATE TABLE IF NOT EXISTS monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ); """
# IF NOT EXISTS (as for the table) so that running again on an existing
# output file does not fail with "index runorder already exists".
makeindex = """
  CREATE INDEX IF NOT EXISTS runorder ON monitorelements(fromrun, fromlumi);
"""
# Parameterized insert; the seven placeholders follow the column order above.
insertinto = """
  INSERT INTO monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ) VALUES (
    ?, ?, ?, ?, ?, ?, ?
  ); """
# Reads everything back ordered by run and first lumi.
dumpmes = """
  SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;
"""
0117 
# Open (or create) the output database and make sure the table and its index exist.
db = sqlite3.connect(args.output)
for setup_statement in (maketable, makeindex):
    db.execute(setup_statement)
0121 
def _wanted_me(mename):
    """Return True if the ME name passes the hard-coded filters and the pattern list."""
    if "AlCaReco" in mename or "Isolated" in mename or "HLT" in mename:
        return False
    if not ("SiStrip" in mename or "OfflinePV" in mename
            or "PixelPhase1" in mename or "Tracking" in mename):
        return False
    return check_interesting(mename)


def _collect_mes(f):
    """Walk the "Indices" tree of an open DQMIO file and collect the rows to store."""
    idxtree = getattr(f, "Indices")

    # we have no good way to find out which lumis were processed in a job.
    # so we watch the per-lumi indices and assume that all mentioned lumis
    # are covered in the end-of-job MEs.
    knownlumis = set()
    mes_to_store = []

    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        # .get() so that a type code missing from treenames is skipped
        # instead of raising KeyError.
        treename = treenames.get(metype)
        if treename not in interesting_types:
            continue

        endrun = run  # assume no multi-run files for now
        if lumi == 0:  # per-job ME
            if knownlumis:
                lumi, endlumi = min(knownlumis), max(knownlumis)
            else:
                # No per-lumi entry seen so far: the covered range is
                # unknown, so keep 0 for both ends rather than crashing on
                # min()/max() of an empty set.
                endlumi = 0
        else:
            endlumi = lumi

        # inclusive range -- for 0 entries, row is left out
        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treename)
        metree.GetEntry(0)
        # Only read the name branch while scanning; values are loaded on
        # demand below.
        metree.SetBranchStatus("*", 0)
        metree.SetBranchStatus("FullName", 1)

        for x in range(firstidx, lastidx + 1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if not _wanted_me(mename):
                continue

            # Second argument 1 (getall) reads all branches of this entry,
            # including the disabled Value branch.
            metree.GetEntry(x, 1)
            value = metree.Value

            mes_to_store.append((
                mename,
                run, lumi, endrun, endlumi,
                metype,
                tosqlite(value),
            ))

    return mes_to_store


def harvestfile(fname):
    """Read one DQMIO file and return the rows to insert for the interesting MEs.

    Args:
        fname: file name as returned by DAS (str, or bytes which are
            decoded); ROOTPREFIX is prepended before opening.

    Returns:
        A list of tuples (name, fromrun, fromlumi, torun, tolumi, metype,
        value), with value already converted by tosqlite. For per-job MEs
        (lumi 0 in the index) the lumi range is the min/max of the per-lumi
        entries seen so far, or 0/0 if none were seen.

    Raises:
        IOError: if the file could not be opened.
    """
    if isinstance(fname, bytes):
        fname = fname.decode()
    url = ROOTPREFIX + fname
    f = ROOT.TFile.Open(url)
    if not f or f.IsZombie():
        raise IOError("Could not open %s" % url)
    try:
        # The returned rows only hold plain Python values, so the file can be
        # closed safely once they are collected.
        return _collect_mes(f)
    finally:
        f.Close()
0185 
files = dasquery(args.dataset)
if args.limit > 0:
    files = files[:args.limit]

# Files are read in parallel worker processes; the results are written to the
# database from this process only, one commit per file. The context manager
# makes sure the workers are shut down, also when an error aborts the loop.
ctr = 0
with multiprocessing.Pool(processes=args.njobs) as pool:
    # For debugging without multiprocessing, iterate over
    # map(harvestfile, files) instead.
    for mes_to_store in pool.imap_unordered(harvestfile, files):
        db.executemany(insertinto, mes_to_store)
        db.commit()
        ctr += 1
        print("Processed %d files of %d, got %d MEs...\r" % (ctr, len(files), len(mes_to_store)),  end='')
print("\nDone.")
0198 
# Source of a ROOT (C++) macro kept here for reference; this script does not
# run it. Paths inside the macro are hard-coded and must be adapted before use.
sqlite2tree = """
// Convert the sqlite format saved above back into a TTree.
// Saving TTrees with objects (TH1's) seems to be close to impossible in Python,
// so we do the roundtrip via SQLite and JSON in a ROOT macro.
// This needs a ROOT with TBufferJSON::FromJSON, which the 6.12 in CMSSW
// for now does not have. We can load a newer version from SFT (on lxplus6,
// in (!) a cmsenv):
// source /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.16.00-f8770/x86_64-slc6-gcc8-opt/bin/thisroot.sh
// root sqlite2tree.C
// It is rather slow, but the root file is a lot more compact.

int run;
int fromlumi;
int tolumi;
TString* name;
TH2F* value;

int sqlite2tree() {

  auto sql = TSQLiteServer("sqlite:///dev/shm/schneiml/CMSSW_10_5_0_pre1/src/dqmio.sqlite");
  auto query = "SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;";
  auto res = sql.Query(query);

  TFile outfile("/dev/shm/dqmio.root", "RECREATE");
  auto outtree = new TTree("MEs", "MonitorElements by run and lumisection");
  auto nameb     = outtree->Branch("name",    &name);
  auto valueb    = outtree->Branch("value",   &value,128*1024);
  auto runb      = outtree->Branch("run",     &run);
  auto fromlumib = outtree->Branch("fromlumi",&fromlumi);
  auto tolumib   = outtree->Branch("tolumi",  &tolumi);


  while (auto row = res->Next()) {
    fromlumi = atoi(row->GetField(0));
    tolumi   = atoi(row->GetField(1));
    run      = atoi(row->GetField(2));
    name  = new TString(row->GetField(3));
    value = nullptr;
    TBufferJSON::FromJSON(value, row->GetField(4));
    outtree->Fill();
  }
  // Without an explicit Write() the tree is not stored in the output file.
  outtree->Write();
  return 0;
}
"""
0243 
0244