Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:10:08

0001 #!/usr/bin/env python3
0002 from __future__ import print_function
0003 import re
0004 import json
0005 import ROOT
0006 import sqlite3
0007 import argparse
0008 import subprocess
0009 import multiprocessing
0010 import fnmatch
0011 
# URL prefix prepended to every file name before it is handed to ROOT.TFile.Open.
ROOTPREFIX = "root://cms-xrd-global.cern.ch/"
#ROOTPREFIX = "root://eoscms//eos/cms" # for more local files

# Command line interface: one positional dataset name plus three optional settings.
parser = argparse.ArgumentParser(
    description="Collect a MEs from DQMIO data, with maximum possible granularity",
)

parser.add_argument(
    'dataset',
    help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"',
)
parser.add_argument(
    '-o', '--output',
    default='dqmio.sqlite',
    help='SQLite file to write',
)
parser.add_argument(
    '-j', '--njobs',
    type=int,
    default=1,
    help='Number of threads to read files',
)
parser.add_argument(
    '-l', '--limit',
    type=int,
    default=-1,
    help='Only load up to LIMIT files',
)

# Parsed once at module level; the rest of the script reads `args` directly.
args = parser.parse_args()
0022 
0023 
# Only trees whose name is listed here are scanned; restricting the set saves a
# lot of time when all interesting MEs are known to be of these types.
interesting_types = {"TH1Fs", "TH1Ds", "TH2Fs"}
0030 
# Full names of the histograms to collect, one glob pattern per entry
# (matched with fnmatch, so shell-style wildcards are usable).
interesting_mes = [
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/adc_PXLayer*",
]
0037 
# ROOT's JSON output may contain bare `inf` / `nan` tokens, which are not valid
# JSON. Each pattern captures the token's neighbours -- the character before it
# ('-', ' ' or '[') and the one after it (',', '}' or ']') -- so a substitution
# can put them back around the replacement value.
# Raw strings are used so that `\[` and `\]` reach the regex engine untouched
# instead of being parsed as (invalid) Python string escapes.
inf = re.compile(r"([- \[])inf([,}\]])")
nan = re.compile(r"([- \[])nan([,}\]])")
0040 
def check_interesting(mename):
    """Tell whether a monitor element name is one of the requested ones.

    Returns True as soon as `mename` matches any glob pattern listed in
    `interesting_mes` (matching is done by fnmatch.fnmatch), False otherwise.
    """
    return any(fnmatch.fnmatch(mename, wanted) for wanted in interesting_mes)
0046 
def tosqlite(x):
    """Convert a value read from a ROOT tree into something sqlite3 can store.

    - Python ints and floats are returned unchanged.
    - ROOT.string values are returned as text; if the underlying data is a
      byte string that is not valid UTF-8, the raw bytes are returned instead
      (sqlite3 stores bytes as a BLOB).
    - Any other object is serialised with ROOT.TBufferJSON.ConvertToJSON and
      returned as a strict JSON string. If that fails for any reason, a JSON
      object with the keys "root2sqlite_error" and "root2sqlite_object"
      describing the failure is returned instead of raising.
    """
    # Plain numbers first: cheap to test and needs no ROOT type lookup.
    if isinstance(x, (int, float)):
        return x

    if isinstance(x, ROOT.string):
        data = x.data()
        if isinstance(data, bytes):
            try:
                return data.decode("utf-8")
            except UnicodeDecodeError:
                return data
        return str(data)

    try:
        rootobj = str(ROOT.TBufferJSON.ConvertToJSON(x))
        # turns out ROOT does not generate valid JSON for NaN/inf:
        # inf becomes 1e38 and nan becomes 0 before parsing.
        clean = nan.sub(r'\g<1>0\g<2>', inf.sub(r'\g<1>1e38\g<2>', rootobj))
        # Round-trip through the json module to guarantee strictly valid JSON.
        return json.dumps(json.loads(clean), allow_nan=False)
    except Exception as e:
        # Best effort: record the failure in the database rather than aborting
        # the whole harvesting job over one object.
        return json.dumps({"root2sqlite_error": repr(e), "root2sqlite_object": repr(x)})
0069 
def dasquery(dataset):
    """Return the list of file names that `dasgoclient` reports for `dataset`.

    The external `dasgoclient` command is run with a "file dataset=..." query
    and its output is split into one file name per line.

    Raises ValueError (a subclass of Exception) if the dataset name does not
    end with "DQMIO", and subprocess.CalledProcessError if dasgoclient exits
    with a non-zero status.
    """
    if not dataset.endswith("DQMIO"):
        raise ValueError("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    command = ["dasgoclient",  "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % command)
    # universal_newlines=True makes check_output return text instead of bytes,
    # so the names can be concatenated with the (str) ROOTPREFIX later on.
    output = subprocess.check_output(command, universal_newlines=True)
    # Drop blank lines so they are never mistaken for file names.
    files = [line.strip() for line in output.splitlines() if line.strip()]
    print("Got %d files." % len(files))
    return files
0079 
0080 
# Maps the integer stored in the `Type` branch of the "Indices" tree to the
# name of the tree that holds the monitor elements of that type. The position
# in the tuple below is the type code.
treenames = dict(enumerate((
    "Ints",
    "Floats",
    "Strings",
    "TH1Fs",
    "TH1Ss",
    "TH1Ds",
    "TH2Fs",
    "TH2Ss",
    "TH2Ds",
    "TH3Fs",
    "TProfiles",
    "TProfile2Ds",
)))
0095 
# Schema: a single table with one row per stored monitor element. The run/lumi
# columns give the inclusive range the value covers.
maketable = """
  CREATE TABLE IF NOT EXISTS monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ); """
# IF NOT EXISTS keeps this statement re-runnable, like the CREATE TABLE above;
# without it a second run against an existing output file fails because the
# index is already there.
makeindex = """
  CREATE INDEX IF NOT EXISTS runorder ON monitorelements(fromrun, fromlumi);
"""
# Parameterised insert; the seven placeholders follow the column order above.
insertinto = """
  INSERT INTO monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ) VALUES (
    ?, ?, ?, ?, ?, ?, ?
  ); """
# Reads everything back ordered by run and lumi.
dumpmes = """
  SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;
"""
0118 
# Open (or create) the output database and run the schema statements in order:
# table first, then the index on it.
db = sqlite3.connect(args.output)
for statement in (maketable, makeindex):
    db.execute(statement)
0122 
# Hard-coded pre-filter applied to every monitor element name before the
# pattern match: names containing any excluded substring are dropped, and at
# least one of the required substrings must be present.
_EXCLUDED_ME_SUBSTRINGS = ("AlCaReco", "Isolated", "HLT")
_REQUIRED_ME_SUBSTRINGS = ("SiStrip", "OfflinePV", "PixelPhase1", "Tracking")


def _me_selected(mename):
    """Return True if `mename` passes the substring pre-filter and check_interesting."""
    if any(token in mename for token in _EXCLUDED_ME_SUBSTRINGS):
        return False
    if not any(token in mename for token in _REQUIRED_ME_SUBSTRINGS):
        return False
    return check_interesting(mename)


def harvestfile(fname):
    """Read one DQMIO file and return the selected monitor elements.

    `fname` is appended to ROOTPREFIX and opened with ROOT.TFile.Open. The
    "Indices" tree is walked entry by entry; for each entry whose type is in
    `interesting_types`, the referenced range of the matching ME tree is
    scanned and every selected ME is converted with tosqlite.

    Returns a list of tuples
        (name, fromrun, fromlumi, torun, tolumi, metype, value)
    in the column order expected by the `insertinto` statement.

    Raises IOError if the file cannot be opened. The file is always closed
    before returning, also when an exception is raised while reading.
    """
    url = ROOTPREFIX + fname
    f = ROOT.TFile.Open(url)
    if not f or f.IsZombie():
        raise IOError("Could not open %s" % url)

    try:
        idxtree = getattr(f, "Indices")
        #idxtree.GetEntry._threaded = True # now the blocking call should release the GIL...

        # we have no good way to find out which lumis were processed in a job.
        # so we watch the per-lumi indices and assume that all mentioned lumis
        # are covered in the end-of-job MEs.
        knownlumis = set()
        mes_to_store = []

        for i in range(idxtree.GetEntries()):
            idxtree.GetEntry(i)
            run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
            if lumi != 0:
                knownlumis.add(lumi)

            treename = treenames[metype]
            if treename not in interesting_types:
                continue

            endrun = run  # assume no multi-run files for now
            if lumi == 0:  # per-job ME
                if knownlumis:
                    lumi, endlumi = min(knownlumis), max(knownlumis)
                else:
                    # No per-lumi entry seen so far, so the covered range is
                    # unknown: keep 0..0 instead of failing on an empty set.
                    endlumi = 0
            else:
                endlumi = lumi

            # inclusive range -- for 0 entries, row is left out
            firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
            metree = getattr(f, treename)
            metree.GetEntry(0)
            # Only the name branch is enabled for the scan below; the full
            # entry is fetched explicitly for MEs that are actually selected.
            metree.SetBranchStatus("*", 0)
            metree.SetBranchStatus("FullName", 1)

            for x in range(firstidx, lastidx + 1):
                metree.GetEntry(x)
                mename = str(metree.FullName)
                if not _me_selected(mename):
                    continue

                # Second argument 1 (getall) reads every branch of this entry,
                # including the ones disabled above, so Value is populated.
                metree.GetEntry(x, 1)
                mes_to_store.append((
                    mename,
                    run, lumi, endrun, endlumi,
                    metype,
                    tosqlite(metree.Value),
                ))

        return mes_to_store
    finally:
        # Everything returned is plain Python data produced by tosqlite, so
        # the file can be released here.
        f.Close()
0186 
# Driver: look up the files of the dataset, harvest them in a pool of worker
# processes and write each file's MEs to the database as soon as it is done.
files = dasquery(args.dataset)
if args.limit > 0:
    files = files[:args.limit]

pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
try:
    # For debugging without multiprocessing, iterate over
    # map(harvestfile, files) instead.
    for mes_to_store in pool.imap_unordered(harvestfile, files):
        db.executemany(insertinto, mes_to_store)
        # Commit per file so an interrupted run keeps what was already read.
        db.commit()
        ctr += 1
        # flush=True: the line ends in '\r' without a newline, so it would
        # otherwise sit in the output buffer and the progress would not show.
        print("Processed %d files of %d, got %d MEs...\r" % (ctr, len(files), len(mes_to_store)),  end='', flush=True)
except BaseException:
    # Do not wait for the remaining files if something went wrong.
    pool.terminate()
    raise
else:
    pool.close()
finally:
    pool.join()
    db.close()
print("\nDone.")
0199 
# ROOT macro kept here as text for reference; nothing in the visible part of
# this script uses the string. Per its own header it is meant to be run as
# `root sqlite2tree.C`. The input and output paths inside it are hard-coded.
sqlite2tree = """
// Convert the sqlite format saved above back into a TTree.
// Saving TTrees with objects (TH1's) seems to be close to impossible in Python,
// so we do the roundtrip via SQLite and JSON in a ROOT macro.
// This needs a ROOT with TBufferJSON::FromJSON, which the 6.12 in CMSSW
// for now does not have. We can load a newer version from SFT (on lxplus6,
// in (!) a cmsenv):
// source /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.16.00-f8770/x86_64-slc6-gcc8-opt/bin/thisroot.sh
// root sqlite2tree.C
// It is rather slow, but the root file is a lot more compact.

int run;
int fromlumi;
int tolumi;
TString* name;
TH2F* value;

int sqlite2tree() {

  auto sql = TSQLiteServer("sqlite:///dev/shm/schneiml/CMSSW_10_5_0_pre1/src/dqmio.sqlite");
  auto query = "SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;";
  auto res = sql.Query(query);

  TFile outfile("/dev/shm/dqmio.root", "RECREATE");
  auto outtree = new TTree("MEs", "MonitorElements by run and lumisection");
  auto nameb     = outtree->Branch("name",    &name);
  auto valueb    = outtree->Branch("value",   &value,128*1024);
  auto runb      = outtree->Branch("run",     &run);
  auto fromlumib = outtree->Branch("fromlumi",&fromlumi);
  auto tolumib   = outtree->Branch("tolumi",  &tolumi);


  while (auto row = res->Next()) {
    fromlumi = atoi(row->GetField(0));
    tolumi   = atoi(row->GetField(1));
    run      = atoi(row->GetField(2));
    name  = new TString(row->GetField(3));
    value = nullptr;
    TBufferJSON::FromJSON(value, row->GetField(4));
    outtree->Fill();
    // The branches have copied the data; free the per-row objects.
    delete name;
    delete value;
  }
  // Without an explicit Write() the tree header never reaches the file.
  outtree->Write();
  return 0;
}
"""
0244 
0245