Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 10:58:57

0001 #!/usr/bin/env python3
0002 from __future__ import print_function
0003 import os
0004 import json
0005 import ROOT
0006 import fnmatch
0007 import argparse
0008 import subprocess
0009 import multiprocessing
0010 from collections import defaultdict
0011 
0012 
0013 ROOTPREFIX = "root://cms-xrd-global.cern.ch/"
0014 #ROOTPREFIX = "root://eoscms//eos/cms" # for more local files
0015 
# Command-line interface. The artificial run number encodes the lumi range so
# the DQMGUI can display each range as if it were its own run.
parser = argparse.ArgumentParser(description="Collect MEs for given lumisections from DQMIO data and upload to a DQMGUI. " +
                                             "The from-to lumi range will be shown in an artificial run number of form 1xxxxyyyy, while the run number goes into the lumi number field.")

parser.add_argument('dataset', help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
parser.add_argument('-r', '--run', help='Run number of run to process', default=None, type=int)
# fixed typo: "lumisecitons" -> "lumisections"
parser.add_argument('-l', '--lumis', help='JSON file with runs/lumisections to process (golden JSON format)', default=None)
parser.add_argument('-u', '--upload', help='Upload files to this GUI, instead of just creating them. Delete files after upload.', default=None)
parser.add_argument('-j', '--njobs', help='Number of threads to read files', type=int, default=1)
parser.add_argument('-m', '--me', help='Glob pattern of MEs to load.', default=[], action='append')
parser.add_argument('--limit', help='Only load up to LIMIT files', type=int, default=-1)
parser.add_argument('--perlumionly', help='Only save MEs that cover exactly one lumisection, and use simplified "run" numbers (10xxxx)', action='store_true')
args = parser.parse_args()
0028 
0029 
# we can save a lot of time by only scanning some types, if we know all interesting MEs are of these types.
interesting_types = {
  "TH2Fs",
  "TH1Fs",
#  "TH2Ds",
#  "TH1Ds",
#  "TH2Ds",
#  "TProfiles",
#  "TProfile2Ds",
}

# Glob patterns from --me; with no patterns, nothing will ever match.
interesting_mes = args.me
if not interesting_mes:
  print("No --me patterns given. This is fine, but output *will* be empty.")

# Safety net: never upload to production (https) servers from this tool.
# Fixed typo in the message: "Refuing" -> "Refusing".
if args.upload and "https:" in args.upload:
  print("Refusing to upload to production servers, only http upload to local servers allowed.")
  uploadurl = None
else:
  uploadurl = args.upload
0050 
def dasquery(dataset):
    """Query DAS via dasgoclient for the list of files in `dataset`.

    Returns a list of logical file names as str. Raises if the dataset name
    does not end in DQMIO, since only that format can be read by this tool.
    """
    if not dataset.endswith("DQMIO"):
        raise Exception("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    dasquery = ["dasgoclient",  "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % dasquery)
    # check_output returns bytes on Python 3; decode so the names can later be
    # concatenated with the (str) ROOTPREFIX in harvestfile().
    files = subprocess.check_output(dasquery).decode("utf-8")
    files = files.splitlines()
    print("Got %d files." % len(files))
    return files
0060 
files = dasquery(args.dataset)
if args.limit > 0: files = files[:args.limit]

# Decide which run/lumi ranges to process. `lumiranges` maps run number to a
# list of [start, end] lumi pairs; an empty list means "the whole run", and
# an empty dict means "process everything".
if args.lumis:
  with open(args.lumis) as f:
    j = json.load(f)
    # dict.iteritems() does not exist on Python 3 (this is a python3 script);
    # items() is correct on both.
    lumiranges = {int(run): lumis for run, lumis in j.items()}
else:
  if args.run:
    # let's define no lumis -> full run
    lumiranges = {args.run : []}
  else:
    # ... and similarly, no runs -> everything.
    lumiranges = {}

# The fake "run" number shown in the GUI encodes the lumi range:
# 1xxxxx with per-lumi MEs only, else 1xxxxyyyy (start and end lumi).
if args.perlumionly:
  perlumionly = True
  def fake_run(lumi, endlumi):
    return "1%05d" % (lumi)
else:
  perlumionly = False
  def fake_run(lumi, endlumi):
    return "1%04d%04d" % (lumi, endlumi)
0084 
0085 
# DQMIO stores each ME type in its own TTree; the Indices.Type code is the
# position in this list, so an enumerate-built dict gives the same mapping.
treenames = dict(enumerate([
  "Ints",
  "Floats",
  "Strings",
  "TH1Fs",
  "TH1Ss",
  "TH1Ds",
  "TH2Fs",
  "TH2Ss",
  "TH2Ds",
  "TH3Fs",
  "TProfiles",
  "TProfile2Ds",
]))
0100 
def check_interesting(mename, patterns=None):
  """Return True if `mename` matches any glob pattern in `patterns`.

  `patterns` defaults to the --me patterns (`interesting_mes`) collected at
  startup; passing it explicitly makes the function self-contained. The
  original fell off the end and returned None on no match; now it returns an
  explicit bool (truthiness-compatible for all existing callers).
  """
  if patterns is None:
    patterns = interesting_mes
  return any(fnmatch.fnmatch(mename, pattern) for pattern in patterns)
0105 
def rangecheck(run, lumi, ranges=None):
  """Return True if (run, lumi) is selected by the configured lumi ranges.

  `ranges` defaults to the global `lumiranges`; passing it explicitly makes
  the function self-contained. Semantics (unchanged): an empty mapping
  accepts everything; a run mapped to an empty list accepts the whole run;
  otherwise `lumi` must fall into one of the inclusive [start, end] pairs.
  """
  if ranges is None:
    ranges = lumiranges
  if not ranges:
    return True
  if run not in ranges:
    return False
  lumis = ranges[run]
  if not lumis:
    return True
  return any(start <= lumi <= end for start, end in lumis)
0115 
def create_dir(parent_dir, name):
   """Return the subdirectory `name` of TDirectory `parent_dir`, creating it
   via mkdir() if it does not exist yet.

   The local was renamed from `dir` to avoid shadowing the builtin.
   """
   subdir = parent_dir.Get(name)
   if not subdir:
      subdir = parent_dir.mkdir(name)
   return subdir
0121 
def gotodir(base, path):
  """Descend from TDirectory `base` along `path`, creating each level as
  needed and cd()-ing into it. The last path element is treated as the
  object name and is therefore not entered."""
  here = base
  for dirname in path[:-1]:
    here = create_dir(here, dirname)
    here.cd()
0127 
0128 
def harvestfile(fname):
    """Read one DQMIO file over xrootd and write the interesting MEs into
    local ROOT files (one per run/lumi-range/ME-type), laid out the way the
    DQMGUI expects. Returns the list of file names written.

    Runs inside a multiprocessing worker, so everything here must stay in
    one process.
    """
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    idxtree = getattr(f, "Indices")
    #idxtree.GetEntry._threaded = True # now the blocking call should release the GIL...

    # we have no good way to find out which lumis where processed in a job.
    # so we watch the per-lumi indices and assume that all mentioned lumis 
    # are covered in the end-of-job MEs. This might fail if there are no 
    # per-lumi MEs.
    knownlumis = set()
    files = []

    # Each Indices entry describes one block of MEs: its run/lumi, its ME
    # type code, and the entry range in the corresponding per-type TTree.
    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        # Skip ME types that are not listed in interesting_types.
        if not treenames[metype] in interesting_types:
            continue


        endrun = run # assume no multi-run files for now
        if lumi == 0: # per-job ME
            # Attribute end-of-job MEs to the full span of lumis seen so far.
            endlumi = max(knownlumis)
            lumi = min(knownlumis)
        else: 
            endlumi = lumi

        # Drop blocks whose start AND end fall outside the requested ranges.
        if not (rangecheck(run, lumi) or rangecheck(endrun, endlumi)):
          continue
        if perlumionly and lumi != endlumi:
          continue
           
        # we do the saving in here, concurrently with the reading, to avoid
        # needing to copy/move the TH1's.
        # doing a round-trip via JSON would probably also work, but this seems
        # cleaner. For better structure, one could use Generators...
        # but things need to stay in the same process (from multiprocessing).
        filename = "DQM_V0001_R%s__perlumiharvested__perlumi%d_%s_v1__DQMIO.root" % (fake_run(lumi, endlumi), run, treenames[metype])
        prefix = ["DQMData", "Run %s" % fake_run(lumi, endlumi)]
        # we open the file only on the first found ME, to avoid empty files.
        result_file = None
        subsystems = set()

        # inclusive range -- for 0 entries, row is left out
        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treenames[metype])
        # this GetEntry is only to make sure the TTree is initialized correctly
        metree.GetEntry(0)
        # While filtering, read only the FullName branch; the Value branch is
        # re-enabled per matching entry via GetEntry(x, 1) below.
        metree.SetBranchStatus("*",0)
        metree.SetBranchStatus("FullName",1)

        for x in range(firstidx, lastidx+1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if check_interesting(mename):
                # GetEntry with getall=1 reads all branches regardless of
                # branch status, making Value available for this entry.
                metree.GetEntry(x, 1)
                value = metree.Value

                # navigate the TDirectory and save the thing again
                if not result_file:
                    result_file = ROOT.TFile(filename, 'recreate')
                path = mename.split("/")
                # Insert the "<subsystem>/Run summary" level the GUI expects.
                filepath = prefix + [path[0], "Run summary"] + path[1:]
                subsystems.add(path[0])
                gotodir(result_file, filepath)
                value.Write()

        # if we found a ME and wrote it to a file, finalize the file here.
        if result_file:
            # DQMGUI wants these to show them in the header bar. The folder name
            # in the TDirectory is also checked and has to match the filename,
            # but the  headerbar can show anything and uses these magic MEs.
            for subsys in subsystems:
                # last item is considerd object name and ignored
                gotodir(result_file, prefix + [subsys, "Run summary", "EventInfo", "blub"])
                # Note the deliberate swap: the fake run (lumi range) is shown
                # as iRun, while the real run number goes into iLumiSection.
                s = ROOT.TObjString("<iRun>i=%s</iRun>" % fake_run(lumi, endlumi))
                s.Write()
                s = ROOT.TObjString("<iLumiSection>i=%s</iLumiSection>" % run)
                s.Write()
                # we could also set iEvent and runStartTimeStamp if we had values.
            result_file.Close()
            files.append(filename)

    return files
0215 
def uploadfile(filename):
    """Upload one harvested ROOT file to the configured DQMGUI (`uploadurl`)
    using the visDQMUpload.py tool."""
    cmd = ["visDQMUpload.py", uploadurl, filename]
    print("Uploading ... %s" % cmd)
    subprocess.check_call(cmd)
0220 
# Fan the input files out over a process pool; each worker reads one DQMIO
# file, writes its harvested MEs to local ROOT files, and returns their names.
pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
for outfiles in pool.imap_unordered(harvestfile, files):
#for mes_to_store in map(harvestfile, files):
    if uploadurl:
        # Upload each produced file, then delete it locally (see --upload help).
        for f in outfiles:
            uploadfile(f)
            os.remove(f)
    ctr += 1
    # NOTE(review): len(outfiles) is the count from the last finished job
    # only, not a running total — presumably intentional progress output.
    print("Processed %d files of %d, got %d out files...\r" % (ctr, len(files), len(outfiles)),  end='')
print("\nDone.")