Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:12

0001 #!/usr/bin/env python3
0002 import os
0003 import json
0004 import ROOT
0005 import fnmatch
0006 import argparse
0007 import subprocess
0008 import multiprocessing
0009 from collections import defaultdict
0010 
0011 
0012 ROOTPREFIX = "root://cms-xrd-global.cern.ch/"
0013 #ROOTPREFIX = "root://eoscms//eos/cms" # for more local files
0014 
# Command-line interface. The artificial run-number encoding mentioned in the
# description is produced by fake_run() below.
parser = argparse.ArgumentParser(description="Collect MEs for given lumisections from DQMIO data and upload to a DQMGUI. " +
                                             "The from-to lumi range will be shown in an artificial run number of form 1xxxxyyyy, while the run number goes into the lumi number field.")

parser.add_argument('dataset', help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
parser.add_argument('-r', '--run', help='Run number of run to process', default=None, type=int)
parser.add_argument('-l', '--lumis', help='JSON file with runs/lumisections to process (golden JSON format)', default=None)
parser.add_argument('-u', '--upload', help='Upload files to this GUI, instead of just creating them. Delete files after upload.', default=None)
parser.add_argument('-j', '--njobs', help='Number of threads to read files', type=int, default=1)
parser.add_argument('-m', '--me', help='Glob pattern of MEs to load.', default=[], action='append')
parser.add_argument('--limit', help='Only load up to LIMIT files', type=int, default=-1)
parser.add_argument('--perlumionly', help='Only save MEs that cover exactly one lumisection, and use simplified "run" numbers (10xxxx)', action='store_true')
args = parser.parse_args()


# we can save a lot of time by only scanning some types, if we know all interesting MEs are of these types.
interesting_types = {
  "TH2Fs",
  "TH1Fs",
#  "TH2Ds",
#  "TH1Ds",
#  "TH2Ds",
#  "TProfiles",
#  "TProfile2Ds",
}

interesting_mes = args.me
if not interesting_mes:
  print("No --me patterns given. This is fine, but output *will* be empty.")

# Only allow plain-http uploads to local development servers; refuse to
# touch a production (https) DQMGUI instance.
if args.upload and "https:" in args.upload:
  print("Refusing to upload to production servers, only http upload to local servers allowed.")
  uploadurl = None
else:
  uploadurl = args.upload
0049 
def dasquery(dataset):
    """Return the list of file names (as str) in *dataset*, via dasgoclient.

    Raises Exception when the dataset name does not end in "DQMIO", since
    this tool can only read the DQMIO data tier.
    """
    if not dataset.endswith("DQMIO"):
        raise Exception("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    # Renamed local (was shadowing the function name).
    query = ["dasgoclient",  "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % query)
    # check_output returns bytes under Python 3; decode so the caller can do
    # str concatenation (ROOTPREFIX + fname) without a TypeError.
    files = subprocess.check_output(query).decode("utf-8")
    files = files.splitlines()
    print("Got %d files." % len(files))
    return files
0059 
# Resolve the file list, optionally truncated for quick tests.
files = dasquery(args.dataset)
if args.limit > 0: files = files[:args.limit]

# Build the run -> lumi-ranges filter used by rangecheck().
if args.lumis:
  with open(args.lumis) as f:
    j  = json.load(f)
    # JSON object keys are strings; convert to int run numbers.
    # (Was j.iteritems(), which is Python 2 only and crashes under the
    # python3 shebang of this script.)
    lumiranges = {int(run): lumis for run, lumis in j.items()}
else:
  if args.run:
    # let's define no lumis -> full run
    lumiranges = {args.run : []}
  else:
    # ... and similarly, no runs -> everything.
    lumiranges = {}
0074 
# Encode the covered lumi range into an artificial "run" number for the
# output files: 1xxxxx for a single lumi in per-lumi mode, 1xxxxyyyy
# (start, end) otherwise.
perlumionly = args.perlumionly
if perlumionly:
  def fake_run(lumi, endlumi):
    # endlumi is accepted for a uniform signature but not encoded here.
    return "1%05d" % (lumi)
else:
  def fake_run(lumi, endlumi):
    return "1%04d%04d" % (lumi, endlumi)
0083 
0084 
# DQMIO stores each monitor-element type in its own TTree; the Type code
# found in the Indices tree selects one of these tree names.
treenames = dict(enumerate((
  "Ints",
  "Floats",
  "Strings",
  "TH1Fs",
  "TH1Ss",
  "TH1Ds",
  "TH2Fs",
  "TH2Ss",
  "TH2Ds",
  "TH3Fs",
  "TProfiles",
  "TProfile2Ds",
)))
0099 
def check_interesting(mename, patterns=None):
  """Return True iff *mename* matches one of the glob *patterns*.

  *patterns* defaults to the --me patterns collected at startup
  (interesting_mes). The original returned an implicit None on no
  match; this makes the False explicit.
  """
  if patterns is None:
    patterns = interesting_mes
  return any(fnmatch.fnmatch(mename, pattern) for pattern in patterns)
0104 
def rangecheck(run, lumi):
  """Return True if (run, lumi) passes the global lumiranges filter.

  An empty filter accepts everything; a run mapped to an empty list
  accepts all lumis of that run; otherwise the lumi must fall inside
  one of the inclusive [start, end] ranges.
  """
  if not lumiranges:
    return True
  if run not in lumiranges:
    return False
  ranges = lumiranges[run]
  if not ranges:
    return True
  return any(start <= lumi <= end for start, end in ranges)
0114 
def create_dir(parent_dir, name):
   """Return the subdirectory *name* of *parent_dir*, creating it if absent."""
   existing = parent_dir.Get(name)
   return existing if existing else parent_dir.mkdir(name)
0120 
def gotodir(base, path):
  """Descend (creating as needed) and cd into the directory chain of *path*.

  The final path element is treated as the object name, not a directory,
  so only path[:-1] is walked.
  """
  current = base
  for segment in path[:-1]:
    current = create_dir(current, segment)
    current.cd()
0126 
0127 
def harvestfile(fname):
    """Read one DQMIO file and write per-lumi-range harvested ROOT files.

    For every per-lumi (or per-job) index entry of an interesting ME type,
    the matching MEs are copied into a DQMGUI-uploadable file named after
    the fake run number (see fake_run). Returns the list of file names
    written by this call.
    """
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    idxtree = getattr(f, "Indices")
    #idxtree.GetEntry._threaded = True # now the blocking call should release the GIL...

    # we have no good way to find out which lumis where processed in a job.
    # so we watch the per-lumi indices and assume that all mentioned lumis 
    # are covered in the end-of-job MEs. This might fail if there are no 
    # per-lumi MEs.
    knownlumis = set()
    files = []

    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        # skip ME types we are not configured to scan at all
        if not treenames[metype] in interesting_types:
            continue


        endrun = run # assume no multi-run files for now
        if lumi == 0: # per-job ME
            # NOTE(review): max()/min() raise ValueError if no per-lumi
            # entry was seen before this point -- the caveat noted above.
            endlumi = max(knownlumis)
            lumi = min(knownlumis)
        else: 
            endlumi = lumi

        # apply the runs/lumis filter at both ends of the covered range
        if not (rangecheck(run, lumi) or rangecheck(endrun, endlumi)):
          continue
        if perlumionly and lumi != endlumi:
          continue
           
        # we do the saving in here, concurrently with the reading, to avoid
        # needing to copy/move the TH1's.
        # doing a round-trip via JSON would probably also work, but this seems
        # cleaner. For better structure, one could use Generators...
        # but things need to stay in the same process (from multiprocessing).
        filename = "DQM_V0001_R%s__perlumiharvested__perlumi%d_%s_v1__DQMIO.root" % (fake_run(lumi, endlumi), run, treenames[metype])
        prefix = ["DQMData", "Run %s" % fake_run(lumi, endlumi)]
        # we open the file only on the first found ME, to avoid empty files.
        result_file = None
        subsystems = set()

        # inclusive range -- for 0 entries, row is left out
        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treenames[metype])
        # this GetEntry is only to make sure the TTree is initialized correctly
        metree.GetEntry(0)
        # only read the FullName branch while scanning, for speed
        metree.SetBranchStatus("*",0)
        metree.SetBranchStatus("FullName",1)

        for x in range(firstidx, lastidx+1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if check_interesting(mename):
                # second argument (getall=1) loads all branches for this
                # entry despite the SetBranchStatus filtering above
                metree.GetEntry(x, 1)
                value = metree.Value

                # navigate the TDirectory and save the thing again
                if not result_file:
                    result_file = ROOT.TFile(filename, 'recreate')
                path = mename.split("/")
                filepath = prefix + [path[0], "Run summary"] + path[1:]
                subsystems.add(path[0])
                gotodir(result_file, filepath)
                value.Write()

        # if we found a ME and wrote it to a file, finalize the file here.
        if result_file:
            # DQMGUI wants these to show them in the header bar. The folder name
            # in the TDirectory is also checked and has to match the filename,
            # but the  headerbar can show anything and uses these magic MEs.
            for subsys in subsystems:
                # last item is considerd object name and ignored
                gotodir(result_file, prefix + [subsys, "Run summary", "EventInfo", "blub"])
                # deliberately swapped: the fake (lumi-range) run number goes
                # into iRun, the real run number into iLumiSection, matching
                # the scheme described in the --help text.
                s = ROOT.TObjString("<iRun>i=%s</iRun>" % fake_run(lumi, endlumi))
                s.Write()
                s = ROOT.TObjString("<iLumiSection>i=%s</iLumiSection>" % run)
                s.Write()
                # we could also set iEvent and runStartTimeStamp if we had values.
            result_file.Close()
            files.append(filename)

    return files
0214 
def uploadfile(filename):
    """Upload *filename* to the DQMGUI at the global uploadurl."""
    cmd = ["visDQMUpload.py", uploadurl, filename]
    print("Uploading ... %s" % cmd)
    subprocess.check_call(cmd)
0219 
# Fan the per-file harvesting out over a worker pool; upload (and delete)
# produced files as results arrive. The with-statement terminates the pool
# on exit -- the original leaked it.
with multiprocessing.Pool(processes=args.njobs) as pool:
    ctr = 0
    for outfiles in pool.imap_unordered(harvestfile, files):
    #for mes_to_store in map(harvestfile, files):
        if uploadurl:
            for f in outfiles:
                uploadfile(f)
                os.remove(f)
        ctr += 1
        print("Processed %d files of %d, got %d out files...\r" % (ctr, len(files), len(outfiles)),  end='')
print("\nDone.")