from __future__ import print_function
import os
import json
import ROOT
import fnmatch
import argparse
import subprocess
import multiprocessing
from collections import defaultdict


ROOTPREFIX = "root://cms-xrd-global.cern.ch/"

parser = argparse.ArgumentParser(description="Collect MEs for given lumisections from DQMIO data and upload to a DQMGUI. " +
                                 "The from-to lumi range will be shown in an artificial run number of form 1xxxxyyyy, while the run number goes into the lumi number field.")

parser.add_argument('dataset', help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
parser.add_argument('-r', '--run', help='Run number of run to process', default=None, type=int)
parser.add_argument('-l', '--lumis', help='JSON file with runs/lumisections to process (golden JSON format)', default=None)
parser.add_argument('-u', '--upload', help='Upload files to this GUI, instead of just creating them. Delete files after upload.', default=None)
parser.add_argument('-j', '--njobs', help='Number of threads to read files', type=int, default=1)
parser.add_argument('-m', '--me', help='Glob pattern of MEs to load.', default=[], action='append')
parser.add_argument('--limit', help='Only load up to LIMIT files', type=int, default=-1)
parser.add_argument('--perlumionly', help='Only save MEs that cover exactly one lumisection, and use simplified "run" numbers (10xxxx)', action='store_true')
args = parser.parse_args()

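# Example invocation (script name, ME pattern, run number, and GUI URL below are
# hypothetical placeholders):
#   python dqmiolumiharvest.py /StreamHIExpress/HIRun2018A-Express-v1/DQMIO \
#       -r 123456 -m "EcalBarrel/EBOccupancyTask/*" -j 4 -u http://localhost:8060/dqm/dev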
# Only MEs stored in these DQMIO tree types are collected (see treenames below).
interesting_types = {
    "TH2Fs",
    "TH1Fs",
}

interesting_mes = args.me
if not interesting_mes:
    print("No --me patterns given. This is fine, but output *will* be empty.")

if args.upload and "https:" in args.upload:
    print("Refusing to upload to production servers, only http upload to local servers allowed.")
    uploadurl = None
else:
    uploadurl = args.upload

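# Query DAS via dasgoclient for the list of files in the given dataset.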
def dasquery(dataset):
    if not dataset.endswith("DQMIO"):
        raise Exception("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    dasquery = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % dasquery)
    files = subprocess.check_output(dasquery)
    files = files.splitlines()
    print("Got %d files." % len(files))
    return files

files = dasquery(args.dataset)
if args.limit > 0: files = files[:args.limit]

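# lumiranges maps run number -> list of [start, end] lumi ranges (golden JSON format).
# An empty list accepts all lumis of that run; an empty dict disables filtering entirely.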
if args.lumis:
    with open(args.lumis) as f:
        j = json.load(f)
        lumiranges = {int(run): lumis for run, lumis in j.items()}
else:
    if args.run:
        # keep only this run, but accept all of its lumis
        lumiranges = {args.run : []}
    else:
        # no filtering: process everything we find
        lumiranges = {}

# Build the artificial "run number" described in the help text: 1xxxxyyyy for a
# from-to lumi range, or a simplified 10xxxx form when --perlumionly is given.
if args.perlumionly:
    perlumionly = True
    def fake_run(lumi, endlumi):
        return "1%05d" % (lumi)
else:
    perlumionly = False
    def fake_run(lumi, endlumi):
        return "1%04d%04d" % (lumi, endlumi)

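# DQMIO stores MEs in one TTree per value type; the Type field of the Indices tree
# selects which of these trees a block of MEs lives in.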
treenames = {
    0: "Ints",
    1: "Floats",
    2: "Strings",
    3: "TH1Fs",
    4: "TH1Ss",
    5: "TH1Ds",
    6: "TH2Fs",
    7: "TH2Ss",
    8: "TH2Ds",
    9: "TH3Fs",
    10: "TProfiles",
    11: "TProfile2Ds",
}

def check_interesting(mename):
    for pattern in interesting_mes:
        if fnmatch.fnmatch(mename, pattern):
            return True
    return False

def rangecheck(run, lumi):
    if not lumiranges: return True
    if run not in lumiranges: return False
    lumis = lumiranges[run]
    if not lumis: return True
    for start, end in lumis:
        if lumi >= start and lumi <= end:
            return True
    return False

def create_dir(parent_dir, name):
    dir = parent_dir.Get(name)
    if not dir:
        dir = parent_dir.mkdir(name)
    return dir

def gotodir(base, path):
    current = base
    for directory in path[:-1]:
        current = create_dir(current, directory)
        current.cd()


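# Harvest one DQMIO file: for each (run, lumi, type) block in its Indices tree, copy the
# matching MEs into a small DQM-style ROOT file. Returns the list of files written.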
def harvestfile(fname):
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    idxtree = getattr(f, "Indices")
    # The Indices tree has one entry per (run, lumi, ME type) block, with
    # FirstIndex/LastIndex pointing into the per-type ME trees.

    knownlumis = set()
    files = []

    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        if not treenames[metype] in interesting_types:
            continue

        # MEs with lumi == 0 are per-run; treat them as covering all lumis seen so far.
        endrun = run
        if lumi == 0:
            endlumi = max(knownlumis)
            lumi = min(knownlumis)
        else:
            endlumi = lumi

        if not (rangecheck(run, lumi) or rangecheck(endrun, endlumi)):
            continue
        if perlumionly and lumi != endlumi:
            continue

        # Name the output like a harvested DQM file: the artificial run number encodes
        # the lumi range, and the real run number goes into the "perlumi" part of the name.
        filename = "DQM_V0001_R%s__perlumiharvested__perlumi%d_%s_v1__DQMIO.root" % (fake_run(lumi, endlumi), run, treenames[metype])
        prefix = ["DQMData", "Run %s" % fake_run(lumi, endlumi)]

        result_file = None
        subsystems = set()

        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treenames[metype])
        # Only read the FullName branch while scanning; the full entry (including the
        # histogram itself) is loaded later, and only for matching MEs.
        metree.GetEntry(0)
        metree.SetBranchStatus("*",0)
        metree.SetBranchStatus("FullName",1)

        for x in range(firstidx, lastidx+1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if check_interesting(mename):
                # re-read the entry with all branches enabled to get the ME value itself
                metree.GetEntry(x, 1)
                value = metree.Value

                # create the output file lazily, only once there is something to write
                if not result_file:
                    result_file = ROOT.TFile(filename, 'recreate')
                path = mename.split("/")
                filepath = prefix + [path[0], "Run summary"] + path[1:]
                subsystems.add(path[0])
                gotodir(result_file, filepath)
                value.Write()

        if result_file:
            # Write per-subsystem EventInfo metadata strings: the artificial run number
            # goes into <iRun>, the real run number into <iLumiSection>, matching the
            # convention described in the argparse help text above.
            for subsys in subsystems:
                # the last path element is skipped by gotodir (it only descends into path[:-1])
                gotodir(result_file, prefix + [subsys, "Run summary", "EventInfo", "blub"])
                s = ROOT.TObjString("<iRun>i=%s</iRun>" % fake_run(lumi, endlumi))
                s.Write()
                s = ROOT.TObjString("<iLumiSection>i=%s</iLumiSection>" % run)
                s.Write()

            result_file.Close()
            files.append(filename)

    return files

def uploadfile(filename):
    uploadcommand = ["visDQMUpload.py", uploadurl, filename]
    print("Uploading ... %s" % uploadcommand)
    subprocess.check_call(uploadcommand)

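# Read the input files in parallel. If an upload URL was given, upload each output file
# as soon as it is ready and delete it afterwards.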
pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
for outfiles in pool.imap_unordered(harvestfile, files):
    if uploadurl:
        for f in outfiles:
            uploadfile(f)
            os.remove(f)
    ctr += 1
    print("Processed %d files of %d, got %d out files...\r" % (ctr, len(files), len(outfiles)), end='')
print("\nDone.")