import os
import json
import ROOT
import fnmatch
import argparse
import subprocess
import multiprocessing
from collections import defaultdict


# XRootD global redirector, prepended to the LFNs returned by DAS.
ROOTPREFIX = "root://cms-xrd-global.cern.ch/"


parser = argparse.ArgumentParser(description="Collect MEs for given lumisections from DQMIO data and upload to a DQMGUI. " +
                                 "The from-to lumi range will be shown in an artificial run number of form 1xxxxyyyy, while the run number goes into the lumi number field.")

parser.add_argument('dataset', help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
parser.add_argument('-r', '--run', help='Run number of run to process', default=None, type=int)
parser.add_argument('-l', '--lumis', help='JSON file with runs/lumisections to process (golden JSON format)', default=None)
parser.add_argument('-u', '--upload', help='Upload files to this GUI, instead of just creating them. Delete files after upload.', default=None)
parser.add_argument('-j', '--njobs', help='Number of threads to read files', type=int, default=1)
parser.add_argument('-m', '--me', help='Glob pattern of MEs to load.', default=[], action='append')
parser.add_argument('--limit', help='Only load up to LIMIT files', type=int, default=-1)
parser.add_argument('--perlumionly', help='Only save MEs that cover exactly one lumisection, and use simplified "run" numbers (10xxxx)', action='store_true')
args = parser.parse_args()
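
# Example invocation (illustrative only; the run number, ME pattern and local
# GUI URL are placeholders, not values defined in this script):
#   python thisscript.py /StreamHIExpress/HIRun2018A-Express-v1/DQMIO \
#       -r 326479 -m "EcalBarrel/*" -j 4 -u http://localhost:8060/dqm/dev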


interesting_types = {
    "TH2Fs",
    "TH1Fs",
}

interesting_mes = args.me
if not interesting_mes:
    print("No --me patterns given. This is fine, but output *will* be empty.")

if args.upload and "https:" in args.upload:
    print("Refusing to upload to production servers, only http upload to local servers allowed.")
    uploadurl = None
else:
    uploadurl = args.upload

def dasquery(dataset):
    if not dataset.endswith("DQMIO"):
        raise Exception("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    dasquery = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % dasquery)
    files = subprocess.check_output(dasquery)
    # check_output returns bytes; decode before splitting into file names.
    files = files.decode("utf-8").splitlines()
    print("Got %d files." % len(files))
    return files

files = dasquery(args.dataset)
if args.limit > 0: files = files[:args.limit]

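# A golden JSON maps run numbers to lists of [first, last] lumisection ranges,
# e.g. {"316239": [[1, 50], [53, 70]]} (the run and ranges here only illustrate
# the format).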
if args.lumis:
    with open(args.lumis) as f:
        j = json.load(f)
        lumiranges = {int(run): lumis for run, lumis in j.items()}
else:
    if args.run:
        # an empty list means "all lumis of this run" in rangecheck below
        lumiranges = {args.run : []}
    else:
        # an empty dict means "no filtering at all" in rangecheck below
        lumiranges = {}

if args.perlumionly:
    perlumionly = True
    # encode a single lumisection as the artificial "run" number 1<lumi>
    def fake_run(lumi, endlumi):
        return "1%05d" % (lumi)
else:
    perlumionly = False
    # encode the lumi range as the artificial "run" number 1<lumi><endlumi>
    def fake_run(lumi, endlumi):
        return "1%04d%04d" % (lumi, endlumi)

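# DQMIO stores MEs in one TTree per type; the Indices tree read below refers to
# these trees by the numeric type id used as key here.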
treenames = {
    0: "Ints",
    1: "Floats",
    2: "Strings",
    3: "TH1Fs",
    4: "TH1Ss",
    5: "TH1Ds",
    6: "TH2Fs",
    7: "TH2Ss",
    8: "TH2Ds",
    9: "TH3Fs",
    10: "TProfiles",
    11: "TProfile2Ds",
}

def check_interesting(mename):
    for pattern in interesting_mes:
        if fnmatch.fnmatch(mename, pattern):
            return True
    return False

def rangecheck(run, lumi):
    if not lumiranges: return True
    if run not in lumiranges: return False
    lumis = lumiranges[run]
    if not lumis: return True
    for start, end in lumis:
        if start <= lumi <= end:
            return True
    return False
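
# e.g. with lumiranges = {316239: [[1, 50]]}, rangecheck(316239, 10) is True
# and rangecheck(316239, 60) is False (numbers are illustrative).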

def create_dir(parent_dir, name):
    dir = parent_dir.Get(name)
    if not dir:
        dir = parent_dir.mkdir(name)
    return dir

def gotodir(base, path):
    # create (if needed) and cd into the nested TDirectories for all but the
    # last element of path
    current = base
    for directory in path[:-1]:
        current = create_dir(current, directory)
        current.cd()

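# Process one DQMIO file: scan its Indices tree, pick out blocks of interesting
# ME types, and write the matching MEs of each block into a separate
# DQMGUI-style ROOT file named after the artificial run number. Returns the
# list of files written.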
def harvestfile(fname):
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    idxtree = getattr(f, "Indices")
    # The Indices tree has one entry per (run, lumi, ME type) block, pointing
    # into the corresponding per-type tree via FirstIndex/LastIndex.

    knownlumis = set()
    files = []

    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        if treenames[metype] not in interesting_types:
            continue

        # lumi == 0 marks a block covering the full run; report it as spanning
        # the lumisections seen so far.
        endrun = run
        if lumi == 0:
            endlumi = max(knownlumis)
            lumi = min(knownlumis)
        else:
            endlumi = lumi

        if not (rangecheck(run, lumi) or rangecheck(endrun, endlumi)):
            continue
        if perlumionly and lumi != endlumi:
            continue

        # Follow the DQMGUI file naming convention: the artificial run number
        # encodes the lumi range (see fake_run), while the real run number goes
        # into the "perlumi<run>" part of the name.
        filename = "DQM_V0001_R%s__perlumiharvested__perlumi%d_%s_v1__DQMIO.root" % (fake_run(lumi, endlumi), run, treenames[metype])
        prefix = ["DQMData", "Run %s" % fake_run(lumi, endlumi)]

        result_file = None
        subsystems = set()

        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treenames[metype])
        metree.GetEntry(0)
        # only read the FullName branch while scanning for interesting MEs
        metree.SetBranchStatus("*", 0)
        metree.SetBranchStatus("FullName", 1)

        for x in range(firstidx, lastidx+1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if check_interesting(mename):
                # re-read the entry with all branches enabled to also get the Value
                metree.GetEntry(x, 1)
                value = metree.Value

                # create the output file lazily, only once there is something to write
                if not result_file:
                    result_file = ROOT.TFile(filename, 'recreate')
                path = mename.split("/")
                filepath = prefix + [path[0], "Run summary"] + path[1:]
                subsystems.add(path[0])
                gotodir(result_file, filepath)
                value.Write()

        if result_file:
            # DQMGUI expects run/lumi metadata in each subsystem's EventInfo
            # folder; store the artificial run number and put the real run
            # number into the lumisection field, matching the file name above.
            for subsys in subsystems:
                # gotodir drops the last path element, so "blub" only makes it cd into EventInfo
                gotodir(result_file, prefix + [subsys, "Run summary", "EventInfo", "blub"])
                s = ROOT.TObjString("<iRun>i=%s</iRun>" % fake_run(lumi, endlumi))
                s.Write()
                s = ROOT.TObjString("<iLumiSection>i=%s</iLumiSection>" % run)
                s.Write()

            result_file.Close()
            files.append(filename)

    return files
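
# visDQMUpload.py is the DQMGUI upload client; it needs to be available on PATH
# for the upload option to work.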
def uploadfile(filename):
    uploadcommand = ["visDQMUpload.py", uploadurl, filename]
    print("Uploading ... %s" % uploadcommand)
    subprocess.check_call(uploadcommand)

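# Harvest the files in parallel worker processes; uploading and deleting the
# produced files happens in the main process, only if an upload URL was accepted.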
pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
for outfiles in pool.imap_unordered(harvestfile, files):
    if uploadurl:
        for f in outfiles:
            uploadfile(f)
            os.remove(f)
    ctr += 1
    print("Processed %d files of %d, got %d out files...\r" % (ctr, len(files), len(outfiles)), end='')
print("\nDone.")