LumiDB/scripts/makePileupJSON.py

0001 #!/usr/bin/env python3
0002 import argparse
0003 import RecoLuminosity.LumiDB.LumiConstants as LumiConstants
0004 import re
0005 from math import sqrt
0006
0007 ##############################
0008 ## ######################## ##
0009 ## ## ################## ## ##
0010 ## ## ## Main Program ## ## ##
0011 ## ## ################## ## ##
0012 ## ######################## ##
0013 ##############################
0014
0015 # This script takes a .csv file containing the per-BX luminosity values from brilcalc and processes them to
0016 # produce an output file containing the average and RMS pileup for each lumi section. This can then be fed
0017 # into pileupCalc.py to calculate a final pileup histogram. For more documentation see:
0018 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData
0019
0020 # modified from the estimatePileup.py script in RecoLuminosity/LumiDB
0021 # originally 5 Jan, 2012  Mike Hildreth
0022
0023 if __name__ == '__main__':
0024     parameters = LumiConstants.ParametersObject()
0025
0026     parser = argparse.ArgumentParser(description="Script to estimate average and RMS pileup using the per-bunch luminosity information provided by brilcalc. The output is a JSON file containing a dictionary by runs with one entry per lumi section.")
0027     parser.add_argument('inputFile', help='CSV input file as produced from brilcalc')
0028     parser.add_argument('outputFile', help='Name of JSON output file')
0029     parser.add_argument('-b', '--selBX', metavar='BXLIST', help='Comma-separated list of BXs to use (will use all by default)')
0030     parser.add_argument('-n', '--no-threshold', action='store_true', help='By default, to avoid including spurious luminosity from afterglow in the pileup calculation, bunches with luminosity below a given threshold are excluded. This threshold is 8.0/ub/LS for HFOC at nominal energy, 2.0/ub/LS for HFOC at low energy, and 1.2/ub/LS for other luminometers. If the input data has already been preprocessed (e.g. by using the --xingTr argument in brilcalc) to exclude these bunches, or if you are running on special data with low overall luminosity, then use this flag to disable the application of the threshold.')
0031     args = parser.parse_args()
0032
0033     output = args.outputFile
0034
0035     sel_bx = set()
0036     if args.selBX:
0037         for ibx in args.selBX.split(","):
0038             try:
0039                 bx = int(ibx)
0040                 sel_bx.add(bx)
0041             except:
0042                 print(ibx,"is not an int")
0043         print("Processing",args.inputFile,"with selected BXs:", sorted(sel_bx))
0044     else:
0045         print("Processing",args.inputFile,"with all BX")
0046
0047     # The "CSV" file actually is a little complicated, since we also want to split on the colons separating
0048     # run/fill as well as the spaces separating the per-BX information.
0049     sepRE = re.compile(r'[\]\[\s,;:]+')
0050     csv_input = open(args.inputFile, 'r')
0051     last_run = -1
0052
0053     last_valid_lumi = []
0054
0055     output_line = '{'
0056     for line in csv_input:
0057         if line[0] == '#':
0058             continue # skip comment lines
0059
0060         pieces = sepRE.split(line.strip())
0061
0062         if len(pieces) < 15:
0063             # The most likely cause of this is that we're using a csv file without the bunch data, so might as well
0064             # just give up now.
0065             raise RuntimeError("Not enough fields in input line; maybe you forgot to include --xing in your brilcalc command?\n"+line)
0066         try:
0067             run = int(pieces[0])
0068             lumi_section = int(pieces[2])
0069             beam_energy = float(pieces[10])
0070             #tot_del_lumi = float(pieces[11])
0071             #tot_rec_lumi = float(pieces[12])
0072             luminometer = pieces[14]
0073
0074             if luminometer == "HFOC":
0075                 if beam_energy > 3000:
0076                     # Use higher threshold for nominal runs otherwise we'll get a lot of junk.
0077                     threshold = 8.0
0078                 else:
0079                     # Use lower threshold for 5.02 GeV runs since the overall lumi is quite low.
0080                     threshold = 2.0
0081             else:
0082                 threshold = 1.2
0083
0084             xing_lumi_array = []
0085             for bxid, bunch_del_lumi, bunch_rec_lumi in zip(pieces[15::3], pieces[16::3], pieces[17::3]):
0086                 if sel_bx and int(bxid) not in sel_bx:
0087                     continue
0088                 if args.no_threshold or float(bunch_del_lumi) > threshold:
0089                     xing_lumi_array.append([int(bxid), float(bunch_del_lumi), float(bunch_rec_lumi)])
0090         except:
0091             print("Failed to parse line: check if the input format has changed")
0092             print(pieces[0],pieces[1],pieces[2],pieces[3],pieces[4],pieces[5],pieces[6],pieces[7],pieces[8],pieces[9])
0093             continue
0094
0095         # In principle we could also have a check for if len(pieces) == 15 (i.e. no BX data is present) but
0096         # luminosity is present, which implies we're using a luminometer without BX data. In this case we
0097         # could just extrapolate from the previous good LS (just scaling the pileup by the ratio of
0098         # luminosity). In practice this is an extremely small number of lumi sections, and in 2018 the 14
0099         # lumisections in normtag_PHYSICS without BX data (all RAMSES) are all in periods with zero recorded
0100         # lumi, so they don't affect the resulting pileup at all. So for run 2 I haven't bothered to implement
0101         # this.
0102
0103         if run != last_run:
0104             # the script also used to add a dummy LS at the end of runs but this is not necessary in run 2
0105             if last_run > 0:
0106                 output_line = output_line[:-1] + '], '
0107             last_run = run
0108             output_line += ('\n"%d":' % run )
0109             output_line += ' ['
0110
0111         # Now do the actual parsing.
0112         total_lumi = 0
0113         total_int = 0
0114         total_int2 = 0
0115         total_weight2 = 0
0116
0117         # first loop to get sum for (weighted) mean
0118         for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
0119             total_lumi += bunch_rec_lumi
0120             # this will eventually be_pileup*bunch_rec_lumi but it's quicker to apply the factor once outside the loop
0121             total_int += bunch_del_lumi*bunch_rec_lumi
0122         # filled_xings = len(xing_lumi_array)
0123
0124         # convert sum to pileup and get the mean
0125         total_int *= parameters.orbitLength / parameters.lumiSectionLength
0126         if total_lumi > 0:
0127             mean_int = total_int/total_lumi
0128         else:
0129             mean_int = 0
0130
0131         # second loop to get (weighted) RMS
0132         for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
0133             mean_pileup = bunch_del_lumi * parameters.orbitLength / parameters.lumiSectionLength
0134             if mean_pileup > 100:
0135                 print("mean number of pileup events > 100 for run %d, lum %d : m %f l %f" % \
0136                       (run, lumi_section, mean_pileup, bunch_del_lumi))
0137                 #print "mean number of pileup events for lum %d: m %f idx %d l %f" % (lumi_section, mean_pileup, bxid, bunch_rec_lumi)
0138
0139             total_int2 += bunch_rec_lumi*(mean_pileup-mean_int)*(mean_pileup-mean_int)
0140             total_weight2 += bunch_rec_lumi*bunch_rec_lumi
0141
0142         # compute final RMS and write it out
0143         #print " LS, Total lumi, filled xings %d, %f, %d" %(lumi_section,total_lumi,filled_xings)
0144         bunch_rms_lumi = 0
0145         denom = total_lumi*total_lumi-total_weight2
0146         if total_lumi > 0 and denom > 0:
0147             bunch_rms_lumi = sqrt(total_lumi*total_int2/denom)
0148
0149         output_line += "[%d,%2.4e,%2.4e,%2.4e]," % (lumi_section, total_lumi, bunch_rms_lumi, mean_int)
0150         last_valid_lumi = [lumi_section, total_lumi, bunch_rms_lumi, mean_int]
0151
0152     output_line = output_line[:-1] + ']}'
0153     csv_input.close()
0154
0155     outputfile = open(output,'w')
0156     if not outputfile:
0157         raise RuntimeError("Could not open '%s' as an output JSON file" % output)
0158
0159     outputfile.write(output_line)
0160     outputfile.close()
0161     print("Output written to", output)