File indexing completed on 2024-04-06 11:58:55
0001
0002
0003
0004
0005
0006
0007
0008 import uproot3
0009 import numpy as np
0010 import pandas as pd
0011 import argparse
0012 import matplotlib.pyplot as plt
0013
0014 parser = argparse.ArgumentParser()
0015
0016 parser.add_argument("-PU", "--filePU", help="input PU file")
0017 parser.add_argument("-NPU", "--fileNPU", help="input no PU file")
0018 parser.add_argument("-O", "--opfilename", help="ouput file name")
0019 parser.add_argument("-s", "--start", help="start entry for input PU file")
0020 parser.add_argument("-e", "--end", help="end entry for input PU file")
0021
0022 fName1 = parser.parse_args().filePU
0023 fName2 = parser.parse_args().fileNPU
0024 foutput = parser.parse_args().opfilename
0025 start = parser.parse_args().start
0026 stop = parser.parse_args().end
0027
0028
0029 tree1 = uproot3.open(fName1)['hcalIsoTrkAnalyzer/CalibTree']
0030
0031
0032 tree2 = uproot3.open(fName2)['hcalIsoTrkAnalyzer/CalibTree']
0033
0034 print ("loaded files")
0035
0036 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag']
0037
0038
0039
0040 branchesnpu =['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag']
0041
0042 dictpu = tree1.arrays(branchespu, entrystart=int(start), entrystop=int(stop))
0043
0044 npu_entries = tree2.numentries
0045
0046 scale = 5000000
0047 npu_start = 0
0048 i = 0
0049
0050 for index in range(0,npu_entries, scale):
0051 npu_stop = index+scale
0052 if (npu_stop > npu_entries):
0053 npu_stop = npu_entries
0054 dictnpu = tree2.arrays(branchesnpu, entrystart=npu_start, entrystop=npu_stop)
0055 npu_start = npu_stop
0056
0057 dfspu = pd.DataFrame.from_dict(dictpu)
0058 dfspu.columns=branchespu
0059 dfsnpu = pd.DataFrame.from_dict(dictnpu)
0060 dfsnpu.columns=branchesnpu
0061 print("loaded % of nopile file is =",(npu_stop/npu_entries)*100)
0062 print ("PU sample size:",dfspu.shape[0])
0063 print ("noPU sample size:",dfsnpu.shape[0])
0064
0065 cuts_pu = (dfspu['t_selectTk'])&(dfspu['t_qltyFlag'])&(dfspu['t_hmaxNearP']<20)&(dfspu['t_eMipDR']<1)&(abs(dfspu['t_p'] - 50)<10)&(dfspu['t_eHcal']>10)
0066
0067 cuts_npu = (dfsnpu['t_selectTk'])&(dfsnpu['t_qltyFlag'])&(dfsnpu['t_hmaxNearP']<20)&(dfsnpu['t_eMipDR']<1)&(abs(dfsnpu['t_p'] - 50)<10)&(dfsnpu['t_eHcal']>10)
0068
0069 dfspu = dfspu.loc[cuts_pu]
0070 dfspu = dfspu.reset_index(drop=True)
0071
0072 dfsnpu = dfsnpu.loc[cuts_npu]
0073 dfsnpu = dfsnpu.reset_index(drop=True)
0074 branches_skim = ['t_Event','t_ieta','t_iphi','t_eHcal']
0075 dfsnpu = dfsnpu[branches_skim]
0076
0077 merged = pd.merge(dfspu, dfsnpu , on=['t_Event','t_ieta','t_iphi'])
0078 print(merged.keys())
0079 print ("selected common events before cut:",merged.shape[0])
0080
0081
0082 keepvars = ['t_nVtx','t_ieta','t_eHcal10','t_eHcal30','t_delta','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p','t_eMipDR']
0083
0084
0085
0086
0087
0088
0089 merged1=merged
0090
0091
0092 print ("selected events after cut for all ietas:",merged1.shape[0])
0093 merged1['t_delta']=merged1['t_eHcal30']-merged1['t_eHcal10']
0094 final_df_all = merged1[keepvars]
0095 output_file = foutput+'_'+str(i)+"_"+start+"_"+stop+"_all.parquet"
0096 final_df_all.to_parquet(output_file)
0097 final_df_all.to_csv(foutput+"_"+str(i)+"_"+start+"_"+stop+"_all.txt")
0098
0099
0100
0101 cuts2 = abs(merged['t_ieta'])<16
0102 merged2=merged.loc[cuts2]
0103 merged2 = merged2.reset_index(drop=True)
0104 print ("selected events after cut for ieta < 16:",merged2.shape[0])
0105
0106 merged2['t_delta']=merged2['t_eHcal30']-merged2['t_eHcal10']
0107 final_df_low = merged2[keepvars]
0108 final_df_low.to_parquet(foutput+'_'+str(i)+"_"+start+"_"+stop+"_lo.parquet")
0109 final_df_low.to_csv(foutput+'_'+str(i)+"_"+start+"_"+stop+"_lo.txt")
0110
0111
0112
0113 cuts3 = abs(merged['t_ieta'])>15
0114 merged3=merged.loc[cuts3]
0115 merged3 = merged3.reset_index(drop=True)
0116 print ("selected events after cut for ieta > 15:",merged3.shape[0])
0117
0118 merged3['t_delta']=merged3['t_eHcal30']-merged3['t_eHcal10']
0119 final_df_hi = merged3[keepvars]
0120 final_df_hi.to_parquet(foutput+'_'+str(i)+"_"+start+"_"+stop+"_hi.parquet")
0121 final_df_hi.to_csv(foutput+'_'+str(i)+"_"+start+"_"+stop+"_hi.txt")
0122 i+=1