File indexing completed on 2024-04-06 11:58:55
0001
0002
0003
0004
0005
0006
0007
0008 import uproot3
0009 import numpy as np
0010 import pandas as pd
0011 import matplotlib.pyplot as plt
0012 import argparse
0013 from mpl_toolkits.mplot3d import Axes3D
0014
# Tree read from both input files.
TREE_PATH = 'hcalIsoTrkAnalyzer/CalibTree'

# Branches read from both the PU and the no-PU tree (the two lists were
# identical in the original script, so a single list is shared).
BRANCHES = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag']

# Columns of the no-PU sample that take part in the merge.
SKIM_BRANCHES = ['t_Event','t_ieta','t_iphi','t_p','t_eHcal','t_eHcal10','t_eHcal30']

# Columns used to pair a PU track with its no-PU counterpart.
# NOTE(review): 't_Run' is not part of the key -- confirm that event numbers
# are unique across runs in the inputs.
MATCH_KEYS = ['t_Event','t_ieta','t_iphi']

# Columns written to the parquet and text outputs.
KEEP_VARS = ['t_nVtx','t_ieta','t_eHcal10_x','t_eHcal30_x','t_delta_x','t_eHcal10_y','t_eHcal30_y','t_delta_y','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p_x','t_p_y','t_eMipDR']

# Number of no-PU entries loaded per iteration.
CHUNK_SIZE = 5000000


def parse_arguments(argv=None):
    """Parse the command line once and return the resulting namespace.

    Every option is required: the script cannot run without any of them, so
    a missing option is reported by argparse up front instead of failing
    later with a TypeError.

    ``start`` and ``end`` are kept as the raw strings typed by the user
    because they are embedded verbatim in the output file names.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-PU", "--filePU", required=True, help="input PU file")
    parser.add_argument("-NPU", "--fileNPU", required=True, help="input no PU file")
    parser.add_argument("-O", "--opfilename", required=True, help="ouput file name")
    parser.add_argument("-s", "--start", required=True, help="start entry for input PU file")
    parser.add_argument("-e", "--end", required=True, help="end entry for input PU file")
    return parser.parse_args(argv)


def chunk_ranges(total, step):
    """Yield ``(start, stop)`` pairs covering ``[0, total)`` in steps of ``step``.

    The last pair is clipped to ``total``; nothing is yielded when ``total``
    is zero.
    """
    for first in range(0, total, step):
        yield first, min(first + step, total)


def select_tracks(frame):
    """Return the rows of ``frame`` passing the track selection, re-indexed from 0.

    A row is kept when ``t_selectTk`` and ``t_qltyFlag`` are set,
    ``t_hmaxNearP < 20``, ``t_eMipDR < 1``, ``|t_p - 50| < 10`` and
    ``t_eHcal > 10``.
    """
    passed = (
        (frame['t_selectTk'])
        & (frame['t_qltyFlag'])
        & (frame['t_hmaxNearP'] < 20)
        & (frame['t_eMipDR'] < 1)
        & (abs(frame['t_p'] - 50) < 10)
        & (frame['t_eHcal'] > 10)
    )
    return frame.loc[passed].reset_index(drop=True)


def match_tracks(pu, nopu):
    """Inner-join selected PU and no-PU tracks and add the energy differences.

    Only ``SKIM_BRANCHES`` of ``nopu`` are used. Columns present on both
    sides get pandas' default ``_x`` (PU) and ``_y`` (no-PU) suffixes.
    ``t_delta_x`` / ``t_delta_y`` are ``t_eHcal30 - t_eHcal10`` for the
    respective sample.
    """
    matched = pd.merge(pu, nopu[SKIM_BRANCHES], on=MATCH_KEYS)
    matched['t_delta_x'] = matched['t_eHcal30_x'] - matched['t_eHcal10_x']
    matched['t_delta_y'] = matched['t_eHcal30_y'] - matched['t_eHcal10_y']
    return matched


def load_frame(tree, entrystart, entrystop):
    """Read ``BRANCHES`` for the given entry range of ``tree`` into a DataFrame.

    The columns are renamed to the plain branch names, which relies on the
    returned arrays being in the same order as ``BRANCHES`` (as the original
    script did).
    """
    arrays = tree.arrays(BRANCHES, entrystart=entrystart, entrystop=entrystop)
    frame = pd.DataFrame.from_dict(arrays)
    frame.columns = BRANCHES
    return frame


def write_outputs(matched, stem):
    """Write the matched tracks to ``stem``.parquet, ``stem``.txt and ``stem``.root."""
    final_df = matched[KEEP_VARS]
    final_df.to_parquet(stem + ".parquet")
    final_df.to_csv(stem + ".txt")

    print(matched['t_ieta'].dtype)

    with uproot3.recreate(stem + ".root") as f:
        f["tree"] = uproot3.newtree({"t_Event": np.int32,
                                     "t_p_PU": np.float64,
                                     "t_eHcal_PU": np.float64,
                                     "t_delta_PU": np.float64,
                                     "t_p_NoPU": np.float64,
                                     "t_eHcal_noPU": np.float64,
                                     "t_delta_NoPU": np.float64,
                                     "t_ieta": np.int32})

        # Every branch is handed over as a NumPy array (t_Event used to be
        # passed as a pandas Series, unlike all the others).
        f["tree"].extend({"t_Event": matched['t_Event'].to_numpy(),
                          "t_p_PU": matched['t_p_x'].to_numpy(),
                          "t_eHcal_PU": matched['t_eHcal_x'].to_numpy(),
                          "t_delta_PU": matched['t_delta_x'].to_numpy(),
                          "t_p_NoPU": matched['t_p_y'].to_numpy(),
                          "t_eHcal_noPU": matched['t_eHcal_y'].to_numpy(),
                          "t_delta_NoPU": matched['t_delta_y'].to_numpy(),
                          "t_ieta": matched['t_ieta'].to_numpy()})


def main(argv=None):
    """Match PU tracks against the no-PU sample, one no-PU chunk at a time.

    The requested entry range of the PU tree is loaded and selected once.
    The no-PU tree is then read in chunks of ``CHUNK_SIZE`` entries; for each
    chunk the selected tracks are joined with the PU tracks and written to
    ``<opfilename>_<chunk>_<start>_<end>.{parquet,txt,root}``.
    """
    args = parse_arguments(argv)
    start = args.start
    stop = args.end

    tree_pu = uproot3.open(args.filePU)[TREE_PATH]
    tree_nopu = uproot3.open(args.fileNPU)[TREE_PATH]
    print("loaded files")

    # The PU sample does not depend on the no-PU chunk, so it is loaded and
    # selected a single time instead of once per iteration.
    pu_all = load_frame(tree_pu, int(start), int(stop))
    pu_size = pu_all.shape[0]
    pu_selected = select_tracks(pu_all)

    nopu_entries = tree_nopu.numentries

    for chunk, (first, last) in enumerate(chunk_ranges(nopu_entries, CHUNK_SIZE)):
        nopu_all = load_frame(tree_nopu, first, last)
        print("loaded % of nopile file is =", (last / nopu_entries) * 100)
        print("PU sample size:", pu_size)
        print("noPU sample size:", nopu_all.shape[0])

        matched = match_tracks(pu_selected, select_tracks(nopu_all))
        # No further cut is applied after the merge, so both counts are the
        # same; both messages are kept so the log format does not change.
        print("selected common events before cut:", matched.shape[0])
        print("selected events after cut:", matched.shape[0])

        stem = "{}_{}_{}_{}".format(args.opfilename, chunk, start, stop)
        write_outputs(matched, stem)


if __name__ == "__main__":
    main()