Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 12:48:16

0001 ######################################################################################
0002 # Makes pkl, root and text files comparing PU and noPU samples for training regressor and other stuff
0003 # Usage:
0004 # python3 isotrackRootTreeMaker.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotrackRelval 
0005 ######################################################################################
0006 
0007 import uproot
0008 import numpy as np
0009 import pandas as pd
0010 import matplotlib.pyplot as plt
0011 import argparse
0012 from mpl_toolkits.mplot3d import Axes3D
0013 
0014 parser = argparse.ArgumentParser()
0015 parser.add_argument("-PU", "--filePU",help="input PU file",default="2021PU.root")
0016 parser.add_argument("-NPU", "--fileNPU",help="input no PU file",default="2021noPU.root")
0017 parser.add_argument("-O", "--opfilename",help="ouput file name",default="isotk_relval")
0018 
0019 fName1 = parser.parse_args().filePU
0020 fName2 = parser.parse_args().fileNPU
0021 foutput = parser.parse_args().opfilename
0022 
0023 # PU
0024 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
0025 
0026 #no PU
0027 tree2 = uproot.open(fName2,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
0028 
0029 #tree2.keys()
0030 print ("loaded files")
0031 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag']
0032 branchesnpu = ['t_Event','t_ieta','t_iphi','t_p','t_eHcal','t_eHcal10','t_eHcal30']
0033 #dictn = tree.arrays(branches=branches,entrystart=0, entrystop=300)
0034 dictpu = tree1.arrays(branches=branchespu)
0035 dictnpu = tree2.arrays(branches=branchesnpu)
0036 dfspu = pd.DataFrame.from_dict(dictpu)
0037 dfspu.columns=branchespu
0038 dfsnpu = pd.DataFrame.from_dict(dictnpu)
0039 dfsnpu.columns=branchesnpu
0040 print ("loaded dicts and dfs")
0041 print ("PU sample size:",dfspu.shape[0])
0042 print ("noPU sample size:",dfsnpu.shape[0])
0043 #dfspu
0044 merged = pd.merge(dfspu, dfsnpu , on=['t_Event','t_ieta','t_iphi'])
0045 print ("selected common events before cut:",merged.shape[0])
0046 #print(merged)
0047 keepvars =  ['t_nVtx','t_ieta','t_eHcal10_x','t_eHcal30_x','t_delta_x','t_eHcal10_y','t_eHcal30_y','t_delta_y','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p_x','t_p_y','t_eMipDR']
0048 
0049 '''
0050 #########################all ietas
0051 cuts1 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10)
0052 merged1=merged.loc[cuts1]
0053 merged1 = merged1.reset_index(drop=True)
0054 print ("selected events after cut for all ietas:",merged1.shape[0])
0055 merged1['t_delta']=merged1['t_eHcal30']-merged1['t_eHcal10']
0056 final_df_all = merged1[keepvars]
0057 #final_dfnp = final_df.values
0058 #np.save('isotk_relval_all.npy',final_df_all.values)
0059 #np.save('isotk_relval_all.npy',final_df_all)
0060 final_df_all.to_pickle(foutput+"_all.pkl")
0061 final_df_all.to_csv(foutput+"_all.txt")
0062 #########################split ieta < 16
0063 
0064 cuts2 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_ieta'])<16)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10)
0065 merged2=merged.loc[cuts2]
0066 merged2 = merged2.reset_index(drop=True)
0067 print ("selected events after cut for ieta < 16:",merged2.shape[0])
0068 merged2['t_delta']=merged2['t_eHcal30']-merged2['t_eHcal10']
0069 final_df_low = merged2[keepvars]
0070 #final_dfnp = final_df.values
0071 #np.save('isotk_relval_lo.npy',final_df_low.values)
0072 #np.save('isotk_relval_lo.npy',final_df_low)
0073 final_df_low.to_pickle(foutput+"_lo.pkl")
0074 final_df_low.to_csv(foutput+"_lo.txt")
0075 '''
0076 
0077 #########################split ieta > 24
0078 
0079 cuts3 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_p_x'] - 50)<10)&(merged['t_eHcal_x']>10)
0080 merged3=merged.loc[cuts3]
0081 merged3 = merged3.reset_index(drop=True)
0082 print ("selected events after cut for ieta > 24:",merged3.shape[0])
0083 merged3['t_delta_x']=merged3['t_eHcal30_x']-merged3['t_eHcal10_x']
0084 merged3['t_delta_y']=merged3['t_eHcal30_y']-merged3['t_eHcal10_y']
0085 
0086 final_df_hi = merged3[keepvars]
0087 final_df_hi.to_pickle(foutput+"_hi.pkl")
0088 final_df_hi.to_csv(foutput+"_hi.txt")
0089 
0090 '''
0091 threedee = plt.figure().gca(projection='3d')
0092 threedee.scatter(final_df_hi['t_eHcal_x'], final_df_hi['t_eHcal_y'], final_df_hi['t_delta'])
0093 threedee.set_xlabel('Corrected Energy')
0094 threedee.set_ylabel('Uncorrected Energy')
0095 threedee.set_zlabel('delta')
0096 fig = threedee.get_figure()
0097 fig.show()
0098 fig.savefig('debu.png')
0099 
0100 print(type(merged3['t_p']))
0101 print(merged3['t_p'])
0102 print(merged3['t_p'].to_numpy())
0103 
0104 a=merged3['t_p'].to_numpy()
0105 print(type(a))
0106 print(a.ndim)
0107 print(a.shape)
0108 '''
0109 
0110 print(merged3['t_ieta'].dtype)
0111 
0112 with uproot.recreate(foutput+".root") as f:
0113 
0114     f["tree"] = uproot.newtree({"t_Event": np.int32,
0115                                 "t_p_PU": np.float64,
0116                                 "t_eHcal_PU":np.float64,
0117                                 "t_delta_PU":np.float64,
0118                                 "t_p_NoPU": np.float64,
0119                                 "t_eHcal_noPU":np.float64,
0120                                 "t_delta_NoPU":np.float64,
0121                                 "t_ieta":np.int32})
0122 
0123 
0124     f["tree"].extend({"t_Event": merged3['t_Event'],
0125                       "t_p_PU": merged3['t_p_x'].to_numpy(),
0126                       "t_eHcal_PU": merged3['t_eHcal_x'].to_numpy(),
0127                       "t_delta_PU": merged3['t_delta_x'].to_numpy(),
0128                       "t_p_NoPU": merged3['t_p_y'].to_numpy(),
0129                       "t_eHcal_noPU": merged3['t_eHcal_y'].to_numpy(),
0130                       "t_delta_NoPU": merged3['t_delta_y'].to_numpy(),
0131                       "t_ieta": merged3['t_ieta'].to_numpy()})