Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 10:43:03

0001 ######################################################################################
0002 # Makes pkl and text files comparing PU and noPU samples for training regressor and other stuff
0003 # Usage:
0004 # source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh
0005 # python3 isotrackNtupler.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -NPU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -O isotk_relval 
0006 ######################################################################################
0007 
0008 
0009 
0010 import uproot
0011 import numpy as np
0012 import pandas as pd
0013 import argparse
0014 import matplotlib.pyplot as plt
0015 
######################### configuration

# Path of the tree read from both input files.
TREE_PATH = 'HcalIsoTrkAnalyzer/CalibTree'

# Options handed to uproot's XRootD source for both files.
XROOTD_OPTIONS = dict(chunkbytes=1024**3, limitbytes=1024**3)

DEFAULT_PU_FILE = "root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root"
# The "root:" scheme was missing from this default (the PU default has it).
# NOTE(review): this is the same *_PU.root file as DEFAULT_PU_FILE; confirm
# which file is the intended noPU sample.
DEFAULT_NPU_FILE = "root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root"

# Branches read from the PU tree and from the noPU tree.
BRANCHES_PU = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag']
BRANCHES_NPU = ['t_Event','t_ieta','t_iphi','t_eHcal']

# Columns used to match a PU row with a noPU row.
MERGE_KEYS = ['t_Event','t_ieta','t_iphi']

# Columns written to the output files.  't_eHcal' exists in both inputs, so
# after the merge it appears as 't_eHcal_x' (PU, left frame) and 't_eHcal_y'
# (noPU, right frame); 't_delta' is computed in select_tracks().
KEEP_VARS = ['t_nVtx','t_ieta','t_eHcal10','t_eHcal30','t_delta','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p','t_eMipDR']


def parse_arguments(argv=None):
    """Parse the command line once and return the argparse namespace.

    argv: list of argument strings; None means use sys.argv[1:].
    The namespace has the attributes filePU, fileNPU and opfilename.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-PU", "--filePU", help="input PU file", default=DEFAULT_PU_FILE)
    parser.add_argument("-NPU", "--fileNPU", help="input no PU file", default=DEFAULT_NPU_FILE)
    parser.add_argument("-O", "--opfilename", help="output file name", default="isotk_relval")
    return parser.parse_args(argv)


def load_tree(file_name):
    """Open file_name with uproot and return the TREE_PATH tree from it."""
    return uproot.open(file_name, xrootdsource=dict(XROOTD_OPTIONS))[TREE_PATH]


def tree_to_frame(tree, branches):
    """Read `branches` from `tree` into a DataFrame with those column names.

    Columns are matched to the requested branches by name, not by position,
    so the result does not depend on the order in which tree.arrays() returns
    its entries.  A KeyError is raised if a requested branch is missing.
    """
    arrays = tree.arrays(branches=branches)
    # Keys may come back as bytes rather than str; normalise them to str so
    # they can be compared with the requested names.
    named = {
        (key.decode() if isinstance(key, bytes) else key): values
        for key, values in arrays.items()
    }
    return pd.DataFrame.from_dict(named)[list(branches)]


def region_masks(merged):
    """Return [(suffix, label, mask), ...] for the three output regions.

    Every mask applies the same cuts (t_selectTk, t_qltyFlag,
    t_hmaxNearP < 20, t_eMipDR < 1, |t_p - 50| < 10, t_eHcal_x > 10); the
    "lo" and "hi" regions additionally require |t_ieta| < 16 and
    |t_ieta| > 15 respectively.  `suffix` is used in the output file names and
    `label` in the printed summary.
    """
    common = (
        (merged['t_selectTk'])
        & (merged['t_qltyFlag'])
        & (merged['t_hmaxNearP'] < 20)
        & (merged['t_eMipDR'] < 1)
        & (abs(merged['t_p'] - 50) < 10)
        & (merged['t_eHcal_x'] > 10)
    )
    abs_ieta = abs(merged['t_ieta'])
    return [
        ("all", "all ietas", common),
        ("lo", "ieta < 16", common & (abs_ieta < 16)),
        ("hi", "ieta > 15", common & (abs_ieta > 15)),
    ]


def select_tracks(merged, mask):
    """Return the rows of `merged` passing `mask`, restricted to KEEP_VARS.

    The result has a fresh 0..n-1 index and a 't_delta' column equal to
    t_eHcal30 - t_eHcal10.
    """
    selected = merged.loc[mask].reset_index(drop=True)
    selected['t_delta'] = selected['t_eHcal30'] - selected['t_eHcal10']
    return selected[KEEP_VARS]


def write_outputs(frame, prefix):
    """Write `frame` to prefix + '.pkl' (pickle) and prefix + '.txt' (CSV)."""
    frame.to_pickle(prefix + ".pkl")
    frame.to_csv(prefix + ".txt")


def main(argv=None):
    """Load both samples, match them, apply the cuts and write the outputs."""
    args = parse_arguments(argv)

    # PU
    tree_pu = load_tree(args.filePU)
    # no PU
    tree_npu = load_tree(args.fileNPU)
    print("loaded files")

    dfspu = tree_to_frame(tree_pu, BRANCHES_PU)
    dfsnpu = tree_to_frame(tree_npu, BRANCHES_NPU)
    print("loaded dicts and dfs")
    print("PU sample size:", dfspu.shape[0])
    print("noPU sample size:", dfsnpu.shape[0])

    # Inner join: keep only rows present in both samples.
    merged = pd.merge(dfspu, dfsnpu, on=MERGE_KEYS)
    print("selected common events before cut:", merged.shape[0])

    for suffix, label, mask in region_masks(merged):
        final_df = select_tracks(merged, mask)
        print("selected events after cut for " + label + ":", final_df.shape[0])
        write_outputs(final_df, args.opfilename + "_" + suffix)


if __name__ == "__main__":
    main()
0084