Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:58:55

0001 ######################################################################################
0002 # Evaluates regressor from loaded model
0003 # Usage:
0004 # source /cvmfs/sft.cern.ch/lcg/views/LCG_97apython3/x86_64-centos7-gcc8-opt/setup.csh
0005 # python3 isotrackApplyRegressor.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -M ./models/model1.h5 -B endcap -O corrfac1.txt
0006 # python3 isotrackApplyRegressor.py -PU root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root -M ./models/model2.h5 -B barrel -O corrfac2.txt
0007 ######################################################################################
0008 # coding: utf-8
0009 
0010 # In[1]:
0011 
0012 import pandas as pd
0013 import numpy as np
0014 import matplotlib.pyplot as plt
0015 import argparse
0016 import tensorflow.keras
0017 from tensorflow.keras.models import Sequential
0018 from tensorflow.keras.layers import Dense
0019 from tensorflow.keras.layers import Dropout
0020 from tensorflow.keras.utils import to_categorical
0021 from tensorflow.keras.utils import plot_model
0022 from tensorflow.keras import regularizers
0023 from sklearn.metrics import roc_curve, auc
0024 from tensorflow.keras.layers import Activation
0025 from tensorflow.keras import backend as K
0026 from tensorflow.keras.models import load_model
0027 import uproot
0028 
0029 
0030 # In[2]:
0031 
0032 parser = argparse.ArgumentParser()
0033 parser.add_argument("-PU", "--filePU",help="input PU file",default="root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root")
0034 parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel')
0035 parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5")
0036 parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt")
0037 
0038 
0039 fName1 = parser.parse_args().filePU
0040 modeln = parser.parse_args().modelname
0041 foutput = parser.parse_args().opfilename
0042 barrelflag = parser.parse_args().ifbarrel
0043 
0044 
0045 
0046 #fName1='/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root'
0047 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
0048 print ("loaded files")
0049 
0050 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh']
0051 dictpu = tree1.arrays(branches=branchespu)
0052 #dictpu = tree1.arrays(branches=branchespu,entrystart=0, entrystop=100000)
0053 dfspu = pd.DataFrame.from_dict(dictpu)
0054 dfspu.columns=branchespu
0055 print ("sample size:",dfspu.shape[0])
0056 
0057 
0058 # In[3]:
0059 
0060 
0061 dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10']
0062 keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR']
0063 
0064 df = dfspu[keepvars].copy()
0065 df['t_eHcal_xun'] = df['t_eHcal']
0066 df['t_delta_un'] = df['t_delta']
0067 df['t_ieta_un'] = df['t_ieta']
0068 
0069 #cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
0070 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt']
0071 
0072 #### if using global norm
0073 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal']
0074 #df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) /  (x.max() - x.min())  if (x.max() - x.min() > 0) else 1.0/200.0)
0075 
0076 #### if using training norm
0077 if barrelflag=='barrel':
0078     var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0079     mina = [20, -15, 17.33086721925065, 17.655660001095384, 0.0, -1.0, -1.0, 0.0, -5.33359869197011, 4.093925265397289, 20.783629520782718, 16.998163268435746, 20.000221125315875, 40.074083721419896, 0.0, 16.998163268435746, 0.0, -15]
0080     maxa = [138, 15, 138.26640254695667, 155.83508832909865, 50.9643486259738, 17.140547914961605, 2870.424876287056, 35.727171580074355, 17.763740802183747, 26.38359781195008, 59.169594172331905, 133.07561272289604, 122.8542027361691, 59.977312414583295, 0.9999987781047821, 133.07561272289604, 50.9643486259738, 15]
0081 else:
0082     var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0083     mina = [20, -27, 20.934556022286415, 27.213239994598553, 0.7803188574616797, -1.0, -1.0, 0.0, -57.422806948423386, 4.435386319143829, 6.013227444563335, 18.5278014652431, 20.00695458613336, 40.00339106433163, 0.0, 18.5278014652431, 0.7803188574616797, -27]
0084     maxa = [117, 28, 584.1150933708996, 822.056631873711, 424.1259534251876, 19.999632518345965, 13218.009997109137, 185.96148682758212, 128.60208600759506, 25.911101538538524, 28.928444814127907, 428.99590471945703, 111.25579111767001, 59.98427126709689, 0.9999948740005493, 428.99590471945703, 424.1259534251876, 28]
0085 
0086 print(mina)
0087 print(maxa)
0088 
0089 normkey = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal']
0090 k = 0
0091 for i in df.keys():
0092     if i not in normkey:
0093         break
0094     #print(normkey[k],mina[k],maxa[k])
0095     df[i]=abs(df[i]-mina[k])/(maxa[k] - mina[k])
0096     k+=1
0097 
0098 
0099 df['t_eHcal_xun'] = df['t_eHcal_xun'].apply(lambda x: 1.0 if (x==0) else x)
0100 uncorrected_values = df['t_eHcal_xun'].values
0101 print (uncorrected_values.shape)
0102 print ('data vars:',df.keys())
0103 data = df.values
0104 X_train = data[:,0:12]
0105 
0106 
0107 # In[4]:
0108 
0109 
0110 model = load_model(modeln)
0111 preds = model.predict(X_train,verbose=1)
0112 preds = preds.reshape(preds.shape[0])
0113 print (preds.shape)
0114 
0115 
0116 # In[5]:
0117 if barrelflag=='barrel':
0118     tag = 'barrel'
0119 else:
0120     tag = 'endcap'
0121 
0122 print(tag)
0123 
0124 plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6)
0125 plt.savefig('predicted_Edist_'+tag+'.png')
0126 plt.close()
0127 
0128 parray = dfspu['t_p'].values
0129 mipdr = dfspu['t_eMipDR'].values
0130 plt.hist(preds/(parray - mipdr), bins =100, range=(0,10),label='predicted e/p-ecal',alpha=0.6)
0131 plt.savefig('predicted_eopdist_'+tag+'.png')
0132 plt.close()
0133 
0134 
0135 
0136 # In[6]:
0137 eventnumarray = np.arange(0,X_train.shape[0],1,dtype=int)
0138 eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
0139 #runnumarray = dfspu['t_Run'].values
0140 #runnumarray = runnumarray.reshape(runnumarray.shape[0],1)
0141 #eventnumarray = dfspu['t_Event'].values
0142 #eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
0143 ietaarray = dfspu['t_ieta'].values
0144 ietaarray = ietaarray.reshape(ietaarray.shape[0],1)
0145 #iphiarray = dfspu['t_iphi'].values
0146 #iphiarray = iphiarray.reshape(iphiarray.shape[0],1)
0147 corrfac = preds/uncorrected_values
0148 corrfac = corrfac.reshape(corrfac.shape[0],1)
0149 fileo = np.hstack((eventnumarray,ietaarray,corrfac))
0150 #fileo = np.hstack((runnumarray,eventnumarray,ietaarray,iphiarray,corrfac))
0151 np.savetxt(foutput, fileo,fmt=['%d','%d','%.10f'])   
0152 
0153 
0154 # In[ ]:
0155 
0156 
0157 
0158