Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-11-09 23:31:37

0001 ######################################################################################
0002 # Evaluates regressor from loaded model
0003 # Usage:
0004 # python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model1.h5 -B endcap -O corrfac1.txt
0005 # python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model2.h5 -B barrel -O corrfac2.txt
0006 ######################################################################################
0007 # coding: utf-8
0008 
0009 # In[1]:
0010 
0011 
0012 import pandas as pd
0013 import numpy as np
0014 import matplotlib.pyplot as plt
0015 import argparse
0016 import tensorflow.keras
0017 from tensorflow.keras.models import Sequential
0018 from tensorflow.keras.layers import Dense
0019 from tensorflow.keras.layers import Dropout
0020 from tensorflow.keras.utils import to_categorical
0021 from tensorflow.keras.utils import plot_model
0022 from tensorflow.keras import regularizers
0023 from sklearn.metrics import roc_curve, auc
0024 from tensorflow.keras.layers import Activation
0025 from tensorflow.keras import backend as K
0026 from tensorflow.keras.models import load_model
0027 import uproot
0028 
0029 
0030 # In[2]:
0031 
0032 parser = argparse.ArgumentParser()
0033 parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root")
0034 #parser.add_argument("-PU", "--filePU",help="input PU file",default="/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root")
0035 parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel')
0036 parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5")
0037 parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt")
0038 
0039 
0040 fName1 = parser.parse_args().filePU
0041 modeln = parser.parse_args().modelname
0042 foutput = parser.parse_args().opfilename
0043 barrelflag = parser.parse_args().ifbarrel
0044 
0045 
0046 
0047 #fName1='/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root'
0048 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
0049 print ("loaded files")
0050 
0051 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh']
0052 dictpu = tree1.arrays(branches=branchespu)
0053 #dictpu = tree1.arrays(branches=branchespu,entrystart=0, entrystop=100000)
0054 dfspu = pd.DataFrame.from_dict(dictpu)
0055 dfspu.columns=branchespu
0056 print ("sample size:",dfspu.shape[0])
0057 
0058 
0059 # In[3]:
0060 
0061 
0062 dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10']
0063 keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR']
0064 
0065 df = dfspu[keepvars].copy()
0066 df['t_eHcal_xun'] = df['t_eHcal']
0067 df['t_delta_un'] = df['t_delta']
0068 df['t_ieta_un'] = df['t_ieta']
0069 
0070 #cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
0071 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt']
0072 
0073 #### if using global norm
0074 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal']
0075 #df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) /  (x.max() - x.min())  if (x.max() - x.min() > 0) else 1.0/200.0)
0076 
0077 #### if using training norm
0078 if barrelflag=='barrel':
0079     var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0080     mina = [20, -15, 17.33086721925065, 17.655660001095384, 0.0, -1.0, -1.0, 0.0, -5.33359869197011, 4.093925265397289, 20.783629520782718, 16.998163268435746, 20.000221125315875, 40.074083721419896, 0.0, 16.998163268435746, 0.0, -15]
0081     maxa = [138, 15, 138.26640254695667, 155.83508832909865, 50.9643486259738, 17.140547914961605, 2870.424876287056, 35.727171580074355, 17.763740802183747, 26.38359781195008, 59.169594172331905, 133.07561272289604, 122.8542027361691, 59.977312414583295, 0.9999987781047821, 133.07561272289604, 50.9643486259738, 15]
0082 else:
0083     var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0084     mina = [20, -27, 20.934556022286415, 27.213239994598553, 0.7803188574616797, -1.0, -1.0, 0.0, -57.422806948423386, 4.435386319143829, 6.013227444563335, 18.5278014652431, 20.00695458613336, 40.00339106433163, 0.0, 18.5278014652431, 0.7803188574616797, -27]
0085     maxa = [117, 28, 584.1150933708996, 822.056631873711, 424.1259534251876, 19.999632518345965, 13218.009997109137, 185.96148682758212, 128.60208600759506, 25.911101538538524, 28.928444814127907, 428.99590471945703, 111.25579111767001, 59.98427126709689, 0.9999948740005493, 428.99590471945703, 424.1259534251876, 28]
0086 
0087 print(mina)
0088 print(maxa)
0089 
0090 normkey = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal']
0091 k = 0
0092 for i in df.keys():
0093     if i not in normkey:
0094         break
0095     #print(normkey[k],mina[k],maxa[k])
0096     df[i]=abs(df[i]-mina[k])/(maxa[k] - mina[k])
0097     k+=1
0098 
0099 
0100 df['t_eHcal_xun'] = df['t_eHcal_xun'].apply(lambda x: 1.0 if (x==0) else x)
0101 uncorrected_values = df['t_eHcal_xun'].values
0102 print (uncorrected_values.shape)
0103 print ('data vars:',df.keys())
0104 data = df.values
0105 X_train = data[:,0:12]
0106 
0107 
0108 # In[4]:
0109 
0110 
0111 model = load_model(modeln)
0112 preds = model.predict(X_train,verbose=1)
0113 preds = preds.reshape(preds.shape[0])
0114 print (preds.shape)
0115 
0116 
0117 # In[5]:
0118 if barrelflag=='barrel':
0119     tag = 'barrel'
0120 else:
0121     tag = 'endcap'
0122 
0123 print(tag)
0124 
0125 plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6)
0126 plt.savefig('predicted_Edist_'+tag+'.png')
0127 plt.close()
0128 
0129 parray = dfspu['t_p'].values
0130 mipdr = dfspu['t_eMipDR'].values
0131 plt.hist(preds/(parray - mipdr), bins =100, range=(0,10),label='predicted e/p-ecal',alpha=0.6)
0132 plt.savefig('predicted_eopdist_'+tag+'.png')
0133 plt.close()
0134 
0135 
0136 
0137 # In[6]:
0138 eventnumarray = np.arange(0,X_train.shape[0],1,dtype=int)
0139 eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
0140 #runnumarray = dfspu['t_Run'].values
0141 #runnumarray = runnumarray.reshape(runnumarray.shape[0],1)
0142 #eventnumarray = dfspu['t_Event'].values
0143 #eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
0144 ietaarray = dfspu['t_ieta'].values
0145 ietaarray = ietaarray.reshape(ietaarray.shape[0],1)
0146 #iphiarray = dfspu['t_iphi'].values
0147 #iphiarray = iphiarray.reshape(iphiarray.shape[0],1)
0148 corrfac = preds/uncorrected_values
0149 corrfac = corrfac.reshape(corrfac.shape[0],1)
0150 fileo = np.hstack((eventnumarray,ietaarray,corrfac))
0151 #fileo = np.hstack((runnumarray,eventnumarray,ietaarray,iphiarray,corrfac))
0152 np.savetxt(foutput, fileo,fmt=['%d','%d','%.10f'])   
0153 
0154 
0155 # In[ ]:
0156 
0157 
0158 
0159