File indexing completed on 2021-11-09 23:31:37
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 import pandas as pd
0013 import numpy as np
0014 import matplotlib.pyplot as plt
0015 import argparse
0016 import tensorflow.keras
0017 from tensorflow.keras.models import Sequential
0018 from tensorflow.keras.layers import Dense
0019 from tensorflow.keras.layers import Dropout
0020 from tensorflow.keras.utils import to_categorical
0021 from tensorflow.keras.utils import plot_model
0022 from tensorflow.keras import regularizers
0023 from sklearn.metrics import roc_curve, auc
0024 from tensorflow.keras.layers import Activation
0025 from tensorflow.keras import backend as K
0026 from tensorflow.keras.models import load_model
0027 import uproot
0028
0029
0030
0031
0032 parser = argparse.ArgumentParser()
0033 parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root")
0034
0035 parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel')
0036 parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5")
0037 parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt")
0038
0039
0040 fName1 = parser.parse_args().filePU
0041 modeln = parser.parse_args().modelname
0042 foutput = parser.parse_args().opfilename
0043 barrelflag = parser.parse_args().ifbarrel
0044
0045
0046
0047
0048 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
0049 print ("loaded files")
0050
0051 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh']
0052 dictpu = tree1.arrays(branches=branchespu)
0053
0054 dfspu = pd.DataFrame.from_dict(dictpu)
0055 dfspu.columns=branchespu
0056 print ("sample size:",dfspu.shape[0])
0057
0058
0059
0060
0061
0062 dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10']
0063 keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR']
0064
0065 df = dfspu[keepvars].copy()
0066 df['t_eHcal_xun'] = df['t_eHcal']
0067 df['t_delta_un'] = df['t_delta']
0068 df['t_ieta_un'] = df['t_ieta']
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078 if barrelflag=='barrel':
0079 var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0080 mina = [20, -15, 17.33086721925065, 17.655660001095384, 0.0, -1.0, -1.0, 0.0, -5.33359869197011, 4.093925265397289, 20.783629520782718, 16.998163268435746, 20.000221125315875, 40.074083721419896, 0.0, 16.998163268435746, 0.0, -15]
0081 maxa = [138, 15, 138.26640254695667, 155.83508832909865, 50.9643486259738, 17.140547914961605, 2870.424876287056, 35.727171580074355, 17.763740802183747, 26.38359781195008, 59.169594172331905, 133.07561272289604, 122.8542027361691, 59.977312414583295, 0.9999987781047821, 133.07561272289604, 50.9643486259738, 15]
0082 else:
0083 var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
0084 mina = [20, -27, 20.934556022286415, 27.213239994598553, 0.7803188574616797, -1.0, -1.0, 0.0, -57.422806948423386, 4.435386319143829, 6.013227444563335, 18.5278014652431, 20.00695458613336, 40.00339106433163, 0.0, 18.5278014652431, 0.7803188574616797, -27]
0085 maxa = [117, 28, 584.1150933708996, 822.056631873711, 424.1259534251876, 19.999632518345965, 13218.009997109137, 185.96148682758212, 128.60208600759506, 25.911101538538524, 28.928444814127907, 428.99590471945703, 111.25579111767001, 59.98427126709689, 0.9999948740005493, 428.99590471945703, 424.1259534251876, 28]
0086
0087 print(mina)
0088 print(maxa)
0089
0090 normkey = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal']
0091 k = 0
0092 for i in df.keys():
0093 if i not in normkey:
0094 break
0095
0096 df[i]=abs(df[i]-mina[k])/(maxa[k] - mina[k])
0097 k+=1
0098
0099
0100 df['t_eHcal_xun'] = df['t_eHcal_xun'].apply(lambda x: 1.0 if (x==0) else x)
0101 uncorrected_values = df['t_eHcal_xun'].values
0102 print (uncorrected_values.shape)
0103 print ('data vars:',df.keys())
0104 data = df.values
0105 X_train = data[:,0:12]
0106
0107
0108
0109
0110
0111 model = load_model(modeln)
0112 preds = model.predict(X_train,verbose=1)
0113 preds = preds.reshape(preds.shape[0])
0114 print (preds.shape)
0115
0116
0117
0118 if barrelflag=='barrel':
0119 tag = 'barrel'
0120 else:
0121 tag = 'endcap'
0122
0123 print(tag)
0124
0125 plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6)
0126 plt.savefig('predicted_Edist_'+tag+'.png')
0127 plt.close()
0128
0129 parray = dfspu['t_p'].values
0130 mipdr = dfspu['t_eMipDR'].values
0131 plt.hist(preds/(parray - mipdr), bins =100, range=(0,10),label='predicted e/p-ecal',alpha=0.6)
0132 plt.savefig('predicted_eopdist_'+tag+'.png')
0133 plt.close()
0134
0135
0136
0137
0138 eventnumarray = np.arange(0,X_train.shape[0],1,dtype=int)
0139 eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
0140
0141
0142
0143
0144 ietaarray = dfspu['t_ieta'].values
0145 ietaarray = ietaarray.reshape(ietaarray.shape[0],1)
0146
0147
0148 corrfac = preds/uncorrected_values
0149 corrfac = corrfac.reshape(corrfac.shape[0],1)
0150 fileo = np.hstack((eventnumarray,ietaarray,corrfac))
0151
0152 np.savetxt(foutput, fileo,fmt=['%d','%d','%.10f'])
0153
0154
0155
0156
0157
0158
0159