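# Purpose (as inferred from the code below): train a Keras DNN that regresses the
# pileup-free HCAL energy of isolated tracks from 12 min-max-scaled input variables,
# compare its predictions with the uncorrected energy and with a parametrized
# ("marina") correction, and save the trained model under models/.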
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import argparse
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
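# Command-line arguments: input pickle file and a model-version tag that is used
# in the names of the output plots and of the saved model.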
parser1 = argparse.ArgumentParser()
parser1.add_argument("-I", "--input", help="input file", default="isotk_relval_lowinter1.pkl")
parser1.add_argument("-V", "--modelv", help="model version (any number)", default="2")

args = parser1.parse_args()

fName = args.input
modv = args.modelv
print("file name:", fName)
print("model version:", modv)
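# Load the isolated-track dataframe, require t_eHcal_y > 20 (energy cut), and keep
# unscaled copies of t_eHcal_x, t_delta and t_ieta before the min-max scaling below.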
df = pd.read_pickle(fName)
print("vars in file:", df.keys())
print("events in df original:", df.shape[0])
df = df.loc[df['t_eHcal_y'] > 20]
print("events in df after energy cut:", df.shape[0])
df['t_eHcal_xun'] = df['t_eHcal_x']
df['t_delta_un'] = df['t_delta']
df['t_ieta_un'] = df['t_ieta']
mina = []
maxa = []
keya = []

for i in df.keys():
    keya.append(i)
    mina.append(df[i].min())
    maxa.append(df[i].max())

print('var = ', keya)
print('mina = ', mina)
print('maxa = ', maxa)
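# Min-max scale the input features to [0, 1], column by column.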
cols_to_minmax = ['t_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_pt', 't_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_rhoh', 't_eHcal_x']

df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

data = df.values
print('data shape:', data.shape)
targets = data[:, 12]
print('targets shape:', targets.shape)
print("vars in file:", df.keys())
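# Hold-out split: the last ntest rows are kept for testing; columns 0-11 are the
# network inputs and column 12 is the regression target. The mean and standard
# deviation of the training targets are used later for the sample weights.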
data = df.values
ntest = 20000
testindx = data.shape[0] - ntest

print('data shape:', data.shape[0])
print('testindx: ', testindx)

X_train = data[:testindx, 0:12]
Y_train = data[:testindx, 12]
X_test = data[testindx:, :]
print("shape of X_train:", X_train.shape)
print("shape of Y_train:", Y_train.shape)
print("shape of X_test:", X_test.shape)

meany = np.mean(Y_train)
print("mean y:", meany)
stdy = np.std(Y_train)
print("std y:", stdy)
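# Parametrized correction used for comparison (the "marina" correction applied below):
# a0/a1/a2 appear to be pre-derived coefficients binned in |ieta|, with bin boundaries
# at 9, 16 and 25.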
a0 = [0.973, 0.998, 0.992, 0.965]
a1 = [0, -0.318, -0.261, -0.406]
a2 = [0, 0, 0.047, 0.089]

def fac0(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a0[idx]

def fac1(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a1[idx]

def fac2(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a2[idx]
vec0 = np.vectorize(fac0)
vec1 = np.vectorize(fac1)
vec2 = np.vectorize(fac2)

eop = (X_test[:, 15] / X_test[:, 13])
dop = (X_test[:, 16] / X_test[:, 13])
print("eop: ", eop)

mcorrval = vec0(abs(X_test[:, 17])) + vec1(abs(X_test[:, 17])) * eop * dop * (1 + vec2(abs(X_test[:, 17])) * dop)
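# Per-event sample weights for the fit: events whose target lies within one standard
# deviation of the training mean get weight 1, events farther out get a weight
# proportional to their distance from the mean (in units of stdy).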
def propweights(y_true):
    weight = 1.90 * abs(y_true - meany) / stdy
    weight[abs(y_true - meany) < stdy] = 1
    print("weight: ", weight)
    return weight
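# Fully connected regression network: 12 inputs -> 128 -> 271 -> 301 -> 241 -> 1,
# ReLU activations, L2 regularization on the deeper hidden layers, linear output.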
print("creating model=========>")
model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(271, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(301, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(241, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))

print("compilation up next=======>")
model.compile(loss='logcosh', optimizer='adam')
model.summary()
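# Fit on the training set with the sample weights defined above; 20% of the training
# data is used as a validation split.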
print("fitting now=========>")
history = model.fit(X_train, Y_train, batch_size=5000, epochs=50, validation_split=0.2, verbose=1, sample_weight=propweights(Y_train))
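# Training and validation loss vs. epoch.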
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig(modv + '_loss_distributions_lowWinter.png')
plt.close()
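# Evaluate on the held-out test rows: compare the predicted (PU-regressed) energy with
# the no-PU target and the uncorrected energy; the parametrized correction factor
# mcorrval is applied to the uncorrected energy for the later E/p comparison.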
preds = model.predict(X_test[:, 0:12])
targets = X_test[:, 12]
uncorrected = X_test[:, 15]
marinascorr = X_test[:, 15] * mcorrval
plt.hist(preds, bins=100, range=(0, 200), label='PU regression', alpha=0.6)
plt.hist(targets, bins=100, range=(0, 200), label='no PU', alpha=0.6)
plt.hist(uncorrected, bins=100, range=(0, 200), label='uncorrected', alpha=0.6)
plt.yscale('log')
plt.title("Energy distribution")
plt.legend(loc='upper right')
plt.savefig(modv + '_energy_distributions_lowWinter.png')
plt.close()

preds = preds.reshape(preds.shape[0])
print(preds.shape)
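# Relative error (in percent) with respect to the no-PU target, before and after
# the DNN correction.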
plt.hist(100 * abs(uncorrected - targets) / targets, bins=100, range=(0, 100), label='uncorrected', alpha=0.6)
plt.hist(100 * abs(preds - targets) / targets, bins=100, range=(0, 100), label='PU correction', alpha=0.6)
plt.title("error distribution")
plt.legend(loc='upper right')
plt.savefig(modv + '_errors_low.png')
plt.close()
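# Predicted vs. true energy scatter, with the diagonal drawn for reference.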
plt.scatter(targets, uncorrected, alpha=0.3, label='uncorr')
plt.scatter(targets, preds, alpha=0.3, label='PUcorr')
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.legend(loc='upper right')
lims = [0, 200]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)
plt.savefig(modv + '_energyscatt_lowWinter.png')
plt.close()
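# E/p distributions (energy divided by pmom, column 13): no-PU target vs. the
# parametrized ("marina") correction.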
pmom = X_test[:, 13]
plt.hist(targets / pmom, bins=100, range=(0, 5), label='E/p noPU', alpha=0.6)
plt.hist(marinascorr / pmom, bins=100, range=(0, 5), label='E/p marina corr', alpha=0.6)
plt.legend(loc='upper right')
plt.yscale('log')
plt.title("E/p distributions")
plt.savefig(modv + '_eopdist_lowWinter.png')
plt.close()
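# Save the trained model (HDF5) under models/, tagged with the model version.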
import os

if not os.path.exists('models'):
    os.makedirs('models')
model.save('models/model_BarrelWinter' + modv + '.h5')