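# Purpose (as inferred from the code below): train a Keras DNN that regresses the
# pileup-free HCAL energy of isolated tracks from 12 min-max-scaled input variables,
# compare its predictions with the uncorrected energy and with a parametrized
# ("marina") correction, and save the trained model under models/.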
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import argparse
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
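# Command-line arguments: input pickle file and a model-version tag that is used
# in the names of the output plots and of the saved model.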
parser1 = argparse.ArgumentParser()
parser1.add_argument("-I", "--input", help="input file", default="isotk_relval_lowinter1.pkl")
parser1.add_argument("-V", "--modelv", help="model version (any number)", default="2")

args = parser1.parse_args()

fName = args.input
modv = args.modelv
print("file name:", fName)
print("model version:", modv)
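# Load the isolated-track dataframe, require t_eHcal_y > 20 (energy cut), and keep
# unscaled copies of t_eHcal_x, t_delta and t_ieta before the min-max scaling below.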
df = pd.read_pickle(fName)
print("vars in file:", df.keys())
print("events in df original:", df.shape[0])
df = df.loc[df['t_eHcal_y'] > 20]
print("events in df after energy cut:", df.shape[0])
df['t_eHcal_xun'] = df['t_eHcal_x']
df['t_delta_un'] = df['t_delta']
df['t_ieta_un'] = df['t_ieta']
mina = []
maxa = []
keya = []

for i in df.keys():
    keya.append(i)
    mina.append(df[i].min())
    maxa.append(df[i].max())

print('var = ', keya)
print('mina = ', mina)
print('maxa = ', maxa)
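# Min-max scale the input features to [0, 1], column by column.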
cols_to_minmax = ['t_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_pt', 't_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_rhoh', 't_eHcal_x']

df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

data = df.values
print('data shape:', data.shape)
targets = data[:, 12]
print('targets shape:', targets.shape)
print("vars in file:", df.keys())
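# Hold-out split: the last ntest rows are kept for testing; columns 0-11 are the
# network inputs and column 12 is the regression target. The mean and standard
# deviation of the training targets are used later for the sample weights.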
data = df.values
ntest = 20000
testindx = data.shape[0] - ntest

print('data shape:', data.shape[0])
print('testindx: ', testindx)

X_train = data[:testindx, 0:12]
Y_train = data[:testindx, 12]
X_test = data[testindx:, :]
print("shape of X_train:", X_train.shape)
print("shape of Y_train:", Y_train.shape)
print("shape of X_test:", X_test.shape)

meany = np.mean(Y_train)
print("mean y:", meany)
stdy = np.std(Y_train)
print("std y:", stdy)
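# Parametrized correction used for comparison (the "marina" correction applied below):
# a0/a1/a2 appear to be pre-derived coefficients binned in |ieta|, with bin boundaries
# at 9, 16 and 25.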
a0 = [0.973, 0.998, 0.992, 0.965]
a1 = [0, -0.318, -0.261, -0.406]
a2 = [0, 0, 0.047, 0.089]

def fac0(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a0[idx]

def fac1(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a1[idx]

def fac2(jeta):
    PU_IETA_1 = 9
    PU_IETA_2 = 16
    PU_IETA_3 = 25
    idx = int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)
    return a2[idx]
vec0 = np.vectorize(fac0)
vec1 = np.vectorize(fac1)
vec2 = np.vectorize(fac2)

eop = (X_test[:, 15] / X_test[:, 13])
dop = (X_test[:, 16] / X_test[:, 13])
print("eop: ", eop)

mcorrval = vec0(abs(X_test[:, 17])) + vec1(abs(X_test[:, 17])) * eop * dop * (1 + vec2(abs(X_test[:, 17])) * dop)
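# Per-event sample weights for the fit: events whose target lies within one standard
# deviation of the training mean get weight 1, events farther out get a weight
# proportional to their distance from the mean (in units of stdy).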
def propweights(y_true):
    weight = 1.90 * abs(y_true - meany) / stdy
    weight[abs(y_true - meany) < stdy] = 1
    print("weight: ", weight)
    return weight
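# Fully connected regression network: 12 inputs -> 128 -> 271 -> 301 -> 241 -> 1,
# ReLU activations, L2 regularization on the deeper hidden layers, linear output.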
print("creating model=========>")
model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(271, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(301, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(241, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1))

print("compilation up next=======>")
model.compile(loss='logcosh', optimizer='adam')
model.summary()
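# Fit on the training set with the sample weights defined above; 20% of the training
# data is used as a validation split.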
print("fitting now=========>")
history = model.fit(X_train, Y_train, batch_size=5000, epochs=50, validation_split=0.2, verbose=1, sample_weight=propweights(Y_train))
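# Training and validation loss vs. epoch.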
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig(modv + '_loss_distributions_lowWinter.png')
plt.close()
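# Evaluate on the held-out test rows: compare the predicted (PU-regressed) energy with
# the no-PU target and the uncorrected energy; the parametrized correction factor
# mcorrval is applied to the uncorrected energy for the later E/p comparison.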
preds = model.predict(X_test[:, 0:12])
targets = X_test[:, 12]
uncorrected = X_test[:, 15]
marinascorr = X_test[:, 15] * mcorrval
plt.hist(preds, bins=100, range=(0, 200), label='PU regression', alpha=0.6)
plt.hist(targets, bins=100, range=(0, 200), label='no PU', alpha=0.6)
plt.hist(uncorrected, bins=100, range=(0, 200), label='uncorrected', alpha=0.6)
plt.yscale('log')
plt.title("Energy distribution")
plt.legend(loc='upper right')
plt.savefig(modv + '_energy_distributions_lowWinter.png')
plt.close()

preds = preds.reshape(preds.shape[0])
print(preds.shape)
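# Relative error (in percent) with respect to the no-PU target, before and after
# the DNN correction.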
plt.hist(100 * abs(uncorrected - targets) / targets, bins=100, range=(0, 100), label='uncorrected', alpha=0.6)
plt.hist(100 * abs(preds - targets) / targets, bins=100, range=(0, 100), label='PU correction', alpha=0.6)
plt.title("error distribution")
plt.legend(loc='upper right')
plt.savefig(modv + '_errors_low.png')
plt.close()
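# Predicted vs. true energy scatter, with the diagonal drawn for reference.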
plt.scatter(targets, uncorrected, alpha=0.3, label='uncorr')
plt.scatter(targets, preds, alpha=0.3, label='PUcorr')
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.legend(loc='upper right')
lims = [0, 200]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)
plt.savefig(modv + '_energyscatt_lowWinter.png')
plt.close()
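# E/p distributions (energy divided by pmom, column 13): no-PU target vs. the
# parametrized ("marina") correction.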
pmom = X_test[:, 13]
plt.hist(targets / pmom, bins=100, range=(0, 5), label='E/p noPU', alpha=0.6)
plt.hist(marinascorr / pmom, bins=100, range=(0, 5), label='E/p marina corr', alpha=0.6)
plt.legend(loc='upper right')
plt.yscale('log')
plt.title("E/p distributions")
plt.savefig(modv + '_eopdist_lowWinter.png')
plt.close()
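# Save the trained model (HDF5) under models/, tagged with the model version.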
import os

if not os.path.exists('models'):
    os.makedirs('models')
model.save('models/model_BarrelWinter' + modv + '.h5')