Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 11:58:56

0001 #!/usr/bin/env python
0002 # coding: utf-8
0003 
0004 # In[1]:
0005 
0006 
0007 ######################################################################################
0008 # Trains regressor and saves model for evaluation
0009 # Usage:
0010 # python3 isotrackTrainRegressorNoOptimization.py -I isotk_relval_hi.pkl  -V 1 
0011 # python3 isotrackTrainRegressorNoOptimization.py -I isotk_relval_lo.pkl  -V 2
0012 ######################################################################################
0013 
0014 
0015 import pandas as pd
0016 import numpy as np
0017 import matplotlib.pyplot as plt
0018 import argparse
0019 import tensorflow.keras
0020 from tensorflow.keras.models import Sequential
0021 from tensorflow.keras.layers import Dense
0022 from tensorflow.keras.layers import Dropout
0023 from tensorflow.keras.utils import to_categorical
0024 from tensorflow.keras.utils import plot_model
0025 from tensorflow.keras import regularizers
0026 from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
0027 from sklearn.model_selection import GridSearchCV, RandomizedSearchCV,train_test_split, cross_val_score
0028 from sklearn.metrics import roc_curve, auc
0029 from tensorflow.keras.layers import Activation
0030 from tensorflow.keras import backend as K
0031 from tensorflow.keras.models import save_model
0032 import tensorflow as tf
0033 from tensorflow import keras
0034 
0035 
0036 # In[37]:
0037 
0038 
0039 parser1 = argparse.ArgumentParser()
0040 parser1.add_argument("-I", "--input",help="input file",default="isotk_relval_lowinter1.pkl")
0041 parser1.add_argument("-V", "--modelv",help="model version (any number) ",default="2")
0042 
0043 args = parser1.parse_args(args=[])
0044 
0045 fName = args.input
0046 modv = args.modelv
0047 
0048 
0049 # In[38]:
0050 
0051 
0052 print ("file Name:", fName)
0053 print ("modv :", modv)
0054 
0055 
0056 # In[39]:
0057 
0058 
0059 df = pd.read_pickle(fName)
0060 print ("vars in file:",df.keys())
0061 print("events in df original:",df.shape[0])
0062 df = df.loc[df['t_eHcal_y'] > 20]
0063 print("events in df after energy cut:",df.shape[0])
0064 df['t_eHcal_xun'] = df['t_eHcal_x']
0065 df['t_delta_un'] = df['t_delta']
0066 df['t_ieta_un'] = df['t_ieta']
0067 
0068 
0069 # In[40]:
0070 
0071 
0072 mina = []
0073 maxa = []
0074 keya = []
0075 
0076 
0077 for i in df.keys():
0078     keya.append(i)
0079     mina.append(df[i].min())
0080     maxa.append(df[i].max())
0081 
0082 print('var = ',keya)
0083 print('mina = ',mina)
0084 print('maxa = ',maxa)
0085 
0086 
0087 # In[41]:
0088 
0089 
0090 cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
0091 #df[cols_to_stand] = df[cols_to_stand].apply(lambda x: (x - x.mean()) /(x.std()))
0092 #df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.mean()) /  (x.max() - x.min()))
0093 #                                            #(x.max() - x.min()))
0094 df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) /  (x.max() - x.min()))
0095 
0096 data = df.values
0097 print ('data shape:',data.shape)
0098 targets = data[:,12]
0099 targets.shape
0100 print ('targets shape:', targets.shape)
0101 print ("vars in file:",df.keys())
0102 
0103 
0104 # In[42]:
0105 
0106 
0107 data = df.values
0108 ntest = 20000
0109 testindx = data.shape[0] - ntest
0110 # data.shape[0]: 438118  is no of events 
0111 # data.shape[1] : 18 ==> columns 
0112 # testindx = 438118-20000
0113 print ('data shape:',data.shape[0])
0114 print ('testindx: ' ,testindx)
0115 # this :testindx = 438118-20000 = 418118
0116 X_train = data[:testindx,0:12]
0117 Y_train = data[:testindx,12]
0118 X_test = data[testindx:,:]
0119 print ("shape of X_train:",X_train.shape)
0120 print ("shape of Y_train:",Y_train.shape)
0121 print ("shape of X_test:",X_test.shape)
0122 meany = np.mean(Y_train)
0123 print ("mean y:",meany)
0124 stdy = np.std(Y_train)
0125 print ("std y:", stdy)
0126 
0127 
0128 # In[43]:
0129 
0130 
0131 ############################################# marinas correction  form
0132 a0 = [0.973, 0.998,  0.992,  0.965 ]
0133 a1 =  [0,    -0.318, -0.261, -0.406]
0134 a2 = [0,     0,      0.047,  0.089]
0135 def fac0(jeta):
0136     PU_IETA_1 = 9
0137     PU_IETA_2 = 16
0138     PU_IETA_3 = 25
0139     idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
0140     return a0[idx]
0141 def fac1(jeta):
0142     PU_IETA_1 = 9
0143     PU_IETA_2 = 16
0144     PU_IETA_3 = 25
0145     idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
0146     return a1[idx]
0147 def fac2(jeta):
0148     PU_IETA_1 = 9
0149     PU_IETA_2 = 16
0150     PU_IETA_3 = 25
0151     idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
0152     return a2[idx]
0153 
0154 
0155 # In[44]:
0156 
0157 
0158 vec0 = np.vectorize(fac0)
0159 vec1 = np.vectorize(fac1)
0160 vec2 = np.vectorize(fac2)
0161 
0162 X_test[:,17]
0163 eop = (X_test[:,15]/X_test[:,13])
0164 dop = (X_test[:,16]/X_test[:,13])
0165 print ("eop: ", eop)
0166 #mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*(X_test[:,15]/X_test[:,13])*(X_test[:,16]/X_test[:,13])*( 1 + vec2(fac(abs(X_test[:,17])))*(X_test[:,16]/X_test[:,13]))
0167 
0168 mcorrval = vec0(abs(X_test[:,17]))   +  vec1(abs(X_test[:,17]))*eop*dop*( 1   +  vec2(abs(X_test[:,17]))*dop)
0169 
0170 
0171 # In[45]:
0172 
0173 
0174 def propweights(y_true):
0175     weight = np.copy(y_true)
0176     weight[abs(y_true - meany) > 0] = 1.90*abs(y_true - meany)/stdy  #1.25
0177 #    weight[abs(y_true - meany) > stdy] = 1.75*abs((weight[abs(y_true - meany) > stdy]) - meany)/(stdy)
0178     weight[abs(y_true - meany) < stdy] =  1
0179     print ("wieght : ", weight)
0180     return weight
0181 
0182 
0183 # In[46]:
0184 
0185 
0186 from keras import optimizers
0187 print ("creating model=========>")
0188 model = Sequential()
0189 model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
0190 #model.add(Dropout(0.2))
0191 model.add(Dense(271, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
0192 model.add(Dense(301, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
0193 model.add(Dense(241, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
0194 model.add(Dense(1))
0195 
0196 
0197 # In[47]:
0198 
0199 
0200 print ("compilation up next=======>")
0201 model.compile(loss='logcosh',optimizer='adam')
0202 model.summary()
0203 #print ("Y_train : ", Y_train)
0204 #fitting
0205 print ("fitting now=========>")
0206 history = model.fit(X_train,Y_train , batch_size=5000, epochs=50, validation_split=0.2, verbose=1,sample_weight=propweights(Y_train))
0207 
0208 
0209 # In[48]:
0210 
0211 
0212 plt.plot(history.history['loss'])
0213 plt.plot(history.history['val_loss'])
0214 plt.title('model loss')
0215 plt.ylabel('loss')
0216 plt.xlabel('epoch')
0217 plt.legend(['train', 'validation'], loc='upper left')
0218 plt.savefig(modv+'_loss_distributions_lowWinter.png')
0219 #plt.show() 
0220 plt.close()
0221 
0222 
0223 # In[49]:
0224 
0225 
0226 preds = model.predict(X_test[:,0:12])
0227 targets = X_test[:,12]
0228 uncorrected = X_test[:,15]
0229 marinascorr = X_test[:,15]*mcorrval
0230 plt.hist(preds, bins =100, range=(0,200),label='PU regression',alpha=0.6)
0231 plt.hist(targets, bins =100, range=(0,200),label='no PU',alpha=0.6)
0232 plt.hist(uncorrected, bins =100, range=(0,200),label='uncorrected',alpha=0.6)
0233 #plt.hist(marinascorr, bins =100, range=(0,200),label='marinas correction',alpha=0.6)
0234 plt.yscale('log')
0235 plt.title("Energy distribution")
0236 plt.legend(loc='upper right')
0237 plt.savefig(modv+'_energy_distributions_lowWinter.png')
0238 #plt.show() 
0239 plt.close()
0240 
0241 preds = preds.reshape(preds.shape[0])
0242 print (preds.shape)
0243 
0244 
0245 # In[50]:
0246 
0247 
0248 plt.hist(abs(uncorrected -targets)/targets*100, bins =100, range=(0,100),label='uncorrected',alpha=0.6)
0249 #plt.hist(abs(marinascorr -targets)/targets*100, bins =100, range=(0,100),label='marinas correction',alpha=0.6)
0250 plt.hist(100*abs(preds -targets)/targets, bins =100, range=(0,100),label='PU correction',alpha=0.6)
0251 #plt.yscale('log')
0252 plt.title("error distribution")
0253 plt.legend(loc='upper right')
0254 plt.savefig(modv+'_errors_low.png')
0255 #plt.show() 
0256 plt.close()
0257 
0258 
0259 # In[51]:
0260 
0261 
0262 plt.scatter(targets, uncorrected,alpha=0.3,label='uncorr')
0263 plt.scatter(targets, preds,alpha=0.3,label='PUcorr')
0264 plt.xlabel('True Values ')
0265 plt.ylabel('Predictions ')
0266 plt.legend(loc='upper right')
0267 lims = [0, 200]
0268 plt.xlim(lims)
0269 plt.ylim(lims)
0270 _ = plt.plot(lims, lims)
0271 plt.savefig(modv+'_energyscatt_lowWinter.png')
0272 #plt.show() 
0273 plt.close()
0274 
0275 
0276 # In[52]:
0277 
0278 
0279 pmom= X_test[:,13]
0280 #get_ipython().run_line_magic('matplotlib', 'inline')
0281 plt.hist(targets/pmom, bins =100, range=(0,5),label='E/p noPU',alpha=0.6)
0282 #plt.hist(preds/pmom, bins =100, range=(0,5),label='E/p PUcorr',alpha=0.6)
0283 #plt.hist(uncorrected/pmom, bins =100, range=(0,5),label='E/p uncorr',alpha=0.6)
0284 plt.hist(marinascorr/pmom, bins =100, range=(0,5),label='E/p marina corr',alpha=0.6)
0285 #plt.hist(np.exp(preds.reshape((preds.shape[0])))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
0286 #plt.hist(preds.reshape((preds.shape[0]))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
0287 plt.legend(loc='upper right')
0288 plt.yscale('log')
0289 plt.title("E/p distributions") 
0290 plt.savefig(modv+'_eopdist_lowWinter.png')
0291 #plt.show() 
0292 plt.close()
0293 
0294 
0295 # In[53]:
0296 
0297 
0298 import os
0299 ############## save model
0300 if not os.path.exists('models'):
0301     os.makedirs('models')
0302 model.save('models/model_BarrelWinter'+modv+'.h5')
0303 #new_model_2 = load_model('my_model.h5')
0304 
0305 
0306 # In[ ]:
0307 
0308 
0309 
0310