File indexing completed on 2023-03-17 11:09:55
0001
0002
0003 from __future__ import print_function
0004 import sys
0005 import os
0006
0007 """
0008 arguments [<list-of-processes>]
0009 description:
0010 creates crab.cfg, multicrab.cfg, harvest_*.py
0011 if dbs is set:
0012 prints number of events found in dataset
0013 if no argument is provided looks for all available datasets for release
0014 user can edit multicrab and confirm process list as needed
0015 nuno@cern.ch 09.04
0016 """
0017
def print_def():
    """Print a usage line followed by example invocations to stdout."""
    examples = (
        "harvestRelVal.py",
        "harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO",
        "harvestRelVal.py <dataset_list.txt>",
    )
    print("Usage:", sys.argv[0], "[list_of_processes]")
    print("Examples:")
    for example in examples:
        print(example)
0024
def check_dbs():
    """Report whether the DBS command-line environment is configured.

    Returns 1 when the DBSCMD_HOME environment variable is set to anything
    other than the literal 'NOTSET', and 0 otherwise.
    """
    dbs_home = os.environ.get('DBSCMD_HOME', 'NOTSET')
    return 0 if dbs_home == 'NOTSET' else 1
0029
def check_nevts_dset(dset):
    """Return the total number of events recorded for dataset *dset*.

    Adds up the 'NumberOfEvents' entry of every file that the module-level
    DBS ``api`` lists for the dataset path. Returns -1 when the module-level
    ``is_dbs`` flag is false (no DBS available).
    """
    if is_dbs:
        return sum(entry['NumberOfEvents']
                   for entry in api.listFiles(path=str(dset)))
    return -1
0039
def make_dqmname(s):
    """Build the DQM ROOT file name for dataset path *s*.

    Every '/' in *s* becomes '__'; the result is wrapped between the fixed
    'DQM_V0001_R000000001' prefix and the '.root' suffix.
    """
    flattened = '__'.join(s.split('/'))
    return 'DQM_V0001_R000000001%s.root' % flattened
0042
def get_name_from_dsetpath(ds):
    """Return the second '/'-separated field of *ds* with 'RelVal' removed.

    For a path of the form '/RelValTTbar/<processed>/<tier>' this is 'TTbar'.
    """
    return ds.split('/')[1].replace('RelVal', '')
0047
def get_cond_from_dsetpath(ds) :
    """Extract the conditions tag ('30X' or '31X') from dataset path *ds*.

    The third '/'-separated field of *ds* is stripped of the module-level
    ``cmssw_ver`` prefix and of the 'IDEAL_', 'STARTUP_' and '_FastSim'
    markers; what precedes the version suffix is taken as the candidate tag.

    Returns the tag string on success, or 0 when no usable tag can be
    extracted (callers compare the result against 0 to skip the dataset).
    Malformed paths (fewer than three fields, or an empty candidate) return 0
    instead of raising IndexError.
    """
    fields = ds.split('/')
    if len(fields) < 3:
        # No processed-dataset field to inspect.
        print("problem extracting condition for", ds)
        print("skipping", ds)
        return 0
    ca = fields[2].replace(cmssw_ver+'_','').replace('IDEAL_','').replace('STARTUP_','').replace('_FastSim','')
    # Drop everything from the character before the first 'v' onwards
    # (i.e. the '_vN' suffix).
    # NOTE(review): when no 'v' is present find() returns -1 and the slice
    # silently drops the last two characters - confirm whether that is wanted.
    cb = ca[:ca.find('v')-1]
    # startswith() is also safe for an empty candidate, unlike cb[0].
    if not cb.startswith('3') or len(cb) > 3:
        print("problem extracting condition for", ds, " : ", cb, '(len:',len(cb),')')
        if '31X' in cb:
            cb = '31X'
        elif '30X' in cb:
            cb = '30X'
        else:
            print("skipping", cb)
            return 0
        print("condition found:", cb)
    else :
        print("good condition for", ds, " : ", cb, '(len:',len(cb),')')
    return cb
0064
0065
def make_dbs_list(dbslf) :
    """Write the dataset paths known to DBS into the text file *dbslf*.

    Does nothing when the module-level ``is_dbs`` flag is false. Otherwise
    every path returned by ``api.listDatasetPaths()`` that contains 'RelVal',
    the module-level ``cmssw_ver`` or '/GEN-SIM' is written, one per line.
    The file is closed even if the DBS query raises part-way through.
    """
    if not is_dbs :
        return
    with open(dbslf, 'w') as flis:
        for ads in api.listDatasetPaths() :
            # NOTE(review): the three criteria are OR-ed, so a path matching
            # any single one is kept - confirm this is the intended filter.
            if 'RelVal' in ads or cmssw_ver in ads or "/GEN-SIM" in ads:
                flis.write(ads + '\n')
    print('Generated dataset list', dbslf, 'from dbs.')
0078
0079
0080
0081
0082
def read_ds_file() :
    """Append the dataset paths listed in the module-level ``dsfile`` to ``dsetpaths``.

    The file holds one dataset path per line. Lines containing '#' are
    reported and skipped; blank or whitespace-only lines are ignored, so a
    trailing empty line does not become an empty dataset path. Surrounding
    whitespace is stripped from each path.

    Exits with status 30 if the file does not exist.
    """
    if not os.path.exists(dsfile) :
        print("problem reading file", dsfile)
        sys.exit(30)
    with open(dsfile, 'r') as fin:
        for dset in fin:
            d = dset.strip()
            if not d:
                continue
            if '#' in d:
                print('skipping:', d)
            else:
                dsetpaths.append(d)
    print('Using data set list in ', dsfile)
0095 print('Using data set list in ', dsfile)
0096
def check_dset() :
    """Validate the module-level ``dsetpaths`` list in place and report on it.

    1. Removes every path that does not contain the module-level ``cmssw_ver``.
    2. Removes every path for which ``get_cond_from_dsetpath`` returns 0.
    3. Exits with status 12 if nothing is left.
    4. Prints the number of events found for each remaining dataset.

    Both filters iterate over a copy of the list: removing from the list that
    is being iterated makes the loop skip the element after each removal.
    """
    for s in dsetpaths[:]:
        if s.find(cmssw_ver) == -1 :
            dsetpaths.remove(s)
            print('Inconsistency found with datset and cmssw version (', cmssw_ver, ')' \
                  ': \t ', s, ' has been removed.')

    for s in dsetpaths[:]:
        cond = get_cond_from_dsetpath(s)
        if cond == 0 :
            dsetpaths.remove(s)

    nSamples = len(dsetpaths)
    if nSamples == 0 :
        print("Empty input list, exit.")
        sys.exit(12)
    else :
        print('Processing', nSamples, 'data sets.')

    nSampleEvts = [check_nevts_dset(s) for s in dsetpaths]
    print('number of events per dataset:', nSampleEvts)
0121
def _previous_release(version):
    """Return *version* with its trailing number decremented by one, or None.

    None is returned when *version* does not end in a digit or ends in 0,
    since no previous release name can be derived in that case.
    """
    stem = version.rstrip('0123456789')
    digits = version[len(stem):]
    if not digits or int(digits) == 0:
        return None
    return stem + str(int(digits) - 1)


def find_dqmref(ds) :
    """Fetch the reference DQM file for dataset *ds* and return its local name.

    Returns 'NONE' when the module-level ``do_reference`` flag is false, when
    no previous release name can be derived from the module-level
    ``cmssw_ver``, or when no matching file ends up in the working directory.

    The reference release is ``cmssw_ver`` with only its trailing number
    decremented. The file is copied from castor with ``rfcp``/``rfdir``;
    the local listing is written to 'ref.txt' and its first line is taken
    as the candidate file name.
    """
    if not do_reference :
        return 'NONE'
    ref_ver = _previous_release(cmssw_ver)
    if ref_ver is None:
        print("Cannot derive a reference release from", cmssw_ver)
        return 'NONE'

    ref_dir = "/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver + "/"
    ref_dsf = make_dqmname(ds.replace(cmssw_ver, ref_ver))
    # NOTE(review): the dataset-derived name is interpolated into shell
    # commands without quoting; only safe for trusted dataset paths.
    gls = " | grep root | grep "
    # Match on the file name without its last 25 characters.
    # NOTE(review): presumably drops '.root' plus the trailing part of the
    # dataset name so that other versions still match - confirm.
    gls += ref_dsf[:-25]
    gls += "| awk '{print $9}' "

    # Copy the matching reference file(s) from castor into the current directory.
    command = "rfcp " + ref_dir + "`rfdir " + ref_dir + gls + "` ."
    os.system(command)

    # List matching local files into a scratch file.
    tmpfile = "ref.txt"
    command = "ls -rtl *" + gls + " > " + tmpfile
    os.system(command)

    the_ref = 'NONE'
    if os.path.exists(tmpfile) :
        with open(tmpfile,'r') as fin:
            ref = fin.readline().replace('\n','')
        if os.path.exists(ref) :
            the_ref = ref
    print("Found reference file:", the_ref)
    return the_ref
0155
def create_harvest(ds) :
    """Generate the harvesting configuration file for dataset *ds*.

    Runs ``cmsDriver.py`` with conditions and harvesting sequence adapted to
    the dataset name (IDEAL vs STARTUP, FastSim, PileUp), renames its output
    to the name given by ``make_harv_name(ds)`` and appends the DQM workflow
    name, the input file list (when DBS is available) and the reference
    handling setting (when a reference file is found).

    Exits with status 50 if no conditions tag can be extracted from *ds* and
    with status 40 if cmsDriver did not produce the expected output file.
    """
    raw_cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"
    cond = get_cond_from_dsetpath(ds)
    if cond == 0 :
        print('unexpected problem with conditions')
        sys.exit(50)
    cmsdriver = raw_cmsdriver.replace('30X',cond)
    fin_name="harvest_HARVESTING_STARTUP.py"
    if 'IDEAL' in ds:
        cmsdriver = cmsdriver.replace('STARTUP','IDEAL')
        fin_name = fin_name.replace('STARTUP','IDEAL')
    # NOTE(review): for a dataset that is both FastSim and PileUp the two
    # replacements compound to 'validationHarvestingPUFS' - confirm intended.
    if 'FastSim' in ds:
        cmsdriver = cmsdriver.replace('validationHarvesting','validationHarvestingFS')
    if 'PileUp' in ds:
        cmsdriver = cmsdriver.replace('validationHarvesting','validationHarvestingPU')

    # Remove a stale cmsDriver output so the existence check below is meaningful.
    if os.path.exists(fin_name) :
        os.remove(fin_name)
    print("executing cmsdriver command:\n\t", cmsdriver)
    os.system(cmsdriver)
    if not os.path.exists(fin_name) :
        print('problem with cmsdriver file name')
        sys.exit(40)
    os.utime(fin_name, None)
    hf = make_harv_name(ds)
    # Rename without going through a shell: the target name is derived from
    # the dataset path.
    os.rename(fin_name, hf)
    with open(hf, 'a') as out:
        out.write("\n\n##additions to cmsDriver output \n")
        out.write("process.dqmSaver.workflow = '" + ds + "'\n")
        if is_dbs :
            out.write("process.source.fileNames = cms.untracked.vstring(\n")
            for afile in api.listFiles(path=ds):
                out.write(" '%s',\n" % afile['LogicalFileName'])
            out.write(")\n")

        dqmref = find_dqmref(ds)
        if dqmref != 'NONE' :
            out.write("process.dqmSaver.referenceHandling = 'all'\n")
0198
def create_mcrab(set, fcrab, fout):
    """Write the multicrab configuration file *fout*.

    The file starts with a [MULTICRAB] section pointing at the crab
    configuration *fcrab* and a [COMMON] section requesting all events (-1)
    in a single job, followed by one section per dataset path in *set*
    (written by ``append_sample_mcrab``). The file is closed even if writing
    a sample section fails.
    """
    nevt = -1
    njob = 1
    with open(fout, 'w') as out:
        out.write('[MULTICRAB]')
        out.write('\ncfg=' + fcrab)
        out.write('\n\n[COMMON]')
        out.write('\nCMSSW.total_number_of_events=' + str(nevt))
        out.write('\nCMSSW.number_of_jobs=' + str(njob))
        for s in set:
            append_sample_mcrab(s, out)
0211
def make_harv_name(dset) :
    """Return the harvesting config file name, 'harvest_<sample>.py', for *dset*.

    <sample> is whatever ``get_name_from_dsetpath`` yields for the path.
    """
    return 'harvest_%s.py' % get_name_from_dsetpath(dset)
0214
def append_sample_mcrab(dsetp, fout):
    """Append the multicrab section for dataset path *dsetp* to open file *fout*.

    The section is named after the sample and lists the harvesting pset, the
    dataset path and the DQM output file; when a reference file is found it
    is added as an additional input file.

    Exits with status 17 if the harvesting config file does not exist.
    """
    output_name = make_dqmname(dsetp)
    section = get_name_from_dsetpath(dsetp)
    pset = make_harv_name(dsetp)
    if not os.path.exists(pset):
        print('problem creating multicrab, file', pset, 'does not exist')
        sys.exit(17)

    entries = (
        '\n\n[' + section + ']',
        '\nCMSSW.pset=' + pset,
        '\nCMSSW.datasetpath=' + dsetp,
        '\nCMSSW.output_file=' + output_name,
    )
    for entry in entries:
        fout.write(entry)

    reference = find_dqmref(dsetp)
    if reference != 'NONE':
        fout.write('\nUSER.additional_input_files=' + reference)
0230
def create_crab(ds) :
    """Write the crab configuration file for dataset *ds*.

    The file named by the module-level ``f_crab`` receives the
    ``crab_block`` template followed by the pset, datasetpath and
    output_file settings, each on its own line.
    """
    dqmout = make_dqmname(ds)
    hf = make_harv_name(ds)
    with open(f_crab, 'w') as out:
        out.write(crab_block)
        out.write('\npset=' + hf)
        # The leading newline keeps datasetpath from being glued onto the
        # pset line.
        out.write('\ndatasetpath=' + ds)
        out.write('\noutput_file=' + dqmout)
0240
# Template written at the top of the crab configuration file by create_crab().
# The [CMSSW] section comes last on purpose: create_crab() appends the pset,
# datasetpath and output_file settings directly after this text.
crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 1
#copy_data = 1
#storage_element=srm-cms.cern.ch
#storage_path=/srm/managerv2?SFN=/castor/cern.ch
#user_remote_dir=/user/n/nuno/test
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
number_of_jobs=1
"""
0265
0266
0267
# Command-line handling: at most one argument is accepted.
#   no argument      -> input_type 'none' (datasets are looked up later)
#   existing file    -> input_type 'file' (argument is a dataset list file)
#   dataset path     -> input_type 'ds'   (must mention CMSSW and RelVal)
input_type = ''
argin = ''
dsfile = ''
do_reference = False
if len(sys.argv) > 2 :
    print_def()
    sys.exit(10)
elif len(sys.argv) == 1 :
    print("Will search for available datasets.")
    input_type = 'none'
elif len(sys.argv) == 2 :
    argin = sys.argv[1]
    if os.path.exists(argin) :
        dsfile = argin
        input_type = 'file'
    # Both markers must be present. A bare str.find() result is not a valid
    # truth test: it is -1 (truthy) when absent and 0 (falsy) at index 0.
    elif 'CMSSW' in argin and 'RelVal' in argin:
        print('Using specified data set', argin)
        input_type = 'ds'
    else :
        print('Invalid argument: process list, dataset or file', \
              argin, 'does not exist.')
        sys.exit(11)
0291
0292
# Connect to the global DBS instance when the DBS environment is available;
# the DBS client modules are only imported in that case.
is_dbs = check_dbs()
if is_dbs:
    print("dbs home:", os.getenv('DBSCMD_HOME'))
    from DBSAPI.dbsApi import DbsApi
    from DBSAPI.dbsException import *
    from DBSAPI.dbsApiException import *
    from DBSAPI.dbsOptions import DbsOptionParser
    optManager = DbsOptionParser()
    (opts,args) = optManager.getOpt()

    # The parsed positional arguments are discarded; only the service URL is
    # handed to the API.
    args = {'url': "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"}
    api = DbsApi(args)
else:
    print("dbs not set!")
0308
0309
# The CMSSW release is taken from the environment; without it the script
# prints setup instructions and exits with status 12.
cmssw_ver = os.environ.get('CMSSW_VERSION', 'NOTSET')
if cmssw_ver != 'NOTSET':
    print("Using cmssw version:", cmssw_ver)
else:
    print(
        "\n"
        "cmssw not set!\n"
        "example:\n"
        "scramv1 p CMSSW CMSSW_3_1_0_pre5\n"
        "cd CMSSW_3_1_0_pre5/src\n"
        "eval `scramv1 runtime -sh`\n"
        "cd -\n"
    )
    sys.exit(12)
0323
0324
0325
# Build the list of dataset paths according to how the script was invoked.
dsetpaths = []

if input_type == 'ds':
    # A single dataset path was given on the command line.
    dsetpaths.append(argin)
elif input_type == 'file':
    # A dataset list file was given on the command line.
    read_ds_file()
elif input_type == 'none':
    # Nothing given: generate the list from DBS, which therefore must be set.
    if is_dbs:
        dsfile = cmssw_ver + "_dbslist.txt"
        make_dbs_list(dsfile)
        read_ds_file()
    else:
        print("no dataset specified, and dbs isn't set...")
        print_def()
        sys.exit(13)
0341
0342
0343
# Validate the dataset list (exits if nothing usable is left).
check_dset()

# Record the datasets that are going to be processed.
print('data sets:', dsetpaths)
with open("dset_processed.txt", 'w') as dslproc:
    dslproc.writelines(s + '\n' for s in dsetpaths)

# Harvest config for the first dataset, which crab.cfg refers to.
# NOTE(review): the loop below regenerates this same file, so cmsDriver runs
# twice for the first dataset - confirm whether this early call is needed.
create_harvest(dsetpaths[0])

# Single-dataset crab configuration (first dataset).
f_crab = 'crab.cfg'
create_crab(dsetpaths[0])

# One harvest config per dataset.
for s in dsetpaths:
    create_harvest(s)

# Multicrab configuration covering all datasets.
f_multi_crab = 'multicrab.cfg'
create_mcrab(dsetpaths, f_crab, f_multi_crab)

# Summary of what was produced.
harvfilelist = [make_harv_name(s) for s in dsetpaths]

# PWD may be missing from the environment (e.g. when not started from a
# shell); fall back to the actual working directory instead of raising.
print('\nCreated:\n\t %(pwd)s/%(cf)s \n\t %(pwd)s/%(mc)s' \
      % {'pwd' : os.environ.get("PWD", os.getcwd()),'cf' : f_crab, 'mc' : f_multi_crab})
print("\tIndividual harvest py's:\n\t", harvfilelist)

print("Done.")
0379 print("Done.")