File indexing completed on 2024-11-26 02:34:21
0001
0002
0003 import sys
0004 import os
0005
0006 """
0007 arguments [<list-of-processes>]
0008 description:
0009 creates crab.cfg, multicrab.cfg, harvest_*.py
0010 if dbs is set:
0011 prints number of events found in dataset
0012 if no argument is provided, looks for all available datasets for the release
0013 user can edit multicrab and confirm process list as needed
0014 nuno@cern.ch 09.04
0015 """
0016
def print_def():
    """Print the command-line usage line followed by example invocations."""
    examples = (
        "harvestRelVal.py",
        "harvestRelVal.py /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO",
        "harvestRelVal.py <dataset_list.txt>",
    )
    print("Usage:", sys.argv[0], "[list_of_processes]")
    print("Examples:")
    for example in examples:
        print(example)
0023
def check_dbs():
    """Tell whether the DBS client environment is configured.

    Returns 1 when the DBSCMD_HOME environment variable holds anything
    other than the literal string 'NOTSET', and 0 when it is unset (or
    set to exactly 'NOTSET').
    """
    dbs_home = os.getenv('DBSCMD_HOME', 'NOTSET')
    return 0 if dbs_home == 'NOTSET' else 1
0028
def check_nevts_dset(dset):
    """Return the total number of events recorded for dataset *dset*.

    Sums the 'NumberOfEvents' entry of every file that
    ``api.listFiles`` reports for the dataset path. Returns -1 when the
    module-level ``is_dbs`` flag is false, i.e. no lookup is attempted.
    """
    if not is_dbs:
        return -1
    file_entries = api.listFiles(path=str(dset))
    return sum(entry['NumberOfEvents'] for entry in file_entries)
0038
def make_dqmname(s):
    """Build the DQM ROOT file name for dataset path *s*.

    Every '/' in *s* becomes '__'; the result is wrapped between the fixed
    'DQM_V0001_R000000001' prefix and the '.root' extension.
    """
    flattened = '__'.join(s.split('/'))
    return ''.join(('DQM_V0001_R000000001', flattened, '.root'))
0041
def get_name_from_dsetpath(ds):
    """Return the sample name of dataset path *ds*.

    The name is the text between the first and second '/' of the path with
    every occurrence of 'RelVal' removed. Raises IndexError when *ds*
    contains no '/'.
    """
    primary = ds.split('/')[1]
    return ''.join(primary.split('RelVal'))
0046
def get_cond_from_dsetpath(ds):
    """Extract the conditions tag (e.g. '30X', '31X') from dataset path *ds*.

    The third '/'-separated field of the path is stripped of the release
    name (module-level ``cmssw_ver`` plus '_'), of 'IDEAL_', 'STARTUP_' and
    '_FastSim'; what precedes the version suffix is taken as the tag.

    Returns the tag as a string, or the integer 0 when no usable tag can be
    derived (callers test ``== 0`` to drop the dataset). Paths with fewer
    than three fields, or with an empty tag, are reported as skipped
    instead of raising IndexError.
    """
    fields = ds.split('/')
    if len(fields) < 3:
        # No processed-dataset field to inspect at all.
        print("skipping", ds)
        return 0

    ca = fields[2]
    for token in (cmssw_ver + '_', 'IDEAL_', 'STARTUP_', '_FastSim'):
        ca = ca.replace(token, '')

    # Keep everything up to (not including) the character before the first
    # 'v', i.e. drop a trailing '_v<N>'. When there is no 'v', find() gives
    # -1 and the last two characters are dropped (original behaviour kept).
    cb = ca[:ca.find('v') - 1]

    # startswith() is safe on an empty string, unlike indexing cb[0].
    if cb.startswith('3') and len(cb) <= 3:
        print("good condition for", ds, " : ", cb, '(len:', len(cb), ')')
        return cb

    print("problem extracting condition for", ds, " : ", cb, '(len:', len(cb), ')')
    # Fall back to a known tag embedded somewhere in the leftover text.
    for known in ('31X', '30X'):
        if known in cb:
            print("condition found:", known)
            return known
    print("skipping", cb)
    return 0
0063
0064
def make_dbs_list(dbslf):
    """Write the dataset paths known to DBS into the text file *dbslf*.

    Does nothing when the module-level ``is_dbs`` flag is false. Otherwise
    every path returned by ``api.listDatasetPaths()`` that matches the
    filter below is written on its own line, and a confirmation is printed.
    """
    if not is_dbs:
        return

    # The context manager closes the file even if the DBS query raises.
    with open(dbslf, 'w') as flis:
        for ads in api.listDatasetPaths():
            # NOTE(review): the three criteria are OR-ed, so a path matching
            # any single one is listed; check_dset() later drops paths that
            # do not contain the release name. Confirm whether 'and' was
            # intended here.
            if 'RelVal' in ads or cmssw_ver in ads or "/GEN-SIM" in ads:
                flis.write(ads + '\n')
    print('Generated dataset list', dbslf, 'from dbs.')
0077
0078
0079
0080
0081
def read_ds_file():
    """Load dataset paths from the file named by module-level ``dsfile``.

    Each line without a '#' is appended to the module-level ``dsetpaths``
    list; lines containing '#' are reported and skipped. Blank lines are
    ignored so that they do not end up as empty dataset paths. Exits with
    status 30 when the file does not exist.
    """
    if not os.path.exists(dsfile):
        print("problem reading file", dsfile)
        sys.exit(30)

    with open(dsfile, 'r') as fin:
        for raw in fin:
            d = raw.replace('\n', '')
            if not d.strip():
                # Empty / whitespace-only line: not a dataset path.
                continue
            if '#' in d:
                print('skipping:', d)
            else:
                dsetpaths.append(d)
    print('Using data set list in ', dsfile)
0095
def check_dset():
    """Validate the module-level ``dsetpaths`` list in place.

    Removes every path that does not contain the release name
    (``cmssw_ver``) and every path for which get_cond_from_dsetpath()
    returns 0. Exits with status 12 when nothing is left; otherwise prints
    the number of datasets and the per-dataset event counts obtained from
    check_nevts_dset().
    """
    # Iterate over a snapshot: removing from the list being iterated makes
    # the iterator skip the element that follows each removed one.
    for s in dsetpaths[:]:
        if cmssw_ver not in s:
            dsetpaths.remove(s)
            print('Inconsistency found with datset and cmssw version (', cmssw_ver, ')'
                  ': \t ', s, ' has been removed.')

    for s in dsetpaths[:]:
        if get_cond_from_dsetpath(s) == 0:
            dsetpaths.remove(s)

    nSamples = len(dsetpaths)
    if nSamples == 0:
        print("Empty input list, exit.")
        sys.exit(12)
    print('Processing', nSamples, 'data sets.')

    nSampleEvts = [check_nevts_dset(s) for s in dsetpaths]
    print('number of events per dataset:', nSampleEvts)
0120
def find_dqmref(ds):
    """Fetch the reference DQM file for dataset *ds* from the previous release.

    The previous release name is derived by decrementing the final digit of
    the module-level ``cmssw_ver``. The matching file is copied from the
    castor reference directory into the current directory with
    ``rfdir``/``rfcp``, and its name is read back via a temporary
    'ref.txt' listing.

    Returns the local file name, or the string 'NONE' when references are
    disabled (``do_reference`` false), when no previous release can be
    derived, or when no file was found.
    """
    if not do_reference:
        return 'NONE'

    cp = cmssw_ver[-1:]
    if len(cp) != 1 or cp not in '123456789':
        # Last character is not a positive digit: there is no "previous"
        # release to derive (int() would raise or give a negative number).
        return 'NONE'
    # Replace only the trailing character; str.replace() would also rewrite
    # the same digit wherever else it occurs in the release name.
    ref_ver = cmssw_ver[:-1] + str(int(cp) - 1)

    ref_dir = "/castor/cern.ch/user/n/nuno/relval/harvest/" + ref_ver + "/"
    ref_dsf = make_dqmname(ds.replace(cmssw_ver, ref_ver))
    # Shell filter selecting the listing column that holds the file name.
    # NOTE(review): the dataset-derived pattern is interpolated unquoted
    # into a shell command line.
    gls = " | grep root | grep " + ref_dsf[:-25] + "| awk '{print $9}' "

    os.system("rfcp " + ref_dir + "`rfdir " + ref_dir + gls + "` .")

    tmpfile = "ref.txt"
    os.system("ls -rtl *" + gls + " > " + tmpfile)

    the_ref = 'NONE'
    if os.path.exists(tmpfile):
        with open(tmpfile, 'r') as fin:
            ref = fin.readline().replace('\n', '')
        if os.path.exists(ref):
            the_ref = ref
    print("Found reference file:", the_ref)
    return the_ref
0154
def create_harvest(ds):
    """Generate the harvesting configuration file for dataset *ds*.

    Runs ``cmsDriver.py`` with the conditions extracted from the dataset
    path (IDEAL / FastSim / PileUp variants are selected from substrings of
    *ds*), renames its output to the name given by make_harv_name(), and
    appends the DQM workflow name, the input file list (only when the
    module-level ``is_dbs`` flag is set) and, when find_dqmref() yields a
    file, the reference handling option.

    Exits with status 50 when no conditions can be extracted and with
    status 40 when cmsDriver did not produce the expected file.
    """
    cmsdriver = ("cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc "
                 "--conditions FrontierConditions_GlobalTag,STARTUP_30X::All "
                 "--harvesting AtJobEnd --no_exec -n -1")
    cond = get_cond_from_dsetpath(ds)
    if cond == 0:
        print('unexpected problem with conditions')
        sys.exit(50)
    cmsdriver = cmsdriver.replace('30X', cond)

    fin_name = "harvest_HARVESTING_STARTUP.py"
    if 'IDEAL' in ds:
        cmsdriver = cmsdriver.replace('STARTUP', 'IDEAL')
        fin_name = fin_name.replace('STARTUP', 'IDEAL')
    if 'FastSim' in ds:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingFS')
    if 'PileUp' in ds:
        cmsdriver = cmsdriver.replace('validationHarvesting', 'validationHarvestingPU')

    # Drop a stale output so the existence check below really reflects
    # this cmsDriver run.
    if os.path.exists(fin_name):
        os.remove(fin_name)
    print("executing cmsdriver command:\n\t", cmsdriver)
    os.system(cmsdriver)
    if not os.path.exists(fin_name):
        print('problem with cmsdriver file name')
        sys.exit(40)

    # Equivalent of the former shell `touch` + `mv`, without passing the
    # dataset-derived file name through a shell.
    os.utime(fin_name, None)
    hf = make_harv_name(ds)
    os.replace(fin_name, hf)

    with open(hf, 'a') as out:
        out.write("\n\n##additions to cmsDriver output \n")
        out.write("process.dqmSaver.workflow = '" + ds + "'\n")
        if is_dbs:
            out.write("process.source.fileNames = cms.untracked.vstring(\n")
            for afile in api.listFiles(path=ds):
                out.write(" '%s',\n" % afile['LogicalFileName'])
            out.write(")\n")

        dqmref = find_dqmref(ds)
        if dqmref != 'NONE':
            out.write("process.dqmSaver.referenceHandling = 'all'\n")
0197
def create_mcrab(set, fcrab, fout):
    """Write the multicrab configuration file *fout*.

    The file starts with a [MULTICRAB] section pointing at the crab
    configuration *fcrab* and a [COMMON] section (all events, one job);
    one section per dataset path in *set* is then added through
    append_sample_mcrab().
    """
    nevt, njob = -1, 1
    out = open(fout, 'w')
    header_parts = (
        '[MULTICRAB]',
        '\ncfg=' + fcrab,
        '\n\n[COMMON]',
        '\nCMSSW.total_number_of_events=' + str(nevt),
        '\nCMSSW.number_of_jobs=' + str(njob),
    )
    for part in header_parts:
        out.write(part)
    for dsetp in set:
        append_sample_mcrab(dsetp, out)
    out.close()
0210
def make_harv_name(dset):
    """Return the harvesting config file name for dataset path *dset*.

    The name is 'harvest_<sample>.py', where <sample> comes from
    get_name_from_dsetpath().
    """
    sample = get_name_from_dsetpath(dset)
    return 'harvest_%s.py' % sample
0213
def append_sample_mcrab(dsetp, fout):
    """Append the multicrab section for dataset path *dsetp* to *fout*.

    *fout* is an already opened, writable file object. The section carries
    the harvesting config, the dataset path and the DQM output file name,
    plus the reference file as additional input when find_dqmref() returns
    something other than 'NONE'. Exits with status 17 when the harvesting
    config file does not exist.
    """
    dqm = make_dqmname(dsetp)
    sample = get_name_from_dsetpath(dsetp)
    hf = make_harv_name(dsetp)
    if not os.path.exists(hf):
        print('problem creating multicrab, file', hf, 'does not exist')
        sys.exit(17)

    section_lines = (
        '\n\n[' + sample + ']',
        '\nCMSSW.pset=' + hf,
        '\nCMSSW.datasetpath=' + dsetp,
        '\nCMSSW.output_file=' + dqm,
    )
    for entry in section_lines:
        fout.write(entry)

    dqmref = find_dqmref(dsetp)
    if dqmref != 'NONE':
        fout.write('\nUSER.additional_input_files=' + dqmref)
0229
def create_crab(ds):
    """Write the crab configuration file for dataset path *ds*.

    The file named by module-level ``f_crab`` receives the ``crab_block``
    template followed by the pset, datasetpath and output_file settings,
    each on its own line.
    """
    dqmout = make_dqmname(ds)
    hf = make_harv_name(ds)
    with open(f_crab, 'w') as out:
        out.write(crab_block)
        out.write('\npset=' + hf)
        # The leading newline was missing, which glued 'datasetpath=' onto
        # the end of the 'pset=' line.
        out.write('\ndatasetpath=' + ds)
        out.write('\noutput_file=' + dqmout)
0239
# Fixed part of the crab configuration written by create_crab(). It ends
# inside the [CMSSW] section, so the settings create_crab() appends after
# it land in that section.
crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 1
#copy_data = 1
#storage_element=srm-cms.cern.ch
#storage_path=/srm/managerv2?SFN=/castor/cern.ch
#user_remote_dir=/user/n/nuno/test
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
number_of_jobs=1
"""
0264
0265
0266
# --- command-line handling -------------------------------------------------
# input_type ends up as 'none' (no argument: query DBS), 'file' (argument is
# an existing file listing datasets) or 'ds' (argument is a dataset path).
input_type = ''
argin = ''
dsfile = ''
do_reference = False
if len(sys.argv) > 2:
    print_def()
    sys.exit(10)
elif len(sys.argv) == 1:
    print("Will search for available datasets.")
    input_type = 'none'
elif len(sys.argv) == 2:
    argin = sys.argv[1]
    if os.path.exists(argin):
        dsfile = argin
        input_type = 'file'
    # Use `in` for both tests: str.find() returns -1 (truthy) when the
    # substring is absent, so a bare find('RelVal') accepted paths without
    # 'RelVal' and rejected those starting with it.
    elif 'CMSSW' in argin and 'RelVal' in argin:
        print('Using specified data set', argin)
        input_type = 'ds'
    else:
        print('Invalid argument: process list, dataset or file',
              argin, 'does not exist.')
        sys.exit(11)
0290
0291
# --- DBS client setup --------------------------------------------------------
# The DBS API is imported, and the `api` handle created, only when
# check_dbs() reports that DBSCMD_HOME is set.
is_dbs = check_dbs()
if is_dbs:
    print("dbs home:", os.getenv('DBSCMD_HOME'))
    from DBSAPI.dbsApi import DbsApi
    from DBSAPI.dbsException import *
    from DBSAPI.dbsApiException import *
    from DBSAPI.dbsOptions import DbsOptionParser
    optManager = DbsOptionParser()
    opts, args = optManager.getOpt()

    # The parsed arguments are discarded; only the service URL is passed on.
    args = {
        'url': "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet",
    }
    api = DbsApi(args)
else:
    print("dbs not set!")
0307
0308
# --- release check -----------------------------------------------------------
# The release name comes from the CMSSW_VERSION environment variable; without
# it the script prints setup instructions and exits with status 12.
cmssw_ver = os.getenv('CMSSW_VERSION', 'NOTSET')
if cmssw_ver != 'NOTSET':
    print("Using cmssw version:", cmssw_ver)
else:
    print("""
cmssw not set!
example:
scramv1 p CMSSW CMSSW_3_1_0_pre5
cd CMSSW_3_1_0_pre5/src
eval `scramv1 runtime -sh`
cd -
""")
    sys.exit(12)
0322
0323
0324
# --- dataset list ------------------------------------------------------------
# Fill dsetpaths according to how the script was invoked (see input_type).
dsetpaths = []

if input_type == 'ds':
    # A single dataset path given on the command line.
    dsetpaths.append(argin)
elif input_type == 'file':
    read_ds_file()
elif input_type == 'none':
    if not is_dbs:
        print("no dataset specified, and dbs isn't set...")
        print_def()
        sys.exit(13)
    # No argument: dump the DBS dataset list to a file, then read it back.
    dsfile = cmssw_ver + "_dbslist.txt"
    make_dbs_list(dsfile)
    read_ds_file()
0340
0341
0342
# --- main processing ---------------------------------------------------------
# Validate the dataset list (exits when it ends up empty).
check_dset()

# Record the datasets that will actually be processed.
print('data sets:', dsetpaths)
with open("dset_processed.txt", 'w') as dslproc:
    for s in dsetpaths:
        dslproc.write(s + '\n')

# Harvesting config for the first dataset.
# NOTE(review): the loop below harvests every dataset, including this one,
# so this call looks redundant - confirm before removing.
create_harvest(dsetpaths[0])

# crab.cfg is written for the first dataset only.
f_crab = 'crab.cfg'
create_crab(dsetpaths[0])

# One harvesting config per dataset.
for s in dsetpaths:
    create_harvest(s)

# multicrab.cfg references crab.cfg and lists every dataset.
f_multi_crab = 'multicrab.cfg'
create_mcrab(dsetpaths, f_crab, f_multi_crab)

# Summary of what was produced.
harvfilelist = [make_harv_name(s) for s in dsetpaths]

# PWD may be missing from the environment (e.g. when not started from a
# shell); fall back to the actual working directory instead of raising
# KeyError after all the work has been done.
print('\nCreated:\n\t %(pwd)s/%(cf)s \n\t %(pwd)s/%(mc)s'
      % {'pwd': os.environ.get("PWD") or os.getcwd(),
         'cf': f_crab, 'mc': f_multi_crab})
print("\tIndividual harvest py's:\n\t", harvfilelist)

print("Done.")
0378 print("Done.")