Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
#!/usr/bin/env python

import sys
import os

# Command-line validation: the script takes exactly one argument, the dataset path.

# Help text shown on misuse. The trailing backslash inside the literal is a
# Python line continuation, so the example command is printed on one line.
usage_text = """
    Usage: create_harvesting_py.py  <dataset>
    example:
    create_harvesting_py.py \
     /RelValTTbar/CMSSW_3_1_0_pre4_STARTUP_30X_v1/GEN-SIM-RECO
    """

# sys.argv[0] is the script name, so one user argument means two entries.
expected_argc = 2
if len(sys.argv) != expected_argc:
    print(usage_text)
    sys.exit(10)
    
# Environment sanity checks, evaluated in order. Each entry is
# (environment variable, message printed when it is missing, exit code).
missing_marker = 'NOTSET'
required_env = (
    ('DBSCMD_HOME', "dbs not set!", 11),
    ('CMSSW_VERSION', """
    cmssw not set!
    example:
      cmsrel CMSSW_3_1_0_pre4
      cd CMSSW_3_1_0_pre4/src
      eval `scramv1 runtime -sh`
      cd -
    """, 12),
)

for env_name, complaint, exit_code in required_env:
    # The marker doubles as the default, so a variable whose value is
    # literally 'NOTSET' is treated the same as an unset one.
    if os.getenv(env_name, missing_marker) == missing_marker:
        print(complaint)
        sys.exit(exit_code)

# The single command-line argument is the dataset path to process.
dsetpath = sys.argv[1]

from DBSAPI.dbsApi import DbsApi
from DBSAPI.dbsException import *
from DBSAPI.dbsApiException import *
from DBSAPI.dbsOptions import DbsOptionParser

# Build the DBS API client from the options returned by the DBS option parser.
optManager = DbsOptionParser()
opts, args = optManager.getOpt()
api = DbsApi(opts.__dict__)

# Print the dataset path followed by the logical file name of every file
# that DBS lists for it.
print("dataset: ", dsetpath)
print("data files: ")
for entry in api.listFiles(path=dsetpath):
    lfn = entry['LogicalFileName']
    print("  %s" % lfn)

# Determine the total number of events in the dataset and the number of
# jobs to split them into.

# Reference number of events per job. It is used both as the threshold above
# which the work is split and as the divisor, so the two cannot drift apart.
nevtref = 9000

# Sum the per-file event counts reported by DBS.
totnevts = sum(afile['NumberOfEvents']
               for afile in api.listFiles(path=dsetpath))

# One job for small datasets; otherwise the number of full nevtref-sized
# chunks (floor division, so the remainder is absorbed by the existing jobs).
njobs = int(totnevts // nevtref) if totnevts > nevtref else 1

print("Total # events: ", totnevts, \
      " to be executed in ", njobs, "processes")


# Run the cmsDriver command.
# NOTE(review): the next step expects it to leave harvest_HARVESTING_STARTUP.py
# in the current directory -- confirm against the cmsDriver version in use.
raw_cmsdriver = "cmsDriver.py harvest -s HARVESTING:validationHarvesting --mc  --conditions FrontierConditions_GlobalTag,STARTUP_30X::All --harvesting AtJobEnd --no_exec -n -1"

print("executing cmsdriver command:\n\t", raw_cmsdriver)

# Run the command as-is. It must not be wrapped in backticks: the shell would
# then capture cmsDriver's stdout and try to execute that text as a second
# command, besides hiding the output from the user.
cmsdriver_status = os.system(raw_cmsdriver)
if cmsdriver_status != 0:
    # Stay best-effort (later steps still produce a harvest.py), but do not
    # let the failure pass unnoticed.
    print("WARNING: cmsDriver command returned non-zero status:",
          cmsdriver_status)


# Turn the cmsDriver output file into harvest.py and append the
# dataset-specific settings to it.
fin_name = "harvest_HARVESTING_STARTUP.py"
pyout_name = "harvest.py"

# Equivalent of "touch": make sure the input file exists (empty if it was not
# produced), so the rename below always succeeds and harvest.py never keeps
# stale content from an earlier run.
with open(fin_name, 'a'):
    pass
# On POSIX this replaces an existing harvest.py, like "mv" did; unlike the
# shelled-out "mv", a failure raises instead of being silently ignored.
os.rename(fin_name, pyout_name)

# Additions to the py config: DQM saver workflow name and the list of input
# files. The "with" block guarantees the file is closed even if the DBS query
# or a write fails part-way through.
with open(pyout_name, 'a') as pyout:
    pyout.write("\n\n##additions to cmsDriver output \n")
    pyout.write("process.dqmSaver.workflow = '" + dsetpath + "'\n")
    pyout.write("process.source.fileNames = cms.untracked.vstring(\n")

    for afile in api.listFiles(path=dsetpath):
        pyout.write("  '%s',\n" % afile['LogicalFileName'])

    pyout.write(")")


#Create crab conf

# Static part of the CRAB configuration. It ends inside the [CMSSW] section,
# so the per-dataset keys written after it are appended to that section.
crab_block = """
[CRAB]
jobtype = cmssw
scheduler = glite
#server_name = 

[EDG]
remove_default_blacklist=1
rb = CERN

[USER]
return_data = 0
copy_data = 1
storage_element=srm-cms.cern.ch
storage_path=/srm/managerv2?SFN=/castor/cern.ch/
user_remote_dir=/user/n/nuno/relval/harvest/
publish_data=0
thresholdLevel=70
eMail=nuno@cern.ch

[CMSSW]
total_number_of_events=-1
show_prod = 1
"""

crab_name = "crab.cfg"

# Keep the previous configuration as crab.cfg_old. This is done only when one
# actually exists, so a run without a prior crab.cfg neither leaves an empty
# backup behind nor overwrites an older, real backup with an empty file.
if os.path.exists(crab_name):
    os.rename(crab_name, crab_name + "_old")

# Output ROOT file name: fixed DQM prefix plus the dataset path with every
# '/' replaced by '__'.
rootfile = "DQM_V0001_R000000001" \
           + dsetpath.replace('/','__') \
           + ".root"

# "with" guarantees the file is flushed and closed even if a write fails.
with open(crab_name, 'w') as crab_cfg:
    crab_cfg.write(crab_block)
    crab_cfg.write("number_of_jobs=" + str(njobs) + "\n")
    crab_cfg.write("pset=" + pyout_name + "\n")
    crab_cfg.write("output_file=" + rootfile + "\n")
    crab_cfg.write("datasetpath=" + dsetpath + "\n")

# Report the full paths of the two generated files.
# PWD is preferred because it keeps the path as the user's shell shows it,
# but it is not guaranteed to be exported (e.g. cron jobs or batch wrappers),
# so fall back to the real working directory instead of raising KeyError.
workdir = os.environ.get("PWD") or os.getcwd()

print('\n\nCreated:\n\t %(pwd)s/%(pf)s \n\t %(pwd)s/%(cf)s' \
      % {'pwd' : workdir, 'pf' : pyout_name, 'cf' : crab_name})

print("Done.")