File indexing completed on 2024-04-06 12:24:22
0001
0002
0003
0004
0005
0006
0007
0008
0009 import os
0010 import sys
0011 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
0012 import re
0013
0014 from FWCore.PythonUtilities.LumiList import LumiList
0015 import json
0016 from pprint import pprint
0017 from datetime import datetime
0018 import subprocess
0019 import Utilities.General.cmssw_das_client as das_client
0020 help = """
0021 How to use:
0022
0023 edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2
0024
0025 - or -
0026
0027 edmPickEvent.py dataset listOfEvents.txt
0028
0029 listOfEvents is a text file:
0030 # this line is ignored as a comment
0031 # since '#' is a valid comment character
0032 run1 lumi_section1 event1
0033 run2 lumi_section2 event2
0034
0035 For example:
0036 # run lum event
0037 46968 2 4
0038 47011 105 23
0039 47011 140 12312
0040
0041 run, lumi_section, and event are integers that you can get from
0042 edm::Event(Auxiliary)
0043
dataset: it is just the name of the physics dataset; if you don't know the exact name
you can provide a mask, e.g.: *QCD*RAW
0046
0047 For updated information see Wiki:
0048 https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
0049 """
0050
0051
0052
0053
0054
0055
class Event (dict):
    """A single event to pick, stored as a dict with attribute-style access.

    An instance is built from one line of text holding three integers
    (run, lumi section, event number) separated by whitespace, colons or
    commas.  The keys 'run', 'lumi', 'event' and 'dataset' are filled in;
    'dataset' is copied from the class attribute ``Event.dataset``, which
    has to be set before any instance is created.

    Raises:
        RuntimeError: if the line does not hold three integers, or if
            ``Event.dataset`` is empty.
    """

    # Dataset name shared by all events; assigned by the caller.
    dataset = None
    # Field separators accepted between run, lumi and event.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run'] = int( pieces[0] )
            self['lumi'] = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError) as err:
            # Too few fields or a non-integer field.  Only these parse
            # failures are translated, so KeyboardInterrupt and friends
            # are no longer swallowed.
            raise RuntimeError("Can not parse '%s' as Event object" \
                               % line.strip()) from err
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'.  Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        # Only called when normal attribute lookup fails.  A missing key
        # must surface as AttributeError so that hasattr() and
        # getattr(obj, name, default) behave as expected.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
0079
0080
0081
0082
0083
0084
def getFileNames(event, client=None):
    """Return files for given DAS query.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries, passed
            through unchanged to the selected backend.
        client: 'das_client' or 'dasgoclient' to force a backend.  Any
            other value (including None and '') selects automatically:
            dasgoclient when a file of that name is found in a PATH
            directory, das_client otherwise.

    Returns:
        Whatever the selected backend returns (a list of file names).
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    elif client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # No explicit choice: look for dasgoclient on PATH.  A missing PATH
    # variable is treated as empty instead of crashing on None.split().
    search_dirs = os.environ.get('PATH', '').split(os.pathsep)
    for path in search_dirs:
        if os.path.isfile(os.path.join(path, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
0096
def getFileNames_das_client(event):
    """Look up the files holding the event's run and lumi via das_client.

    Returns the distinct, non-empty file names in the order DAS reported
    them.  When the query status is not 'ok' the status is printed and an
    empty list is returned.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    reply = das_client.get_data(query)
    status = reply['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    filters = reply['mongo_query']['filters']
    rows = reply['data']

    found = []
    for row in rows:
        # Only the first value extracted for the grep filter is used.
        name = list(das_client.get_value(row, filters['grep']))[0]
        if len(name) == 0 or name in found:
            continue
        found.append(name)
    return found
0119
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient.

    Runs ``dasgoclient -query ... -json`` and collects every non-empty
    'name' found in the 'file' records of the JSON reply.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.

    Returns:
        List of file names (possibly empty).

    Exits the process with status 1 if dasgoclient writes anything to
    stderr or if its output cannot be interpreted.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes at once.  Reading stderr to EOF
    # before touching stdout can deadlock once the child fills the
    # stdout pipe buffer; it also reaps the child process.
    dasout, err = proc.communicate()
    if err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)
    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, TypeError, AttributeError):
        # Invalid JSON (ValueError) or a reply that does not have the
        # expected list-of-dicts layout: show the raw output and give up.
        print(dasout)
        sys.exit(1)
    return files
0143
def fullCPMpath():
    """Locate copyPickMerge_cfg.py and return its path.

    The area named by CMSSW_BASE is tried first, then the one named by
    CMSSW_RELEASE_BASE.  Raises RuntimeError when CMSSW_BASE is unset or
    empty, or when the file exists in neither area.
    """
    template = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    localArea = os.environ.get ('CMSSW_BASE')
    if not localArea:
        raise RuntimeError("CMSSW Environment not set")
    candidate = template % localArea
    if not os.path.exists (candidate):
        # Fall back to the release area; the variable is interpolated
        # as-is even when it is not set.
        candidate = template % os.environ.get ('CMSSW_RELEASE_BASE')
        if not os.path.exists (candidate):
            raise RuntimeError("Could not find copyPickMerge_cfg.py")
    return candidate
0158
def guessEmail():
    """Build a default e-mail address as '<whoami>@<domain>'.

    The domain is made of the last two dot-separated components of the
    output of ``hostname`` (or all of it when there are fewer than two).
    """
    user = subprocess.getoutput ('whoami')
    hostParts = subprocess.getoutput ('hostname').split ('.')
    domain = '.'.join (hostParts[-2:])
    return '%s@%s' % (user, domain)
0162
def setupCrabDict (options):
    """Collect the values used to write the CRAB files for this request.

    Reads ``options.base``, ``options.email`` and ``options.crabCondor``.
    All output file names are derived from ``options.base``; 'WorkArea'
    is a timestamp taken when the function is called.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    settings = {
        'runEvent': '%s_runEvents.txt' % prefix,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % prefix,
        'crabcfg': '%s_crab.py' % prefix,
        'json': '%s.json' % prefix,
        'dataset': Event.dataset,
        'email': options.email,
        'WorkArea': timestamp,
    }
    settings['scheduler'] = 'condor' if options.crabCondor else 'remoteGlidein'
    settings['useServer'] = ''
    return settings
0183
0184
# Text of the CRAB configuration file that the script writes out.
# It is filled with the %-operator from a dict, which has to provide the
# keys WorkArea, copyPickMerge, runEvent, output, dataset and json.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
0228
0229
0230
0231
0232
0233
0234
if __name__ == "__main__":
    # Script entry point.
    #   1. Parse the command line.
    #   2. Build the list of events, either from run:lumi:event arguments
    #      or from a text file with one event per line.
    #   3. With --crab, or with more events than --maxEventsInteractive,
    #      write the lumi mask, the run:event list and a CRAB config.
    #      Otherwise ask DAS for the input files and print (optionally
    #      run) the matching edmCopyPickMerge command.

    email = guessEmail()
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents''')
    parser.add_argument('--output', dest='base', type=str,
                        default='pickevents',
                        help='Base name to use for output files (root, JSON, run and event list, etc.)")')
    parser.add_argument('--runInteractive', dest='runInteractive', action='store_true',
                        help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_argument('--printInteractive', dest='printInteractive', action='store_true',
                        help = 'Print "cmsRun" command instead of running it.')
    parser.add_argument('--maxEventsInteractive', dest='maxEventsInteractive', type=int,
                        default=20,
                        help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_argument('--crab', dest='crab', action='store_true',
                        help = 'Force CRAB setup instead of interactive mode')
    parser.add_argument('--crabCondor', dest='crabCondor', action='store_true',
                        help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_argument('--email', dest='email', type=str,
                        default=email,
                        help="Specify email for CRAB")
    das_cli = ''
    parser.add_argument('--das-client', dest='das_cli', type=str,
                        default=das_cli,
                        help="Specify das client to use")
    parser.add_argument("dataset", type=str)
    parser.add_argument("events", metavar="events_or_events.txt", type=str, nargs='+')
    options = parser.parse_args()

    Event.dataset = options.dataset
    commentRE = re.compile (r'#.+$')
    colonRE = re.compile (r':')
    eventList = []
    if len (options.events) > 1 or colonRE.search (options.events[0]):
        # Events given directly on the command line: any bad one is fatal.
        for piece in options.events:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # Single argument without a colon: a text file of events.
        # Lines that do not parse (blank lines, comments) are skipped.
        with open(options.events[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined.  Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:
        # CRAB mode: write the helper files and tell the user what to do.
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab.  Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named lumiList (not 'json') so the imported json module, used by
        # getFileNames_dasgoclient, is not rebound at module level.
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(\
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        # 'with' guarantees both files are flushed and closed; the CRAB
        # config used to be left open because close was never called.
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor.  Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file.  Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed.  Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:
        # Interactive mode: find the input files for every event.
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # No file at all (empty list) means the event was not found,
            # just like the legacy ['[]'] reply.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s.  Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Drop the events that were not found.
        for event in eventPurgeList:
            eventList.remove( event )
        # Remove duplicate file names, keeping first-seen order.
        uniqueFiles = list (dict.fromkeys (files))
        inputFiles = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(\
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n  eventsToProcess=%s \\\n  inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)
0356