File indexing completed on 2023-03-17 11:16:58
0001
0002
0003
0004
0005
0006
0007
0008
0009 from __future__ import print_function
0010 import os
0011 import sys
0012 import optparse
0013 import re
0014
0015 from FWCore.PythonUtilities.LumiList import LumiList
0016 import json
0017 from pprint import pprint
0018 from datetime import datetime
0019 import subprocess
0020 import Utilities.General.cmssw_das_client as das_client
# Usage text for the script.
# NOTE(review): the name shadows the builtin help(); it is kept unchanged for
# backward compatibility. It is not referenced in the visible part of the
# file (the option parser builds its own help output).
help = """
How to use:

edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvent.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it is just a name of the physics dataset, if you don't know the exact name
you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
0051
0052
0053
0054
0055
0056
class Event(dict):
    """A single (run, lumi, event) triple plus the dataset it belongs to.

    The object is a dict with the keys 'run', 'lumi', 'event' and 'dataset';
    the same values are also readable as attributes (``ev.run`` etc.).
    """

    # Dataset name copied into every new Event; it has to be assigned on the
    # class (Event.dataset = ...) before any instance is created.
    dataset = None
    # Fields may be separated by any mix of whitespace, colons and commas.
    splitRE = re.compile(r'[\s:,]+')

    def __init__(self, line, **kwargs):
        """Parse 'run lumi event' (or 'run:lumi:event') out of *line*.

        Extra keyword arguments are accepted and ignored.

        Raises RuntimeError if the three integers cannot be parsed or if
        Event.dataset has not been set.
        """
        super(Event, self).__init__()
        pieces = Event.splitRE.split(line.strip())
        try:
            self['run'] = int(pieces[0])
            self['lumi'] = int(pieces[1])
            self['event'] = int(pieces[2])
        except (IndexError, ValueError):
            # Too few fields or a non-integer field.
            raise RuntimeError("Can not parse '%s' as Event object"
                               % line.strip())
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'.  Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__(self, key):
        """Expose dict entries as attributes.

        Only called when normal attribute lookup fails. A missing key must
        surface as AttributeError (not KeyError) so that hasattr(),
        getattr(obj, name, default) and the copy module keep working.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__(self):
        """Return a one-line human readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
0080
0081
0082
0083
0084
0085
def getFileNames(event, client=None):
    """Return files for given DAS query.

    event  -- mapping with 'dataset', 'run' and 'lumi' entries.
    client -- 'das_client' or 'dasgoclient' to force a specific client.
              Any other value (including the default None) selects
              dasgoclient when a file of that name is found in one of the
              PATH directories, and das_client otherwise.
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    if client == 'dasgoclient':
        return getFileNames_dasgoclient(event)

    # PATH may be missing from the environment; treat that as "no directories"
    # instead of crashing on None.split().
    pathVar = os.environ.get('PATH')
    searchDirs = pathVar.split(os.pathsep) if pathVar is not None else []
    for directory in searchDirs:
        if os.path.isfile(os.path.join(directory, 'dasgoclient')):
            return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
0097
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client.

    event -- mapping with 'dataset', 'run' and 'lumi' entries.

    Returns a list of unique file names in the order they were reported.
    An empty list is returned (after printing the status) when the DAS
    reply status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    filters = jsondict['mongo_query']['filters']
    data = jsondict['data']

    files = []
    for row in data:
        # Take the first value produced for the grep filter; a row that
        # produces no value is skipped instead of raising IndexError.
        name = next(iter(das_client.get_value(row, filters['grep'])), '')
        if len(name) > 0 and name not in files:
            files.append(name)

    return files
0120
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient.

    event -- mapping with 'dataset', 'run' and 'lumi' entries.

    Runs ``dasgoclient -query <query> -json`` and collects the 'name' of
    every record found under the 'file' key of each returned row.

    The program exits with status 1 (after printing the client output) if
    dasgoclient writes anything to stderr or if its output cannot be
    interpreted.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes concurrently and waits for the child.
    # Reading stderr to EOF before touching stdout can block forever once the
    # child has filled the stdout pipe buffer.
    dasout, err = proc.communicate()
    # The pipes deliver bytes; decode so messages print as text.
    dasout = dasout.decode('utf-8', 'replace')
    err = err.decode('utf-8', 'replace')
    if err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)

    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, AttributeError, TypeError):
        # Invalid JSON, or JSON that is not a list of dict-like rows.
        print(dasout)
        sys.exit(1)
    return files
0144
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up below $CMSSW_BASE first and below
    $CMSSW_RELEASE_BASE second; the first existing path is returned.

    Raises RuntimeError if CMSSW_BASE is not set (or empty) or if the
    file exists in neither location.
    """
    localBase = os.environ.get('CMSSW_BASE')
    if not localBase:
        raise RuntimeError("CMSSW Environment not set")
    pattern = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    # Local developer area takes precedence over the release area.
    for root in (localBase, os.environ.get('CMSSW_RELEASE_BASE')):
        candidate = pattern % root
        if os.path.exists(candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
0159
def guessEmail():
    """Build a default e-mail address of the form '<user>@<domain>'.

    <user> is the output of the ``whoami`` command and <domain> consists of
    the last two dot-separated components of the ``hostname`` output.
    """
    user = subprocess.getoutput('whoami')
    hostParts = subprocess.getoutput('hostname').split('.')
    domain = '.'.join(hostParts[-2:])
    return '%s@%s' % (user, domain)
0163
def setupCrabDict(options):
    """Collect the values needed to write the CRAB configuration.

    options -- parsed command-line options; the attributes 'base', 'email'
               and 'crabCondor' are read.

    Returns a dict with the output file names derived from options.base,
    the path of copyPickMerge_cfg.py, the dataset taken from Event.dataset,
    the e-mail address, a time-stamp based work area name, the scheduler
    and an empty 'useServer' entry.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    # NOTE(review): 'useServer' is set unconditionally here; the original
    # layout (indentation lost in this copy) is assumed to do the same.
    return {
        'runEvent': '%s_runEvents.txt' % prefix,
        'copyPickMerge': fullCPMpath(),
        'output': '%s.root' % prefix,
        'crabcfg': '%s_crab.py' % prefix,
        'json': '%s.json' % prefix,
        'dataset': Event.dataset,
        'email': options.email,
        'WorkArea': timestamp,
        'scheduler': 'condor' if options.crabCondor else 'remoteGlidein',
        'useServer': '',
    }
0184
0185
# Text of the CRAB3 configuration file written by the main program.
# It is filled in with %-formatting from a dictionary and uses the keys
# WorkArea, copyPickMerge, runEvent, output, dataset and json.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:

from WMCore.Configuration import Configuration
config = Configuration()

## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
0229
0230
0231
0232
0233
0234
0235
if __name__ == "__main__":
    # Command-line entry point: parse the options, build the list of events
    # and then either prepare a CRAB job or print/run an edmCopyPickMerge
    # command for interactive use.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option('--output', dest='base', type='string',
                      default='pickevents',
                      help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option('--runInteractive', dest='runInteractive', action='store_true',
                      help='Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option('--printInteractive', dest='printInteractive', action='store_true',
                      help='Print "cmsRun" command instead of running it.')
    parser.add_option('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                      default=20,
                      help='Maximum number of events allowed to be processed interactively.')
    parser.add_option('--crab', dest='crab', action='store_true',
                      help='Force CRAB setup instead of interactive mode')
    parser.add_option('--crabCondor', dest='crabCondor', action='store_true',
                      help='Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option('--email', dest='email', type='string',
                      default='',
                      help="Specify email for CRAB (default '%s')" % email)
    das_cli = ''
    parser.add_option('--das-client', dest='das_cli', type='string',
                      default=das_cli,
                      help="Specify das client to use (default '%s')" % das_cli)
    (options, args) = parser.parse_args()

    # A dataset and at least one event (or an event file) are required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    # '.*' (rather than '.+') so that a bare '#' is stripped as well.
    commentRE = re.compile(r'#.*$')
    colonRE = re.compile(r':')
    eventList = []
    if len(args) > 1 or colonRE.search(args[0]):
        # Events given directly on the command line.
        for piece in args:
            try:
                event = Event(piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append(event)
    else:
        # Events listed in a text file, one per line.
        with open(args[0], 'r') as eventFile:
            for line in eventFile:
                line = commentRE.sub('', line)
                if not line.strip():
                    # Blank or comment-only line: ignore silently.
                    continue
                try:
                    event = Event(line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined.  Aborting.")
        sys.exit()

    # Too many events for interactive processing: fall back to CRAB.
    if len(eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:
        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [(event.run, event.lumi) for event in eventList]
        # Named lumiList (not 'json') so the imported json module is not
        # rebound at module level.
        lumiList = LumiList(lumis=runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted("%d:%d" % (event.run, event.event) for event in eventList))
        crabDict = setupCrabDict(options)
        lumiList.writeJSON(crabDict['json'])
        with open(crabDict['runEvent'], 'w') as target:
            target.write("%s\n" % eventsToProcess)
        # 'with' guarantees the config file is flushed and closed.
        with open(crabDict['crabcfg'], 'w') as target:
            target.write(crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:
        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # The lookup functions return an empty list when nothing is
            # found. The literal ['[]'] comparison is kept from the original
            # code (presumably an older client output format - unverified).
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append(event)
            else:
                files.extend(eventFiles)
        # Remove the events for which no file was found.
        for event in eventPurgeList:
            eventList.remove(event)
        if not eventList:
            print("None of the requested events was found.  Aborting.")
            sys.exit()
        # Drop duplicate file names while keeping their first-seen order.
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add(filename)
            uniqueFiles.append(filename)
        inputFiles = ','.join(uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted("%d:%d" % (event.run, event.event) for event in eventList))
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, inputFiles)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system(command)
0363