Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:16:58

0001 #!/usr/bin/env python3
0002 
0003 # Anzar Afaq         June 17, 2008
0004 # Oleksiy Atramentov June 21, 2008
0005 # Charles Plager     Sept  7, 2010
0006 # Volker Adler       Apr  16, 2014
0007 # Raman Khurana      June 18, 2015
0008 # Dinko Ferencek     June 27, 2015
0009 from __future__ import print_function
0010 import os
0011 import sys
0012 import optparse
0013 import re
0014 
0015 from FWCore.PythonUtilities.LumiList   import LumiList
0016 import json
0017 from pprint import pprint
0018 from datetime import datetime
0019 import subprocess
0020 import Utilities.General.cmssw_das_client as das_client
# Usage text for this script, kept as a plain module-level string.
# NOTE(review): the name shadows the built-in help(), and nothing in the
# visible part of this file reads it (the option parser prints its own
# generated help) -- confirm no external user before renaming or removing.
help = """
How to use:

edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvent.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum   event
46968   2      4
47011 105     23
47011 140  12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it just a name of the physics dataset, if you don't know exact name
    you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
0051 
0052 
0053 ########################
0054 ## Event helper class ##
0055 ########################
0056 
class Event (dict):
    """Dictionary describing one event to pick.

    An instance holds the keys 'run', 'lumi' and 'event' (integers parsed
    from a text line) plus 'dataset' (copied from the class attribute
    Event.dataset at construction time).  The keys can also be read as
    attributes, e.g. ``evt.run``.
    """

    # Dataset name shared by all events; must be assigned on the class
    # (Event.dataset = ...) before any instance is created.
    dataset = None
    # Fields may be separated by any mix of whitespace, colons and commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse 'run lumi event' out of `line`.

        The first three fields of `line` must be integers; any further
        fields are ignored.  Extra keyword arguments are accepted for
        backward compatibility and ignored.

        Raises RuntimeError if the line cannot be parsed or if no dataset
        has been assigned to Event.dataset.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run']     = int( pieces[0] )
            self['lumi']    = int( pieces[1] )
            self['event']   = int( pieces[2] )
        except (ValueError, IndexError):
            # ValueError: a field is not an integer; IndexError: fewer than
            # three fields.  Anything else is a genuine bug and propagates.
            raise RuntimeError("Can not parse '%s' as Event object" \
                  % line.strip())
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'.  Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        Only called when normal attribute lookup fails.  A missing key is
        reported as AttributeError (not KeyError) so that hasattr(),
        getattr(obj, name, default) and similar protocols work as expected.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __str__ (self):
        """Return a one-line human-readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s"  % self
0080 
0081 
0082 #################
0083 ## Subroutines ##
0084 #################
0085 
def getFileNames(event, client=None):
    """Return the files containing `event`, as reported by a DAS query.

    `client` selects the backend: 'das_client' or 'dasgoclient'.  For any
    other value (including None or an empty string) dasgoclient is used
    when a file of that name is found in a directory listed in PATH, and
    das_client otherwise.
    """
    if  client == 'das_client':
        return getFileNames_das_client(event)
    elif client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # default action: prefer dasgoclient when it is reachable through PATH
    searchPath = os.environ.get('PATH')
    if searchPath is not None:  # PATH may be unset; do not crash on None
        for directory in searchPath.split(os.pathsep):
            if  os.path.isfile(os.path.join(directory, 'dasgoclient')):
                return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
0097 
def getFileNames_das_client(event):
    """Return files for given DAS query via das_client

    The query asks for the files of event['dataset'] that cover
    event['run'] / event['lumi'].  Returns a list of unique entries in the
    order they were reported; an empty list is returned (after printing
    the status) when the DAS query status is not 'ok'.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data(query)
    status = jsondict['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    # The 'grep' filter parsed by DAS names the field(s) to extract per row.
    grepFilter = jsondict['mongo_query']['filters']['grep']
    data = jsondict['data']

    files = []
    for row in data:
        values = list(das_client.get_value(row, grepFilter))
        if not values:
            # Row carries no value for the requested field: skip it rather
            # than fail with IndexError.
            continue
        fileName = values[0]
        if fileName and fileName not in files:
            files.append(fileName)

    return files
0120 
def getFileNames_dasgoclient(event):
    """Return files for given DAS query via dasgoclient

    Runs ``dasgoclient -query <query> -json`` and collects the 'name' of
    every record under the 'file' key of each returned row.

    Exits the program with status 1 (after printing the tool's output) if
    dasgoclient writes anything to stderr or if its stdout cannot be
    interpreted as the expected JSON structure.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes at the same time.  Reading stderr to
    # EOF before touching stdout can deadlock once the child fills the
    # stdout pipe buffer.
    dasout, err = proc.communicate()
    if  err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)
    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, TypeError, AttributeError):
        # ValueError: output is not valid JSON; TypeError/AttributeError:
        # JSON does not have the expected list-of-dicts layout.
        print(dasout)
        sys.exit(1)
    return files
0144 
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE.

    Raises RuntimeError if CMSSW_BASE is not set (or empty), or if the
    file exists in neither location.
    """
    if not os.environ.get ('CMSSW_BASE'):
        raise RuntimeError("CMSSW Environment not set")
    relativePath = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get (variable)
        if not base:
            # Unset variable: nothing to probe (avoids testing "None/src/...").
            continue
        candidate = "%s/%s" % (base, relativePath)
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
0159 
def guessEmail():
    """Guess an e-mail address of the form <user>@<domain>.

    <user> is the output of ``whoami``; <domain> is made of the last two
    dot-separated components of the output of ``hostname``.
    """
    user = subprocess.getoutput ('whoami')
    hostParts = subprocess.getoutput ('hostname').split ('.')
    domain = '.'.join (hostParts[-2:])
    return '%s@%s' % (user, domain)
0163 
def setupCrabDict (options):
    """Build the dictionary of values used to fill in the CRAB template.

    Reads `options.base`, `options.email` and `options.crabCondor`; the
    output file names are all derived from `options.base`, and the work
    area is named after the current date and time.
    """
    timeStamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    baseName = options.base
    settings = {
        'runEvent'      : '%s_runEvents.txt' % baseName,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % baseName,
        'crabcfg'       : '%s_crab.py' % baseName,
        'json'          : '%s.json' % baseName,
        'dataset'       : Event.dataset,
        'email'         : options.email,
        'WorkArea'      : timeStamp,
        'scheduler'     : 'condor' if options.crabCondor else 'remoteGlidein',
        # The server setting is left empty regardless of the scheduler.
        'useServer'     : '',
    }
    return settings
0184 
# Template of the CRAB 3 configuration file written by the main program.
# It is filled in with the '%' operator from the dictionary returned by
# setupCrabDict(); the placeholders used here are WorkArea, copyPickMerge,
# runEvent, output, dataset and json.  The other keys of that dictionary
# are not referenced by the template.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library: 

from WMCore.Configuration import Configuration
config = Configuration()

##  Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
0229 
0230 ########################
0231 ## ################## ##
0232 ## ## Main Program ## ##
0233 ## ################## ##
0234 ########################
0235 
if __name__ == "__main__":
    # Command-line driver.  Steps:
    #   1. parse the options and the positional arguments (dataset + events),
    #   2. build the list of Event objects from the command line or a file,
    #   3. either write the CRAB configuration files (CRAB mode) or build,
    #      print and optionally run an edmCopyPickMerge command
    #      (interactive mode).
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set.  For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible.  Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    das_cli = ''
    parser.add_option ('--das-client', dest='das_cli', type='string',
                       default=das_cli,
                       help="Specify das client to use (default '%s')" % das_cli )
    (options, args) = parser.parse_args()


    # Both the dataset and at least one event (or an event file) are required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    Event.dataset = args.pop(0)
    # '.*' (rather than '.+') so that a lone '#' is also treated as a comment.
    commentRE = re.compile (r'#.*$')
    colonRE   = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open(args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # blank or comment-only line: nothing to report
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined.  Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB automatically.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab.  Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Not named 'json': that would rebind the imported json module.
        lumiList = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
          sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        crabDict = setupCrabDict (options)
        lumiList.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor.  Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            # expanduser() also works when HOME is not set in the environment
            if not os.path.exists (os.path.expanduser ('~/.profile')):
                print("** WARNING: ** You are missing ~/.profile file.  Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed.  Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # An empty list, or the literal '[]' entry, means the event is
            # not contained in the input dataset.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s.  Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Purge events
        for event in eventPurgeList:
            eventList.remove( event )
        if not eventList:
            # Nothing left to pick: do not build a command with empty inputs.
            print("No events found in dataset.  Aborting.")
            sys.exit(1)
        # Purge duplicate files, keeping the order of first appearance
        uniqueFiles = list (dict.fromkeys (files))
        source = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
          sorted( "%d:%d" % (event.run, event.event) for event in eventList ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n  eventsToProcess=%s \\\n  inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)
0363