Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:24:22

0001 #!/usr/bin/env python3
0002 
0003 # Anzar Afaq         June 17, 2008
0004 # Oleksiy Atramentov June 21, 2008
0005 # Charles Plager     Sept  7, 2010
0006 # Volker Adler       Apr  16, 2014
0007 # Raman Khurana      June 18, 2015
0008 # Dinko Ferencek     June 27, 2015
0009 import os
0010 import sys
0011 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
0012 import re
0013 
0014 from FWCore.PythonUtilities.LumiList import LumiList
0015 import json
0016 from pprint import pprint
0017 from datetime import datetime
0018 import subprocess
0019 import Utilities.General.cmssw_das_client as das_client
# Usage text for this script.
# NOTE(review): this name shadows the builtin help(), and nothing in the
# visible code reads the variable (the argument parser is given its own
# description string) -- confirm before renaming or removing it.
help = """
How to use:

edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2

- or -

edmPickEvent.py dataset listOfEvents.txt

listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2

For example:
# run lum   event
46968   2      4
47011 105     23
47011 140  12312

run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)

dataset: it just a name of the physics dataset, if you don't know exact name
    you can provide a mask, e.g.: *QCD*RAW

For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
0050 
0051 
0052 ########################
0053 ## Event helper class ##
0054 ########################
0055 
class Event (dict):
    """Dictionary describing one event to pick.

    Keys are 'run', 'lumi' and 'event' (integers parsed from the input
    line) and 'dataset' (copied from the class attribute ``Event.dataset``
    at construction time).  Keys can also be read as attributes, e.g.
    ``event.run``.
    """

    # Dataset name stamped onto every new Event; it must be assigned on the
    # class before any Event is constructed.
    dataset = None
    # Fields may be separated by any run of whitespace, colons or commas.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` ("run lumi event" or "run:lumi:event").

        ``kwargs`` is accepted for backward compatibility and is ignored.

        Raises:
            RuntimeError: if ``line`` does not start with three integers,
                or if ``Event.dataset`` has not been set.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            self['run']   = int( pieces[0] )
            self['lumi']  = int( pieces[1] )
            self['event'] = int( pieces[2] )
        except (IndexError, ValueError) as err:
            # Too few fields or a non-integer field.
            raise RuntimeError("Can not parse '%s' as Event object" \
                  % line.strip()) from err
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print("No dataset is defined for '%s'.  Aborting." % line.strip())
            raise RuntimeError('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary keys as attributes.

        A missing key is reported as AttributeError (not KeyError) so that
        hasattr(), getattr(obj, name, default), copy and pickle behave
        normally.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key) from None

    def __str__ (self):
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s"  % self
0079 
0080 
0081 #################
0082 ## Subroutines ##
0083 #################
0084 
def getFileNames(event, client=None):
    """Return the files DAS reports for ``event``.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.
        client: 'das_client' or 'dasgoclient' to force a backend.  Any
            other value (including None or '') selects dasgoclient when a
            file of that name is found in a PATH directory, and das_client
            otherwise.

    Returns:
        Whatever the selected backend helper returns.
    """
    if client == 'das_client':
        return getFileNames_das_client(event)
    if client == 'dasgoclient':
        return getFileNames_dasgoclient(event)
    # Default action: prefer dasgoclient when it is installed.  PATH may be
    # unset, in which case there is nothing to search.
    search_path = os.environ.get('PATH')
    directories = search_path.split(os.pathsep) if search_path else []
    if any(os.path.isfile(os.path.join(directory, 'dasgoclient'))
           for directory in directories):
        return getFileNames_dasgoclient(event)
    return getFileNames_das_client(event)
0096 
def getFileNames_das_client(event):
    """Query DAS through the das_client module and collect file names.

    The query selects files of the event's dataset for its run and lumi
    section.  If the reply status is not 'ok', the status is printed and an
    empty list is returned.  Otherwise the first value extracted from each
    data row is collected, skipping empty values and duplicates.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    reply = das_client.get_data(query)

    status = reply['status']
    if status != 'ok':
        print("DAS query status: %s"%(status))
        return []

    filters = reply['mongo_query']['filters']
    rows = reply['data']

    found = []
    for row in rows:
        # Only the first value produced for the row is used.
        name = list(das_client.get_value(row, filters['grep']))[0]
        if len(name) > 0 and name not in found:
            found.append(name)
    return found
0119 
def getFileNames_dasgoclient(event):
    """Query DAS through the ``dasgoclient`` executable.

    Args:
        event: mapping with 'dataset', 'run' and 'lumi' entries.

    Returns:
        List of the non-empty 'name' values found in the 'file' records of
        the JSON reply, in reply order.

    Exits the process with status 1 if dasgoclient writes anything to
    stderr or if its stdout cannot be interpreted as the expected JSON.
    """
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i" % event
    cmd = ['dasgoclient', '-query', query, '-json']
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains both pipes concurrently and waits for the child.
    # Reading stderr to EOF before stdout can deadlock once the child fills
    # the stdout pipe buffer.
    dasout, err = proc.communicate()
    if err:
        print("DAS error: %s" % err)
        print(dasout)
        sys.exit(1)
    files = []
    try:
        for row in json.loads(dasout):
            for rec in row.get('file', []):
                fname = rec.get('name', '')
                if fname:
                    files.append(fname)
    except (ValueError, AttributeError, TypeError):
        # Invalid JSON, or JSON that is not a list of dict records.
        print(dasout)
        sys.exit(1)
    return files
0143 
def fullCPMpath():
    """Return the path of copyPickMerge_cfg.py.

    The file is looked up under $CMSSW_BASE first and then under
    $CMSSW_RELEASE_BASE; the first existing path wins.

    Raises:
        RuntimeError: if CMSSW_BASE is not set, or if the file exists in
            neither area.
    """
    if not os.environ.get ('CMSSW_BASE'):
        raise RuntimeError("CMSSW Environment not set")
    relative = "src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py"
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get (variable)
        if not base:
            # Skip an unset area rather than probing a bogus "None/src/..." path.
            continue
        candidate = "%s/%s" % (base, relative)
        if os.path.exists (candidate):
            return candidate
    raise RuntimeError("Could not find copyPickMerge_cfg.py")
0158 
def guessEmail():
    """Build a default e-mail address as '<whoami>@<domain>'.

    The domain is the last two dot-separated components of the output of
    the ``hostname`` command.
    """
    user = subprocess.getoutput ('whoami')
    host_parts = subprocess.getoutput('hostname').split('.')
    domain = '.'.join(host_parts[-2:])
    return '%s@%s' % (user, domain)
0162 
def setupCrabDict (options):
    """Assemble the substitution dictionary used to fill the CRAB template.

    Reads ``options.base``, ``options.email`` and ``options.crabCondor``.
    File names are derived from the base name, the work area is a
    timestamp taken at call time, and the dataset comes from
    ``Event.dataset``.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    return {
        'runEvent':      '%s_runEvents.txt' % prefix,
        'copyPickMerge': fullCPMpath(),
        'output':        '%s.root' % prefix,
        'crabcfg':       '%s_crab.py' % prefix,
        'json':          '%s.json' % prefix,
        'dataset':       Event.dataset,
        'email':         options.email,
        'WorkArea':      timestamp,
        'scheduler':     'condor' if options.crabCondor else 'remoteGlidein',
        # Always empty, whichever scheduler is selected.
        'useServer':     '',
    }
0183 
# Text of the CRAB configuration file written by the main program.  It is
# filled in with the %-operator from the dictionary built by setupCrabDict;
# the placeholders used are WorkArea, copyPickMerge, runEvent, output,
# dataset and json.  The storage site is hard-coded in the template and is
# meant to be edited by the user in the generated file.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit

## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library: 

from WMCore.Configuration import Configuration
config = Configuration()

##  Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'


config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']

config.section_("Data")
config.Data.inputDataset = '%(dataset)s'

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True

config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"

'''
0228 
0229 ########################
0230 ## ################## ##
0231 ## ## Main Program ## ##
0232 ## ################## ##
0233 ########################
0234 
if __name__ == "__main__":
    # Script entry point:
    #   1. parse the command line,
    #   2. build the list of events (from arguments or from a text file),
    #   3. either write the CRAB job files (--crab, or too many events), or
    #      look up the input files in DAS and print / run edmCopyPickMerge.
    email = guessEmail()
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description='''This program
facilitates picking specific events from a data set.  For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents''')
    parser.add_argument('--output', dest='base', type=str,
                        default='pickevents',
                        help='Base name to use for output files (root, JSON, run and event list, etc.)')
    parser.add_argument('--runInteractive', dest='runInteractive', action='store_true',
                        help = 'Call "cmsRun" command if possible.  Can take a long time.')
    parser.add_argument('--printInteractive', dest='printInteractive', action='store_true',
                        help = 'Print "cmsRun" command instead of running it.')
    parser.add_argument('--maxEventsInteractive', dest='maxEventsInteractive', type=int,
                        default=20,
                        help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_argument('--crab', dest='crab', action='store_true',
                        help = 'Force CRAB setup instead of interactive mode')
    parser.add_argument('--crabCondor', dest='crabCondor', action='store_true',
                        help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_argument('--email', dest='email', type=str,
                        default=email,
                        help="Specify email for CRAB")
    das_cli = ''
    parser.add_argument('--das-client', dest='das_cli', type=str,
                        default=das_cli,
                        help="Specify das client to use")
    parser.add_argument("dataset", type=str)
    parser.add_argument("events", metavar="events_or_events.txt", type=str, nargs='+')
    options = parser.parse_args()

    Event.dataset = options.dataset
    # '#' starts a comment that runs to the end of the line; '.*' (not '.+')
    # so that a lone '#' is also removed.
    commentRE = re.compile (r'#.*$')
    colonRE   = re.compile (r':')
    eventList = []
    if len (options.events) > 1 or colonRE.search (options.events[0]):
        # events are coming in from the command line
        for piece in options.events:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file
        with open(options.events[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                if not line.strip():
                    # blank or comment-only line: nothing to parse
                    continue
                try:
                    event = Event (line)
                except RuntimeError:
                    print("Skipping '%s'." % line.strip())
                    continue
                eventList.append(event)

    if not eventList:
        print("No events defined.  Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError("This job cannot be run interactively, but rather by crab.  Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Deliberately not named 'json': at module level that would rebind
        # the imported json module.
        lumiMask = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiMask.writeJSON (crabDict['json'])
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print("You are running on condor.  Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print("** WARNING: ** You are missing ~/.profile file.  Please see CRABonLPCCAF instructions above.\n")
        print("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changes.  Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames(event, options.das_cli)
            # Event not contained in the input dataset.  Both DAS helpers
            # return an empty list when they find nothing; the literal
            # ['[]'] form is still accepted.
            # NOTE(review): presumably ['[]'] came from an older client's
            # text output -- confirm.
            if not eventFiles or eventFiles == ['[]']:
                print("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s.  Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Purge events
        for event in eventPurgeList:
            eventList.remove( event )
        if not eventList:
            # Every requested event was purged: there is nothing to run.
            print("No events left to pick.  Aborting.")
            sys.exit()
        # Purge duplicate files, keeping first-seen order
        uniqueFiles = list (dict.fromkeys (files))
        source = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(\
          sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n  eventsToProcess=%s \\\n  inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print("\n%s" % command)
        if options.runInteractive and not options.printInteractive:
            os.system (command)
0356