Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:10:00

0001 #!/usr/bin/env python
0002 # adapted to python and extended (by Andre Holzner) based on a
0003 # shell script by Michael Anderson
0004 #
0005 # To use, type for example:
0006 #  hltValidate 3_5_5
0007 #
0008 # This script runs validation on new relval samples.
0009 # Requires version of CMSSW given.
0010 #
0011 # Michael Anderson
0012 # Sept 15, 2009
0013 
0014 from __future__ import print_function
0015 import sys, os, shutil
0016 #----------------------------------------------------------------------
0017 # parameters
0018 #----------------------------------------------------------------------
# Package which is checked out from CVS and compiled
module = "HLTriggerOffline/Egamma"

# Name of the ROOT file written when running the module
outputRootFile = "DQM_V0001_HLT_R000000001.root"


# Samples this script knows about. For each sample:
#   dataset: dataset pattern to search for, %(version)s is replaced
#            by the CMSSW version
#   output:  name the output ROOT file is renamed to at the end
#
# (one could possibly drop the version string from the patterns
# as the release is explicitly required in the DBS query ?)
knownDatasets = {}

knownDatasets["diGamma"] = {
    "dataset": "/RelValH130GGgluonfusion*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
    "output": "DiGamma_%(version)s.root",
}

knownDatasets["photonJet"] = {
    "dataset": "/RelValPhotonJets_Pt_10*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
    "output": "GammaJet_%(version)s.root",
}

knownDatasets["zee"] = {
    "dataset": "/RelValZEE*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
    "output": "ZEE_%(version)s.root",
}

knownDatasets["wen"] = {
    "dataset": "/RelValWE*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
    "output": "WEN_%(version)s.root",
}
0050 
0051 #----------------------------------------------------------------------
0052 
def execCmd(cmd):
    """Run cmd with os.system and raise an Exception if the
    returned status is not zero."""
    status = os.system(cmd)

    if status == 0:
        return

    message = "failed to execute command '" + cmd + "', exit status = " + str(status)
    raise Exception(message)
0057 
0058 #----------------------------------------------------------------------
0059 
0060 # code based on PhysicsTools.PatAlgos.tools.helpers.MassSearchReplaceAnyInputTagVisitor
0061 # to replace the process names of all input tags found in sequences.
0062 #
0063 # necessary e.g. when one has to run the HLT and thus must later on
0064 # use a different process name
0065 
class ReplaceProcessNameOfInputTags(object):
    """Visitor that travels within a cms.Sequence and replaces the process
       name of the input tags it finds.
       It will climb down within PSets, VPSets and VInputTags to find its target.

       Useful e.g. for replacing the process names of all input tags where the
       process name was specified explicitly.

       NOTE: the name `cms` is not imported in this file. The source code of
       this class is written (using inspect.getsource) into the generated
       cmsRun configuration file and is executed there, so the class must
       stay self-contained. Presumably `cms` is provided by the base
       configuration file -- verify when changing the base config.
    """

    #----------------------------------------
    def __init__(self,origProcessName,newProcessName,verbose=False):
        """origProcessName: process name to look for in the input tags
        newProcessName:  process name to put in its place
        verbose:         print a message for each input tag found
        """
        self._origProcessName = origProcessName
        self._newProcessName  = newProcessName
        self._verbose=verbose

    #----------------------------------------

    def doIt(self,pset,base):
        """Walk through the parameters of pset (recursively) and replace the
        process name of the input tags found.

        pset: the object to look at. Anything which is not a
              cms._Parameterizable is ignored.
        base: path of pset, only used to build the path of nested parameters
        """
        if not isinstance(pset, cms._Parameterizable):
            return

        for name in pset.parameters_().keys():
            # if I use pset.parameters_().items() I get copies of the parameter values
            # so I can't modify the nested pset
            value = getattr(pset,name)
            typeName = value.pythonTypeName()

            if typeName == 'cms.PSet':
                self.doIt(value,base+"."+name)

            elif typeName == 'cms.VPSet':
                for (i,ps) in enumerate(value):
                    self.doIt(ps, "%s.%s[%d]"%(base,name,i) )

            elif typeName == 'cms.VInputTag':
                for (i,n) in enumerate(value):
                    if self._verbose: print("FOUND TAG:",n)

                    # VInputTag can be declared as a list of strings, so ensure
                    # that we work on a cms.InputTag (a plain string has no
                    # processName attribute)
                    tag = self.standardizeInputTagFmt(n)

                    if tag.processName == self._origProcessName:
                        if self._verbose: print("REPLACING")
                        tag.processName = self._newProcessName
                        # write the tag back: for an entry given as a string,
                        # tag is a new object and not the list element
                        value[i] = tag
                    else:
                        if self._verbose: print("NOT REPLACING")

            elif typeName == 'cms.InputTag':
                if self._verbose: print("FOUND TAG:",value)

                if value.processName == self._origProcessName:
                    if self._verbose: print("REPLACING")
                    value.processName = self._newProcessName
                else:
                    if self._verbose: print("NOT REPLACING")

    #----------------------------------------
    @staticmethod
    def standardizeInputTagFmt(inputTag):
        ''' helper function to ensure that the InputTag is defined as cms.InputTag(str) and not as a plain str '''
        if not isinstance(inputTag, cms.InputTag):
            return cms.InputTag(inputTag)
        return inputTag

    #----------------------------------------
    def enter(self,visitee):
        """Called by the sequence for each object visited."""
        try:
            label = visitee.label()
        except AttributeError:
            label = '<Module not in a Process>'
        self.doIt(visitee, label)

    #----------------------------------------
    def leave(self,visitee):
        """Called by the sequence when leaving an object, nothing to do here."""
        pass

    #----------------------------------------
0135 
0136 #----------------------------------------------------------------------
0137 
def findCMSSWreleaseDir(version):
    """ runs scramv1 list to find the directory of the given CMSSW release.

    Sometimes it happens that there is more than one line in the scram output
    for the same release (and even the same directory). In general, just
    the first matching line is returned.

    version: name of the release, with or without the leading 'CMSSW_'

    Returns the directory (third column of the first matching line)
    or None if no line of the output matches the release.
    """

    if not version.startswith("CMSSW_"):
        version = "CMSSW_" + version

    pipe = os.popen('scramv1 list -c CMSSW')
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # expected columns: project, release, directory
        fields = line.split()

        if len(fields) != 3:
            # blank line or other output we do not understand
            continue

        if fields[1] == version:
            return fields[2]

    return None
0160 
0161 #----------------------------------------------------------------------
def findDataSetFromSampleName(sampleSpec, version, cdToReleaseDir):
    """ from the given sample specification (e.g. photonJet), tries to get
    the relval dataset from DBS for the given CMSSW version.

    If more than one sample is found, the user is prompted
    to select one.

    sampleSpec:     key of the sample in knownDatasets
    version:        CMSSW version, inserted into the dataset pattern
    cdToReleaseDir: if true, the runtime environment of the release is
                    set up (in the subshell) before the query is run

    Returns the name of the dataset. Exits the program (status 1) if the
    release directory or a dataset can't be found or if the choice
    entered by the user is not valid.
    """

    # Find the dataset in DBS using command. This actually
    # could find more than one dataset.

    datasetToSearchFor = knownDatasets[sampleSpec]['dataset'] % { "version": version }

    # the pattern contains '*': quote it so that the shell hands it
    # over unchanged instead of trying to expand it
    dbs_cmd = "das_client.py --query='dataset=" + datasetToSearchFor + "'" + ' | grep "HLTDEBUG"'

    cmd_parts = []

    if cdToReleaseDir:
        # the release directory is only needed in this case
        cmssw_release_dir = findCMSSWreleaseDir(version)

        if cmssw_release_dir is None:
            print("failed to find the release directory for CMSSW version " + version, file=sys.stderr)
            sys.exit(1)

        cmd_parts.extend([
            'cd ' + cmssw_release_dir,
            "eval `scramv1 runtime -sh`",
            "cd - > /dev/null",   # this seems to print a line in some cases
            ])

    cmd_parts.append(dbs_cmd)

    pipe = os.popen(" && ".join(cmd_parts))
    try:
        allDatasetsToCheck = [ x.strip() for x in pipe.readlines() ]
    finally:
        pipe.close()

    if len(allDatasetsToCheck) == 1:
        datasetToCheck = allDatasetsToCheck[0]
    elif len(allDatasetsToCheck) == 0:
        print("failed to find dataset in dbs")
        print()
        print("dbs command was:")
        print(dbs_cmd)
        sys.exit(1)
    else:
        # more than one dataset found
        print("found the following matching datasets, please select one:")

        for i, dataset in enumerate(allDatasetsToCheck):
            print("  %2d: %s" % (i, dataset))

        print("your choice:", end=' ')
        # the prompt does not end with a newline: make sure it
        # is shown before we wait for the input
        sys.stdout.flush()
        answer = sys.stdin.readline()

        try:
            choice = int(answer)
        except ValueError:
            choice = -1

        # only accept the indices listed above
        if not 0 <= choice < len(allDatasetsToCheck):
            print("invalid choice '" + answer.strip() + "'", file=sys.stderr)
            sys.exit(1)

        datasetToCheck = allDatasetsToCheck[choice]

        print("selected",datasetToCheck)

    ###################################
    # Make sure dataset was found
    print("Looked for dataset matching " + datasetToSearchFor)

    print("found")
    print("  ",datasetToCheck)
    print()

    return datasetToCheck
0228 
0229 #----------------------------------------------------------------------
def createProjectArea(version):
    """sets up a new scram project area CMSSW_<version>
    and changes the working directory to its src/ subdirectory"""

    release = "CMSSW_" + version

    print("Setting up " + release + " environment")
    execCmd("scramv1 project CMSSW " + release)
    os.chdir(release + "/src")
0237 
0238 
0239 #----------------------------------------------------------------------
0240 
def ensureProjectAreaNotExisting(version):
    """exits (status 1) if CMSSW_<version> exists already in the
    current directory (reusing it can mix tags and samples etc.)"""

    project_dir = "CMSSW_" + version

    if not os.path.exists(project_dir):
        return

    for message in (
        "the project directory " + project_dir + " already exists.",
        "Refusing to continue as this might cause unexpected results.",
        ):
        print(message, file=sys.stderr)

    sys.exit(1)
0251 
0252 #----------------------------------------------------------------------
0253 
def cleanVersion(version):
    """ returns the version string without a leading CMSSW_ (if there is one) """

    prefix = "CMSSW_"

    return version[len(prefix):] if version.startswith(prefix) else version
0263 
0264 #----------------------------------------------------------------------
0265 
def getCMSSWVersionFromEnvironment():
    """ returns the CMSSW version (without the CMSSW_ prefix) taken from
    the environment variable CMSSW_VERSION, exits (status 1) if the
    variable is not set """

    varname = "CMSSW_VERSION"

    try:
        fullVersion = os.environ[varname]
    except KeyError:
        for message in (
            "The environment variable " + varname + " is not set.",
            "It looks like you have not initialized a runtime",
            "environment for CMSSW but want to use the 'current one'.",
            "",
            "Try running cmsenv and then run this script again.",
            ):
            print(message, file=sys.stderr)
        sys.exit(1)

    return cleanVersion(fullVersion)
0280 
0281 #----------------------------------------------------------------------
0282 # main
0283 #----------------------------------------------------------------------
from optparse import OptionParser

# usage text shown by -h (before the list of options)
usageText = """

  usage: %prog [options] [sample] [cmssw-version]

    e.g. %prog photonJet 3_5_6
         %prog --this-project-area photonJet
         %prog --file=rfio://castor/cern.ch/cms/store/... 3_5_6
         %prog --file=rfio://castor/cern.ch/cms/store/... --this-project-area
         
  Produces the histogram files for E/gamma path validation.

  sample is required unless input files are specified directly using the --file=... option.

  cmssw-version is required unless the option --this-project-area is given

"""

parser = OptionParser(usageText)

# all command line options: (option string, arguments for add_option),
# in the order in which they are added to the parser
optionSpecs = [
    ("--file", dict(
        dest="direct_input_files",
        default=[],
        type="str",
        action="append", # append to list
        help="run directly from the ROOT file given. Option can be specified multiple times.",
        metavar="FILE")),

    ("--hlt-process", dict(
        dest="hlt_process_name",
        default=None,
        type="str",
        help="Specify the name of the HLT process. Useful e.g. when running on a file produced by yourself with a different process name.",
        metavar="PROC")),

    ("--cvstag", dict(
        dest="cvstag",
        default="HEAD",
        type="str",
        help="CVS tag to be used for module " + module + ". Default is to use the HEAD revision.",
        metavar="TAG")),

    ("--cfg", dict(
        dest="configFile",
        default=None,
        type="str",
        help="Base config file (relative to HLTriggerOffline/Egamma if using files from CVS or relative to the current path if the option --this-project-area is given) to run with cmsRun. Change this e.g. when you want to run on data instead of MC.",
        metavar="CFG_FILE.py")),

    ("--cfg-add", dict(
        dest="cfg_add",
        default=[],
        type="str",
        action="append", # append to list
        help="line to add to the generated cmsRun configuration file. Can be specified several times",
        metavar="CFG_LINE")),

    ("--num-events", dict(
        dest="num_events",
        default=None,
        type="int",
        help="set maxEvents to run over a limited number of events",
        metavar="NUM")),

    ("--this-project-area", dict(
        dest="useThisProjectArea",
        default=False,
        action="store_true",
        help="instead of creating a new project area and checking out files from CVS, use the current CMSSW project area in use")),

    ("--follow", dict(
        dest="followCmsRunoutput",
        default=False,
        action="store_true",
        help="show output of cmsRun task (in addition to writing it to a log file)")),

    ("--data", dict(
        dest="isData",
        default=False,
        action="store_true",
        help="run on real data file")),
]

for optionString, optionArgs in optionSpecs:
    parser.add_option(optionString, **optionArgs)

# ARGV: the positional (non-option) arguments
(options, ARGV) = parser.parse_args()
0373 
sampleSpec = None

#----------------------------------------
# sanity checks
#----------------------------------------


if options.useThisProjectArea:
    version = getCMSSWVersionFromEnvironment()

# default (input) config file

if options.configFile is None:

    if options.useThisProjectArea:
        options.configFile = os.path.join(os.environ['CMSSW_BASE'],"src/HLTriggerOffline/Egamma/test/test_cfg.py")
    else:
        options.configFile = "test/test_cfg.py"

#----------------------------------------
# interpret ALL positional arguments first: things which take more
# than a second (creating the project area, querying DAS) are only
# started after the command line was found to be consistent
#----------------------------------------

if not options.direct_input_files:
    # no input files given directly: the first argument is the sample
    if len(ARGV) < 1:
        print("No data sample specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
        print(file=sys.stderr)
        print("known samples are: " + " ".join(knownDatasets.keys()), file=sys.stderr)
        print(file=sys.stderr)
        sys.exit(1)

    sampleSpec = ARGV.pop(0)

    # check whether we know the specified sample
    if sampleSpec not in knownDatasets:
        print("unknown sample " + sampleSpec + ", known samples are: " + " ".join(knownDatasets.keys()), file=sys.stderr)
        sys.exit(1)

if not options.useThisProjectArea:
    # the next argument is the CMSSW version (otherwise the
    # version was taken from the environment already above)
    if len(ARGV) < 1:
        print("No CMSSW version specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
        print(file=sys.stderr)
        sys.exit(1)

    version = cleanVersion(ARGV.pop(0))

if len(ARGV) != 0:
    print("too many positional (non-option) arguments specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
    print(file=sys.stderr)
    sys.exit(1)

#----------------------------------------
# the command line is consistent, start working
#----------------------------------------

if not options.useThisProjectArea:
    ensureProjectAreaNotExisting(version)

    # note that this changes the working directory
    createProjectArea(version)

if sampleSpec is not None:

    datasetToCheck = findDataSetFromSampleName(sampleSpec, version, not options.useThisProjectArea)

    # Get the file names in the dataset path, and format it for python files
    print("\n\nGetting file names for")
    print("  ",datasetToCheck)

    cmd_parts = []

    if not options.useThisProjectArea:
        cmssw_release_dir = findCMSSWreleaseDir(version)

        if cmssw_release_dir is None:
            print("failed to find the release directory for CMSSW version " + version, file=sys.stderr)
            sys.exit(1)

        cmd_parts.extend([
            'cd ' + cmssw_release_dir,
            "eval `scramv1 runtime -sh`",
            "cd -",
            ])

    cmd_parts.append("das_client.py --query='file dataset=" + datasetToCheck + "'")

    pipe = os.popen(" && ".join(cmd_parts))
    try:
        FILES = [ x.strip() for x in pipe.readlines() ]
    finally:
        pipe.close()

    # keep only the lines of the output which are names of ROOT files
    FILES = [ x for x in FILES if x.endswith('.root') ]

    if not FILES:
        print("found no files for dataset " + datasetToCheck, file=sys.stderr)
        sys.exit(1)

else:
    # input files were specified explicitly (instead of a dataset)
    FILES = options.direct_input_files[:]

    datasetToCheck = "(undefined dataset)"
0479 
0480 #----------------------------------------
0481 # determine the absolute path of the input configuration
0482 # file 
0483 #----------------------------------------
0484 
if options.useThisProjectArea:
    absoluteInputConfigFile = options.configFile

else:
    # we have already chdird into the project area and into src/
    # (the paths must be determined BEFORE changing into the
    # module directory below)
    moduleDirectory = os.path.join(os.getcwd(),module)

    absoluteInputConfigFile = os.path.join(moduleDirectory, options.configFile)
    absoluteOutputConfigFile = os.path.join(moduleDirectory, "test_cfg_new.py")

    ###################################
    # Check out module and build it

    print("Checking out tag '" + options.cvstag + "' of " + module)
    execCmd(" cvs -Q co -r " + options.cvstag + " " + module)

    execCmd("scramv1 b")
    os.chdir(module)

#--------------------
# check if the (possibly user specified) config file does exist
# or not. Note that we can do this only AFTER the CVS checkout
if not os.path.exists(absoluteInputConfigFile):
    print("config file " + absoluteInputConfigFile + " does not exist", file=sys.stderr)
    print(os.getcwd())
    sys.exit(1)
#--------------------

if options.useThisProjectArea:
    import tempfile

    # mkstemp creates the file and leaves it in place, so the name
    # stays reserved until the generated configuration is written to it.
    # (the name of a NamedTemporaryFile which is not kept alive
    # refers to a file which was deleted again already)
    outputConfigFd, absoluteOutputConfigFile = tempfile.mkstemp(suffix = ".py")
    os.close(outputConfigFd)
0523 #--------------------
0524 
0525 # Place file names in python config file
print("taking config file " + absoluteInputConfigFile + " and copying to " + absoluteOutputConfigFile) 

#----------------------------------------
# append things to the config file
#----------------------------------------

# read the original config file first
with open(absoluteInputConfigFile) as fin:
    originalConfig = fin.read()

# the file is closed at the end of the with block, also in case of an error
with open(absoluteOutputConfigFile,"w") as fout:

    # first copy all the lines of the original config file
    fout.write(originalConfig)

    # make sure that the lines added below do not end up
    # on the last line of the original config file
    if originalConfig and not originalConfig.endswith("\n"):
        fout.write("\n")

    print("process.source.fileNames = " + str(FILES), file=fout)
    print("process.post.dataSet = cms.untracked.string('" + datasetToCheck +"')", file=fout)

    # replace all HLT process names by something
    # else if specified by the user

    if options.hlt_process_name is not None:
        # ugly code ahead, may disturb some viewers...
        #
        # dump the source code of the replacing code into
        # the CMSSW python configuration file
        import inspect

        print("#----------------------------------------", file=fout)
        print("# replace explicit specifications of HLT process name by " + options.hlt_process_name, file=fout)
        print("#----------------------------------------", file=fout)
        print(inspect.getsource(ReplaceProcessNameOfInputTags), file=fout)
        print(file=fout)
        print("for seq in process.sequences.values():", file=fout)
        print("""    seq.visit(ReplaceProcessNameOfInputTags("HLT","%s"))""" % options.hlt_process_name, file=fout)

    # check for additional configuration text specified

    if options.cfg_add:
        print(file=fout)
        print("#----------------------------------------", file=fout)

        for line in options.cfg_add:
            print("# additional string specified on the command line", file=fout)
            print(line, file=fout)

        print(file=fout)
        print("#----------------------------------------", file=fout)

    #----------------------------------------
    # max. events to run on
    if options.num_events is not None:
        print(file=fout)
        print("#----------------------------------------", file=fout)
        print("# maximum number of events specified", file=fout)
        print("#----------------------------------------", file=fout)
        print("process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(%d) )" % options.num_events, file=fout)
        print("#----------------------------------------", file=fout)

    #----------------------------------------
    # run on real data (only done for the config file testEmDQM_cfg.py)
    if options.isData and "testEmDQM_cfg.py" in absoluteInputConfigFile:
        print(file=fout)
        print("#----------------------------------------", file=fout)
        print("# Running on real data sample", file=fout)
        print("#----------------------------------------", file=fout)
        print("process.emdqm.isData = cms.bool(True)", file=fout) 
        print("#----------------------------------------", file=fout)
0593 
0594 #----------------------------------------
logfile = os.path.join(os.getcwd(),"log")

if os.path.exists(logfile):
    print("the log file (" + logfile + ") exists already, this might causing problems", file=sys.stderr)
    print("with your shell. Stopping here.", file=sys.stderr)
    sys.exit(1)


print("Starting cmsRun " + absoluteOutputConfigFile + " >& " + logfile)

cmd = "eval `scramv1 runtime -sh` && cmsRun " + absoluteOutputConfigFile

if options.followCmsRunoutput:
    # NOTE: the exit status of a pipeline is the one of its last
    # command (tee), a failure of cmsRun is thus not seen by execCmd here
    cmd += " 2>&1 | tee " + logfile
else:
    # '>& file' is an extension of some shells (bash, csh). The command
    # is run with /bin/sh, so use the form all sh variants understand.
    cmd += " > " + logfile + " 2>&1"
execCmd(cmd )

# check whether the expected output file was created
# and rename it 

if os.path.exists(outputRootFile):

    if sampleSpec is not None:
        # a sample (e.g. wen or zee etc.) was specified

        renameOutputTo=knownDatasets[sampleSpec]['output'] % { "version" : version }

        shutil.move(outputRootFile, renameOutputTo)
        print("Created")
        print("  ",os.getcwd() + "/" + renameOutputTo)
    else:
        # input files were given directly: keep the name of the output file
        print("Created")
        print("  ",os.getcwd() + "/" + outputRootFile)

else: 

    print("cmsRun failed to create " + outputRootFile)
    print("See log file:")
    print("   ",os.getcwd() + "/log")
0635