Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-26 02:34:21

0001 #!/usr/bin/env python
0002 # adapted to python and extended (by Andre Holzner) based on a
0003 # shell script by Michael Anderson
0004 #
0005 # To use, type for example:
0006 #  hltValidate 3_5_5
0007 #
0008 # This script runs validation on new relval samples.
0009 # Requires version of CMSSW given.
0010 #
0011 # Michael Anderson
0012 # Sept 15, 2009
0013 
0014 import sys, os, shutil
0015 #----------------------------------------------------------------------
0016 # parameters
0017 #----------------------------------------------------------------------
# Package which is checked out from CVS and compiled
module = "HLTriggerOffline/Egamma"

# Name of the ROOT file which is expected to exist after cmsRun has finished
outputRootFile = "DQM_V0001_HLT_R000000001.root"


# Samples which can be requested by name on the command line.
#
# For each sample:
#   dataset: pattern of the dataset to search for
#   output:  name the produced ROOT file is renamed to
#
# %(version)s is replaced by the CMSSW version (without the leading
# CMSSW_) before the strings are used.
#
# (the version string in the dataset patterns could possibly be dropped
# if the release were required explicitly in the query)
knownDatasets = {
    "diGamma": dict(
        dataset="/RelValH130GGgluonfusion*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
        output="DiGamma_%(version)s.root",
    ),

    "photonJet": dict(
        dataset="/RelValPhotonJets_Pt_10*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
        output="GammaJet_%(version)s.root",
    ),

    "zee": dict(
        dataset="/RelValZEE*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
        output="ZEE_%(version)s.root",
    ),

    "wen": dict(
        dataset="/RelValWE*/CMSSW_%(version)s*/GEN-SIM-DIGI-RAW-HLTDEBUG",
        output="WEN_%(version)s.root",
    ),
}
0049 
0050 #----------------------------------------------------------------------
0051 
def execCmd(cmd):
    """ runs the given command line with os.system() and raises an
    Exception if the value returned by os.system() is not zero.

    The number quoted in the exception message is the unmodified
    return value of os.system().
    """
    status = os.system(cmd)

    if status == 0:
        return

    message = "failed to execute command '" + cmd + "', exit status = " + str(status)
    raise Exception(message)
0056 
0057 #----------------------------------------------------------------------
0058 
0059 # code based on PhysicsTools.PatAlgos.tools.helpers.MassSearchReplaceAnyInputTagVisitor
0060 # to replace the process names of all input tags found in sequences.
0061 #
0062 # necessary e.g. when one has to run the HLT and thus must later on
0063 # use a different process name
0064 
class ReplaceProcessNameOfInputTags(object):
    """Visitor that travels within a cms.Sequence and replaces the process
       name of all input tags whose process name is origProcessName
       by newProcessName.
       It will climb down within PSets, VPSets and VInputTags to find its target.

       Useful e.g. for replacing the process names of all input tags where the
       process name was specified explicitly.

       The name 'cms' is not imported in this file: the source code of this
       class is written into the generated cmsRun configuration file, which
       is expected to provide it.
    """

    #----------------------------------------
    def __init__(self,origProcessName,newProcessName,verbose=False):
        """ origProcessName: process name to look for
            newProcessName:  process name to put in its place
            verbose:         if True, print each tag found and what is done with it
        """
        self._origProcessName = origProcessName
        self._newProcessName  = newProcessName
        self._verbose=verbose

    #----------------------------------------

    def doIt(self,pset,base):
        """ walks over the parameters of pset (if it is a cms._Parameterizable)
            and replaces the process name of matching input tags in place.

            base is the dotted path of pset; it is only extended and handed
            on when descending into nested parameter sets.
        """
        if not isinstance(pset, cms._Parameterizable):
            return

        for name in pset.parameters_().keys():
            # if I use pset.parameters_().items() I get copies of the parameter values
            # so I can't modify the nested pset
            value = getattr(pset,name)

            # (not called 'type' in order not to hide the builtin)
            typeName = value.pythonTypeName()

            if typeName == 'cms.PSet':
                self.doIt(value,base+"."+name)

            elif typeName == 'cms.VPSet':
                for (i,ps) in enumerate(value):
                    self.doIt(ps, "%s.%s[%d]"%(base,name,i) )

            elif typeName == 'cms.VInputTag':
                for (i,n) in enumerate(value):
                    # VInputTag can be declared as a list of strings, so ensure that n is formatted correctly
                    n = self.standardizeInputTagFmt(n)

                    # work on the standardized tag: the list element itself
                    # may be a plain string which has no processName.
                    # Write the tag back so that the change also takes effect
                    # if the element was a string before.
                    if self._replaceProcessName(n):
                        value[i] = n

            elif typeName == 'cms.InputTag':
                self._replaceProcessName(value)

    #----------------------------------------
    def _replaceProcessName(self, tag):
        """ sets the process name of tag to the new process name if it is
            equal to the original one. Returns True if the tag was modified. """
        if self._verbose: print("FOUND TAG:",tag)

        if tag.processName != self._origProcessName:
            if self._verbose: print("NOT REPLACING")
            return False

        if self._verbose: print("REPLACING")
        tag.processName = self._newProcessName
        return True

    #----------------------------------------
    @staticmethod
    def standardizeInputTagFmt(inputTag):
        ''' helper function to ensure that the InputTag is defined as cms.InputTag(str) and not as a plain str '''
        if not isinstance(inputTag, cms.InputTag):
            return cms.InputTag(inputTag)
        return inputTag

    #----------------------------------------
    def enter(self,visitee):
        """ called for each object visited: processes the parameters of visitee """
        try:
            label = visitee.label()
        except AttributeError:
            label = '<Module not in a Process>'
        self.doIt(visitee, label)

    #----------------------------------------
    def leave(self,visitee):
        """ nothing to do when leaving an object """
        pass

    #----------------------------------------
0132 
0133     #----------------------------------------
0134 
0135 #----------------------------------------------------------------------
0136 
def findCMSSWreleaseDir(version):
    """ runs scramv1 list to find the directory of the given CMSSW release.

    version may be given with or without the leading 'CMSSW_'.

    Sometimes it happens that there is more than one line in the scram output
    for the same release (and even the same directory). In general, just
    the first matching line is returned.

    Lines of the scram output which do not consist of exactly three
    whitespace separated fields (project, release, directory) are ignored.

    Returns None if no line matches the requested release.
    """

    if not version.startswith("CMSSW_"):
        version = "CMSSW_" + version

    # read everything first so that the pipe is closed again
    with os.popen('scramv1 list -c CMSSW') as scramOutput:
        lines = scramOutput.readlines()

    for line in lines:

        fields = line.split()

        if len(fields) != 3:
            # empty line or anything else which is not a release entry
            continue

        project, release, directory = fields

        if release == version:
            return directory

    return None
0159 
0160 #----------------------------------------------------------------------
def findDataSetFromSampleName(sampleSpec, version, cdToReleaseDir):
    """ from the given sample specification (e.g. photonJet), tries to get
    the relval dataset for the given CMSSW version by running das_client.py.

    sampleSpec:     key into knownDatasets
    version:        CMSSW version, inserted into the dataset pattern
    cdToReleaseDir: if True, the scram runtime environment of the release
                    directory is set up in the shell before the query is run

    If more than one sample is found, the user is prompted
    to select one.

    Returns the name of the dataset. The script is terminated (exit status 1)
    if no dataset is found, if the release directory can't be determined
    or if the user makes an invalid choice.
    """

    # Find the dataset using the command below. This actually
    # could find more than one dataset.

    datasetToSearchFor = knownDatasets[sampleSpec]['dataset'] % { "version": version }

    # the pattern contains '*', so quote the query to make sure the shell
    # hands it over unchanged
    dbs_cmd = "das_client.py --query='dataset=" + datasetToSearchFor + "' | grep \"HLTDEBUG\""

    cmd_parts = []

    if cdToReleaseDir:
        # the release directory is only needed here
        cmssw_release_dir = findCMSSWreleaseDir(version)

        if cmssw_release_dir is None:
            print("could not determine the release directory for CMSSW version " + version, file=sys.stderr)
            sys.exit(1)

        cmd_parts.extend([
            'cd ' + cmssw_release_dir,
            "eval `scramv1 runtime -sh`",
            "cd - > /dev/null",   # this seems to print a line in some cases
            ])

    cmd_parts.append(dbs_cmd)

    with os.popen(" && ".join(cmd_parts)) as queryOutput:
        allDatasetsToCheck = [ x.strip() for x in queryOutput.readlines() ]

    if len(allDatasetsToCheck) == 1:
        datasetToCheck = allDatasetsToCheck[0]
    elif len(allDatasetsToCheck) == 0:
        print("failed to find dataset in dbs")
        print()
        print("dbs command was:")
        print(dbs_cmd)
        sys.exit(1)
    else:
        # more than one dataset found
        print("found the following matching datasets, please select one:")

        for i, dataset in enumerate(allDatasetsToCheck):
            print("  %2d: %s" % (i, dataset))

        print("your choice:", end=' ')
        # the prompt does not end with a newline, make sure it is
        # visible before waiting for the input
        sys.stdout.flush()

        choiceText = sys.stdin.readline()

        try:
            choice = int(choiceText)
        except ValueError:
            choice = -1

        # only accept the numbers shown in the list (in particular
        # no negative numbers, which would count from the end)
        if not 0 <= choice < len(allDatasetsToCheck):
            print("invalid choice '" + choiceText.strip() + "'", file=sys.stderr)
            sys.exit(1)

        datasetToCheck = allDatasetsToCheck[choice]

        print("selected",datasetToCheck)

    ###################################
    # Report which dataset is used
    print("Looked for dataset matching " + datasetToSearchFor)

    print("found")
    print("  ",datasetToCheck)
    print()

    return datasetToCheck
0227 
0228 #----------------------------------------------------------------------
def createProjectArea(version):
    """ sets up a new scram project area for the release CMSSW_<version>
    in the current directory and changes the working directory
    to its src/ subdirectory.

    An exception is raised by execCmd if the scramv1 command fails.
    """

    projectName = "CMSSW_" + version

    print("Setting up " + projectName + " environment")
    execCmd("scramv1 project CMSSW " + projectName)

    os.chdir(projectName + "/src")
0236 
0237 
0238 #----------------------------------------------------------------------
0239 
def ensureProjectAreaNotExisting(version):
    """ terminates the script (exit status 1) if CMSSW_<version> already
    exists relative to the current directory, does nothing otherwise.

    Running in an existing release area is refused because
    tags and samples etc. could get mixed.
    """

    project_dir = "CMSSW_" + version

    if not os.path.exists(project_dir):
        return

    for text in (
        "the project directory " + project_dir + " already exists.",
        "Refusing to continue as this might cause unexpected results.",
        ):
        print(text, file=sys.stderr)

    sys.exit(1)
0250 
0251 #----------------------------------------------------------------------
0252 
def cleanVersion(version):
    """ returns the version string without a leading CMSSW_
    (strings not starting with CMSSW_ are returned as they are) """

    prefix = "CMSSW_"

    return version[len(prefix):] if version.startswith(prefix) else version
0262 
0263 #----------------------------------------------------------------------
0264 
def getCMSSWVersionFromEnvironment():
    """ returns the CMSSW version found in the environment variable
    CMSSW_VERSION (without the leading CMSSW_).

    Prints an explanation to stderr and terminates the script
    (exit status 1) if the variable is not set.
    """

    varname = "CMSSW_VERSION"

    try:
        rawVersion = os.environ[varname]
    except KeyError:
        print("The environment variable " + varname + " is not set.", file=sys.stderr)
        for text in (
            "It looks like you have not initialized a runtime",
            "environment for CMSSW but want to use the 'current one'.",
            "",
            "Try running cmsenv and then run this script again.",
            ):
            print(text, file=sys.stderr)
        sys.exit(1)

    return cleanVersion(rawVersion)
0279 
0280 #----------------------------------------------------------------------
0281 # main
0282 #----------------------------------------------------------------------
from optparse import OptionParser

# text shown as usage message
usageText = """

  usage: %prog [options] [sample] [cmssw-version]

    e.g. %prog photonJet 3_5_6
         %prog --this-project-area photonJet
         %prog --file=rfio://castor/cern.ch/cms/store/... 3_5_6
         %prog --file=rfio://castor/cern.ch/cms/store/... --this-project-area
         
  Produces the histogram files for E/gamma path validation.

  sample is required unless input files are specified directly using the --file=... option.

  cmssw-version is required unless the option --this-project-area is given

"""

parser = OptionParser(usageText)

# all command line options in the order in which they are registered:
# (option name, keyword arguments for add_option)
optionTable = [
    ("--file", dict(
        dest = "direct_input_files",
        action = "append", # append to list
        type = "str",
        default = [],
        metavar = "FILE",
        help = "run directly from the ROOT file given. Option can be specified multiple times.",
        )),

    ("--hlt-process", dict(
        dest = "hlt_process_name",
        type = "str",
        default = None,
        metavar = "PROC",
        help = "Specify the name of the HLT process. Useful e.g. when running on a file produced by yourself with a different process name.",
        )),

    ("--cvstag", dict(
        dest = "cvstag",
        type = "str",
        default = "HEAD",
        metavar = "TAG",
        help = "CVS tag to be used for module " + module + ". Default is to use the HEAD revision.",
        )),

    ("--cfg", dict(
        dest = "configFile",
        type = "str",
        default = None,
        metavar = "CFG_FILE.py",
        help = "Base config file (relative to HLTriggerOffline/Egamma if using files from CVS or relative to the current path if the option --this-project-area is given) to run with cmsRun. Change this e.g. when you want to run on data instead of MC.",
        )),

    ("--cfg-add", dict(
        dest = "cfg_add",
        action = "append", # append to list
        type = "str",
        default = [],
        metavar = "CFG_LINE",
        help = "line to add to the generated cmsRun configuration file. Can be specified several times",
        )),

    ("--num-events", dict(
        dest = "num_events",
        type = "int",
        default = None,
        metavar = "NUM",
        help = "set maxEvents to run over a limited number of events",
        )),

    # simple switches
    ("--this-project-area", dict(
        dest = "useThisProjectArea",
        action = "store_true",
        default = False,
        help = "instead of creating a new project area and checking out files from CVS, use the current CMSSW project area in use",
        )),

    ("--follow", dict(
        dest = "followCmsRunoutput",
        action = "store_true",
        default = False,
        help = "show output of cmsRun task (in addition to writing it to a log file)",
        )),

    ("--data", dict(
        dest = "isData",
        action = "store_true",
        default = False,
        help = "run on real data file",
        )),
    ]

for optionName, optionSettings in optionTable:
    parser.add_option(optionName, **optionSettings)

# ARGV: the remaining positional (non-option) arguments
(options, ARGV) = parser.parse_args()
0372 
sampleSpec = None

#----------------------------------------
# sanity checks
#----------------------------------------

if options.useThisProjectArea:
    # the version is the one of the runtime environment in use
    version = getCMSSWVersionFromEnvironment()

# default (input) config file

if options.configFile is None:

    if options.useThisProjectArea:
        options.configFile = os.path.join(os.environ['CMSSW_BASE'],"src/HLTriggerOffline/Egamma/test/test_cfg.py")
    else:
        options.configFile = "test/test_cfg.py"

#----------------------------------------
# first look at all positional arguments: things which take more
# than a second (creating the project area, querying for datasets)
# are only done AFTER the command line was found to be consistent
#----------------------------------------

runOnDirectInputFiles = len(options.direct_input_files) > 0

if not runOnDirectInputFiles:
    # the first positional argument is the name of the sample
    if len(ARGV) < 1:
        print("No data sample specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
        print(file=sys.stderr)
        print("known samples are: " + " ".join(knownDatasets.keys()), file=sys.stderr)
        print(file=sys.stderr)
        sys.exit(1)

    sampleSpec = ARGV.pop(0)

    # check whether we know the specified sample
    if sampleSpec not in knownDatasets:
        print("unknown sample " + sampleSpec + ", known samples are: " + " ".join(knownDatasets.keys()), file=sys.stderr)
        sys.exit(1)

if not options.useThisProjectArea:
    # the next positional argument is the CMSSW version
    # (otherwise it was taken from the environment above)
    if len(ARGV) < 1:
        print("No CMSSW version specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
        print(file=sys.stderr)
        sys.exit(1)

    # always take the arguments from the front so that a surplus
    # argument is not mistaken for the version
    version = cleanVersion(ARGV.pop(0))

if len(ARGV) != 0:
    print("too many positional (non-option) arguments specified. Try the -h option to get more detailed usage help.", file=sys.stderr)
    print(file=sys.stderr)
    sys.exit(1)

#----------------------------------------
# the command line looks fine, start working
#----------------------------------------

if not options.useThisProjectArea:
    ensureProjectAreaNotExisting(version)
    createProjectArea(version)

if runOnDirectInputFiles:
    # input files were specified explicitly (instead of a dataset)
    FILES = options.direct_input_files[:]

    datasetToCheck = "(undefined dataset)"

else:
    datasetToCheck = findDataSetFromSampleName(sampleSpec, version, not options.useThisProjectArea)

    # Get the file names in the dataset path, and format it for python files
    print("\n\nGetting file names for")
    print("  ",datasetToCheck)

    cmd_parts = []

    if not options.useThisProjectArea:
        # the query is run with the runtime environment of the release
        cmssw_release_dir = findCMSSWreleaseDir(version)

        if cmssw_release_dir is None:
            print("could not determine the release directory for CMSSW version " + version, file=sys.stderr)
            sys.exit(1)

        cmd_parts.extend([
            'cd ' + cmssw_release_dir,
            "eval `scramv1 runtime -sh`",
            "cd - > /dev/null",   # cd - prints the directory it changes to
            ])

    cmd_parts.append("das_client.py --query='file dataset=" + datasetToCheck + "'")

    with os.popen(" && ".join(cmd_parts)) as queryOutput:
        FILES = [ x.strip() for x in queryOutput.readlines() ]

    # keep only the lines which look like ROOT files
    FILES = [ x for x in FILES if x.endswith('.root') ]

    if not FILES:
        print("warning: no files found for dataset " + datasetToCheck, file=sys.stderr)
0478 
0479 #----------------------------------------
0480 # determine the absolute path of the input configuration
0481 # file 
0482 #----------------------------------------
0483 
if options.useThisProjectArea:
    absoluteInputConfigFile = options.configFile

    # the generated configuration goes into a temporary file.
    # mkstemp creates the file and leaves it in place (a NamedTemporaryFile
    # object which is not kept is deleted again right away, leaving just
    # a name which anybody else could take in the meantime).
    import tempfile
    (tmpFileDescriptor, absoluteOutputConfigFile) = tempfile.mkstemp(suffix = ".py")

    # the file is opened by name later on
    os.close(tmpFileDescriptor)

else:
    # we have already chdird into the project area and into src/

    moduleDir = os.path.join(os.getcwd(),module)

    absoluteInputConfigFile = os.path.join(moduleDir, options.configFile)

    absoluteOutputConfigFile = os.path.join(moduleDir, "test_cfg_new.py")
0501 
0502 #----------------------------------------
0503 
0504 
0505 ###################################
0506 # Check out module and build it
0507 
startFromCVS = not options.useThisProjectArea

if startFromCVS:
    print("Checking out tag '" + options.cvstag + "' of " + module)

    # check out the module, then compile
    for shellCommand in (
        " cvs -Q co -r " + options.cvstag + " " + module,
        "scramv1 b",
        ):
        execCmd(shellCommand)

    # continue inside the directory of the module
    os.chdir(module)

#--------------------
# The (possibly user specified) config file must exist.
# This can only be checked here, i.e. AFTER the CVS checkout
configFileFound = os.path.exists(absoluteInputConfigFile)

if not configFileFound:
    print("config file " + absoluteInputConfigFile + " does not exist", file=sys.stderr)
    print(os.getcwd())
    sys.exit(1)
#--------------------
0522 #--------------------
0523 
0524 # Place file names in python config file
print("taking config file " + absoluteInputConfigFile + " and copying to " + absoluteOutputConfigFile) 

#----------------------------------------
# append things to the config file
#----------------------------------------

# read the original config file first (and close it again)
with open(absoluteInputConfigFile) as fin:
    originalConfig = fin.read()

# the with statement makes sure that the new config file is
# closed also if something goes wrong while writing it
with open(absoluteOutputConfigFile,"w") as fout:

    # first copy all the lines of the original config file
    fout.write(originalConfig)

    # make sure that the first line added does not end up on the
    # last line of the original config file
    if originalConfig and not originalConfig.endswith("\n"):
        fout.write("\n")

    print("process.source.fileNames = " + str(FILES), file=fout)
    print("process.post.dataSet = cms.untracked.string('" + datasetToCheck +"')", file=fout)

    # replace all HLT process names by something
    # else if specified by the user

    if options.hlt_process_name is not None:
        # ugly code ahead, may disturb some viewers...
        #
        # dump the source code of the replacing code into
        # the CMSSW python configuration file
        import inspect

        print("#----------------------------------------", file=fout)
        print("# replace explicit specifications of HLT process name by " + options.hlt_process_name, file=fout)
        print("#----------------------------------------", file=fout)
        print(inspect.getsource(ReplaceProcessNameOfInputTags), file=fout)
        print(file=fout)
        print("for seq in process.sequences.values():", file=fout)
        print("""    seq.visit(ReplaceProcessNameOfInputTags("HLT","%s"))""" % options.hlt_process_name, file=fout)

    # check for additional configuration text specified

    if len(options.cfg_add) > 0:
        print(file=fout)
        print("#----------------------------------------", file=fout)

        for line in options.cfg_add:
            print("# additional string specified on the command line", file=fout)
            print(line, file=fout)

        print(file=fout)
        print("#----------------------------------------", file=fout)

    #----------------------------------------
    # max. events to run on
    if options.num_events is not None:
        print(file=fout)
        print("#----------------------------------------", file=fout)
        print("# maximum number of events specified", file=fout)
        print("#----------------------------------------", file=fout)
        print("process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(%d) )" % options.num_events, file=fout)
        print("#----------------------------------------", file=fout)

    #----------------------------------------
    # run on real data
    # (the isData parameter is only set if the config file name contains testEmDQM_cfg.py)
    if options.isData and "testEmDQM_cfg.py" in absoluteInputConfigFile:
        print(file=fout)
        print("#----------------------------------------", file=fout)
        print("# Running on real data sample", file=fout)
        print("#----------------------------------------", file=fout)
        print("process.emdqm.isData = cms.bool(True)", file=fout) 
        print("#----------------------------------------", file=fout)

#----------------------------------------
# the config file is closed at this point
0592 
0593 #----------------------------------------
# cmsRun writes its output to the file 'log' in the current directory
logfile = os.path.join(os.getcwd(),"log")

if os.path.exists(logfile):
    print("the log file (" + logfile + ") exists already, this might cause problems", file=sys.stderr)
    print("with your shell. Stopping here.", file=sys.stderr)
    sys.exit(1)


print("Starting cmsRun " + absoluteOutputConfigFile + " >& " + logfile)

cmd = "eval `scramv1 runtime -sh` && cmsRun " + absoluteOutputConfigFile

if options.followCmsRunoutput:
    # NOTE: the exit status of a pipeline is the one of its last command
    # (tee), so a failing cmsRun does not make execCmd raise in this case
    cmd += " 2>&1 | tee " + logfile
else:
    # the command is run by /bin/sh: use the portable form of
    # redirecting stdout and stderr instead of '>&'
    cmd += " > " + logfile + " 2>&1"

execCmd(cmd)
0611 
0612 # check whether the expected output file was created
0613 # and rename it 
0614 
if not os.path.exists(outputRootFile):

    # the expected output file is missing, point the user to the log file
    print("cmsRun failed to create " + outputRootFile)
    print("See log file:")
    print("   ",os.getcwd() + "/log")

else:

    finalFileName = outputRootFile

    if sampleSpec is not None:
        # a sample (e.g. wen or zee etc.) was specified:
        # give the output file the name foreseen for this sample
        finalFileName = knownDatasets[sampleSpec]['output'] % { "version" : version }

        shutil.move(outputRootFile, finalFileName)

    print("Created")
    print("  ",os.getcwd() + "/" + finalFileName)
0634