Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:01

0001 import sys
0002 sys.path.append("../SkimProducer")
0003 # for switching to CAF queue (condor submit)
0004 sys.path.append("../autoSubmitter") 
0005 import os
0006 import subprocess
0007 import signal
0008 import time
0009 import argparse
0010 import multiprocessing as mp
0011 
def replaceAllRanges(string):
    """Expand bracketed lists/ranges in a sample name into all concrete names.

    The first ``[...]`` group is split at commas. An entry containing ``-``
    is read as ``low-high`` and stands for every integer from low to high
    (inclusive); any other entry is inserted verbatim. Groups further to the
    right are expanded recursively, so
    ``"a[1-2]_[x,y]"`` becomes ``["a1_x", "a1_y", "a2_x", "a2_y"]``.

    Args:
        string: sample name, possibly containing ``[...]`` groups.

    Returns:
        List of expanded names. A string without a complete ``[...]`` group
        is returned unchanged as a one-element list.

    Raises:
        ValueError: if a bound of a ``low-high`` entry is not an integer.
    """
    posS = string.find("[")
    # Search for the closing bracket only behind the opening one. Matching a
    # "]" that precedes the "[" would produce inverted slice bounds and a
    # string that grows on every recursion step.
    posE = string.find("]", posS + 1) if posS >= 0 else -1
    if posE < 0:
        return [string]

    prefix = string[:posS]
    suffix = string[posE+1:]

    strings = []
    for interval in string[posS+1:posE].split(","):
        interval = interval.strip()
        if "-" in interval:
            bounds = interval.split("-")
            replacements = [str(i) for i in range(int(bounds[0]), int(bounds[1])+1)]
        else:
            replacements = [interval]

        for replacement in replacements:
            # The recursion takes care of any group left in the suffix
            strings += replaceAllRanges(prefix + replacement + suffix)
    return strings
0037 
0038 
def condorSubmitSkim(sample, caf=False):
    """Write the job files for one sample and hand them to condor_submit.

    Two files are created in the SkimProducer ``workingArea`` directory below
    CMSSW_BASE: ``skim_<sample>.tcsh`` (from the ``skimScript`` template) and
    ``skim_<sample>.sub`` (from the regular or the CAF submit template).
    The path of the submit file is printed before it is submitted.

    Args:
        sample: sample name, used for the file names and in the submit file.
        caf: if True, the CAF submit template is used. The flag is also
            passed to ``helpers.enableCAF`` (presumably switching the
            environment to the CAF queue -- confirm in helpers).

    Raises:
        KeyError: if CMSSW_BASE is not set in the environment.
    """
    # Imported here because the modules are only reachable through the
    # sys.path entries appended at the top of this file.
    from helpers import enableCAF
    from skimTemplates import skimScript
    if caf:
        from skimTemplates import condorSubTemplateCAF as condorSubScript
    else:
        from skimTemplates import condorSubTemplate as condorSubScript

    enableCAF(caf)
    path = "{base}/src/Alignment/APEEstimation/test/SkimProducer".format(base=os.environ['CMSSW_BASE'])

    scriptfile = "{path}/workingArea/skim_{name}.tcsh".format(path=path, name=sample)
    with open(scriptfile, "w") as fi:
        fi.write(skimScript.format(base=os.environ['CMSSW_BASE']))

    subfile = "{path}/workingArea/skim_{name}.sub".format(path=path, name=sample)
    with open(subfile, "w") as fi:
        fi.write(condorSubScript.format(path=path, name=sample))

    print(subfile)
    # Argument list instead of a shell string: the file name is handed over
    # as exactly one argument, whatever characters path or sample contain.
    subprocess.call(["condor_submit", subfile])
0059     
def localStartSkim(sample):
    """Run the skim of one sample locally with cmsRun and collect its output.

    cmsRun is started on skimProducer_cfg.py and its combined stdout/stderr
    is echoed line by line, prefixed with the sample name. Lines containing
    "Using output name" / "Using output path" are parsed for the base name of
    the output files and for the destination directory. Once cmsRun has
    finished, matching files in the current working directory are renamed to
    the ``<name>_<n>.root`` scheme and, if a destination was announced,
    copied there with xrdcp. A local file is removed after a successful copy.

    A KeyboardInterrupt is forwarded to cmsRun as SIGINT so that the file
    currently being written is closed properly and stays usable.

    Args:
        sample: sample name, passed to the configuration as ``sample=<sample>``.

    Raises:
        KeyError: if CMSSW_BASE is not set in the environment.
    """
    base = os.environ['CMSSW_BASE']

    # Build the argument list directly instead of splitting a command string
    # at blanks, so every element stays exactly one argument.
    toExec = [
        "cmsRun",
        "{base}/src/Alignment/APEEstimation/test/SkimProducer/skimProducer_cfg.py".format(base=base),
        "sample={sample}".format(sample=sample),
    ]
    print(" ".join(toExec))

    outFileName = None
    outFilePath = None

    # start cmsRun
    proc = subprocess.Popen(toExec, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # print output in shell while program runs, also extract output filename
    try:
        # readline() returns b"" only at end-of-file, so blank lines in the
        # output do not end the loop while cmsRun is still running.
        for rawLine in iter(proc.stdout.readline, b""):
            line = rawLine.rstrip().decode(errors="replace")
            if "Using output name" in line:
                outFileName = line.split("output name ")[1].split(".root")[0]
            if "Using output path" in line:
                outFilePath = line.split("output path ")[1]
            print(sample+": "+line)
    except KeyboardInterrupt:
        # this way, the current file is closed and the skimming is finished
        # in a way that the last opened file is actually usable
        print("Interrupted")
        proc.send_signal(signal.SIGINT)

    # Drain what is left in the pipe and wait for cmsRun to exit. A plain
    # wait() could block forever if cmsRun still fills the pipe.
    proc.communicate()

    # Short pause kept from the original flow (presumably to let the file
    # system settle before the files are touched).
    time.sleep(1)
    print("Finished with %s, renaming files now"%(sample))

    ## Rename output files to match _n.root naming scheme
    targetFiles = []
    if outFileName:
        for fi in os.listdir("."):
            stem = fi.split(".root")[0]
            if not stem.startswith(outFileName):
                continue

            if stem == outFileName:
                # First output file carries no number
                newFileName = "%s_1.root"%(outFileName)
            else:
                fileNoString = stem.split(outFileName)[1]
                try:
                    fileNo = int(fileNoString)
                except ValueError:
                    # Catching files from previous skim with same name that were already renamed and not removed before next skim
                    # and files with longer names but identical parts
                    continue
                # For (most) weird naming conventions to not mess up renaming
                if len(fileNoString) != 3:
                    continue
                newFileName = "%s_%d.root"%(outFileName, fileNo+1)

            os.rename(fi, newFileName)
            targetFiles.append(newFileName)

    for fi in targetFiles:
        print(fi)

    if outFilePath:
        print("Copying files to desired path")
        if not os.path.isdir(outFilePath):
            os.makedirs(outFilePath)
        for fi in targetFiles:
            # xrdcp returns 0 on success; only then the local copy is dropped
            if subprocess.call(["xrdcp", fi, outFilePath + "/"]) == 0:
                os.remove(fi)
            else:
                print("Copying %s failed, keeping the local file"%(fi))
0135 
def main(argv):
    """Parse the command line and start one skim per requested sample.

    Sample names may contain ``[...]`` lists/ranges, which are expanded with
    replaceAllRanges. Depending on the options every sample is submitted as
    its own condor job, or the skims are run locally: directly for a single
    sample, in a process pool for several samples.

    Args:
        argv: full argument vector including the program name, as sys.argv.

    Exits with status 1 if CMSSW_BASE is not set or no sample was given.
    """
    if 'CMSSW_BASE' not in os.environ:
        print("CMSSW environment is not set up, do that first")
        sys.exit(1)

    parser = argparse.ArgumentParser(description='Define which samples to skim')
    parser.add_argument("-s", "--sample", action="append", dest="samples", default=[],
                          help="Name of sample as defined in skimProducer_cfg.py. Multiple inputs possible")
    parser.add_argument("-c", "--condor", action="store_true", dest="condor", default=False,
                          help="Submit to condor, if False, the skim will be done locally on lxplus")
    parser.add_argument("-C", "--caf", action="store_true", dest="caf", default=False,
                          help="Submit to CAF queue for faster execution")
    parser.add_argument("-n", "--ncores", action="store", dest="ncores", default=-1, type=int,
                          help="Set maximum number of parallel skims to run if skimming is done locally")

    # Parse the vector that was handed in (without the program name) rather
    # than implicitly falling back to sys.argv.
    args = parser.parse_args(argv[1:])

    if not args.samples:
        print("Usage: python startSkim.py -s <sample>")
        sys.exit(1)

    samples = []
    for sample in args.samples:
        samples += replaceAllRanges(sample)

    # No limit given (or a useless one): one process per sample. Zero has to
    # be caught as well, a pool needs at least one process.
    ncores = args.ncores
    if ncores < 1 or ncores > len(samples):
        ncores = len(samples)

    if args.condor:
        # Every skim gets its own condor job. One could also submit one
        # job and add all jobs as arguments with small changes.
        for sample in samples:
            condorSubmitSkim(sample, args.caf)
    elif len(samples) == 1:
        localStartSkim(samples[0])
    else:
        try:
            # Not recommended for a large number of jobs
            # They will get killed by admins if they overload lxplus
            pool = mp.Pool(ncores)
            pool.map_async(localStartSkim, samples)
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            pass # The keyboard interrupt will be forwarded to the subprocesses anyway, stopping them without terminating them immediately
# Script entry point: only runs when the file is executed directly; the
# complete sys.argv (including the program name) is handed to main().
if __name__ == "__main__":
    main(sys.argv)