Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-25 02:29:04

0001 #! /usr/bin/env python3
0002 
0003 from builtins import range
0004 import re,os,sys,shutil,math
0005 import optparse
0006 
# Rebuild a printable copy of the invoking command line so it can be echoed
# and recorded in the output file header. Empty arguments are shown as "" and
# arguments containing a space are wrapped in double quotes.
copyargs = sys.argv[:]
for position, argument in enumerate(copyargs):
    shown = argument
    if shown == "":
        shown = "\"\""
    if " " in shown:
        shown = "\"%s\"" % shown
    copyargs[position] = shown
commandline = " ".join(copyargs)
0014 
prog = sys.argv[0]

# optparse only substitutes the literal token "%prog" in a usage string (with
# the basename of sys.argv[0]); the former "%(prog)s" was printed verbatim.
usage='./%prog NBLOCKS INFILE OUTFILE [options]\n'+\
  'takes list of files produced by findQualityFiles.py as INFILE,\n'+\
  'groups them into maximum NBLOCKS blocks with approximately similar #events.'


######################################################
# To parse commandline args


parser=optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
  help="debug verbosity level",
  type="int",
  default=0,
  dest="debug")

options,args=parser.parse_args()

# Read the positional arguments from the parsed list rather than from raw
# sys.argv, so that options given before the positionals (e.g. "-v 1 4 in out")
# are not mistaken for NBLOCKS/INFILE/OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n"+parser.format_help())

NBLOCKS = int(args[0])   # maximum number of blocks to produce
INFILE = args[1]         # path of the input file list
OUTFILE = args[2]        # path of the job-split file list to write
0042 
0043 
0044 
def makeJobBlock(mylist, evtn):
    """Carve one block of entries out of *mylist*, aiming at *evtn* events.

    The block always starts with ``mylist[0]``. While the running event sum is
    below *evtn*, the first not-yet-used entry (in list order) that keeps the
    sum strictly below *evtn* is added. If no such entry exists, the last
    unused entry of the list is added instead, even though that makes the sum
    reach or exceed *evtn*.

    Args:
        mylist: non-empty list of tuples whose first item is an event count.
            NOTE(review): the caller in this file passes a list sorted in
            descending order, so "first fitting" means "largest fitting" and
            "last unused" means "smallest unused" -- confirm for other callers.
        evtn: target number of events for the block.

    Returns:
        A tuple ``(block, newlist, n)``: the chosen entries, the entries left
        over (original order preserved), and the event sum of the block.

    Raises:
        IndexError: if *mylist* is empty.
    """
    n = mylist[0][0]
    block = [mylist[0]]
    # A set gives O(1) membership tests; a list made every test linear.
    chosen = {0}
    total = len(mylist)

    while n < evtn and len(chosen) < total:
        # Index of the last unused entry. It only changes when an entry is
        # picked, so compute it once per pick instead of once per candidate.
        last_i = total - 1
        while last_i in chosen:
            last_i -= 1

        for i in range(last_i + 1):
            if i in chosen:
                continue
            # Take the first entry that still fits below evtn; on reaching the
            # last unused entry take it unconditionally.
            if i == last_i or n + mylist[i][0] < evtn:
                n += mylist[i][0]
                block.append(mylist[i])
                chosen.add(i)
                break

    # pick up unused elements
    newlist = [entry for i, entry in enumerate(mylist) if i not in chosen]
    print("done makeJobBlock n =",n," len =",len(block))
    return block, newlist, n
0082 
0083 
0084 
# A line that starts with '#' and has at least one more character.
comment1RE = re.compile (r'^#.+$')
# A line holding a single-quoted string (group 1) that is later followed by
# "# " and a run of digits (group 2, used as the event count).
fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d*).*$')

if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    sys.exit()

# The context manager closes the file even if reading fails.
with open(INFILE, "r") as fin:
    lines = fin.readlines()


eventsFiles = []    # (event count, quoted string) tuples, one per matched line
ntotal = 0          # sum of all event counts
commentLines=[]     # '#' lines, copied verbatim into the output header

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    # NOTE(review): a '#' line that also matches fileLineRE (e.g. a
    # commented-out file entry) is still counted as a file -- confirm intent.
    match = fileLineRE.match(line)
    if match:
        # NOTE(review): (\d*) may match an empty string, in which case int()
        # raises ValueError -- confirm whether such lines can occur.
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if len(eventsFiles)==0:
    print("no file description strings found")
    sys.exit()
0121 
0122 #print "len=", len(eventsFiles), ntotal
0123 #tmp = set(eventsFiles)
0124 #eventsFiles = list(tmp)
0125 #ntotal = 0
0126 #for ff in eventsFiles:  ntotal += ff[0]
0127 #print "len=", len(eventsFiles), ntotal
0128 #sys.exit()
0129 
# Order the (event count, name) tuples from largest to smallest.
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
print("Total = ", ntotal, "  per block =", evtPerJob,
      "(would give total of ", evtPerJob*NBLOCKS, ")",
      "  list length =", len(eventsFiles))
largest = eventsFiles[0][0]
if largest > evtPerJob:
    print("the biggest #evt is larger then #evt/block:", largest, ">", evtPerJob)
    print("consider lowering NBLOCKS")


# Each element of jobsBlocks is (list of entries in the block, event sum).
jobsBlocks = []
remaining = eventsFiles
assigned = 0    # running event sum over all blocks built so far

for job in range(NBLOCKS):
    print(job)
    if not remaining:
        print("done!")
        break
    block, remaining, nevt = makeJobBlock(remaining, evtPerJob)
    assigned += nevt
    if not block:
        print("empty block!")
        continue
    jobsBlocks.append((block, nevt))
    print(block)

print(assigned)
print(commandline)
0159 
0160 
# Write the blocks as a Python list-of-lists literal named fileNamesBlocks,
# preceded by '###' header lines and the comment lines of the input file.
# The context manager closes the file even if one of the writes fails.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### "+commandline+"\n")
    fout.write("### Total #evt= "+str(ntotal)+"  #files ="+str(len(eventsFiles))+"  per job #evt="
               +str(evtPerJob)+" (would give total of"+str(evtPerJob*NBLOCKS)+")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    last_block = len(jobsBlocks) - 1
    for b, (entries, nevt) in enumerate(jobsBlocks):
        fout.write('  [ # job '+str(b)+' with nevt='+str(nevt)+'\n')
        last_entry = len(entries) - 1
        for i, entry in enumerate(entries):
            # No separator after the last element of the inner list.
            comma = "" if i == last_entry else ","
            # entry[1] is the file name, entry[0] its event count.
            fout.write("    '"+ entry[1] +"'"+comma+" # "+ str(entry[0]) + "\n")
        # No separator after the last inner list.
        commax = "" if b == last_block else ","
        fout.write('  ]'+commax+'\n')
    fout.write(']\n')