Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-03-03 02:23:12

0001 #! /usr/bin/env python3
0002 
0003 from __future__ import print_function
0004 from builtins import range
0005 import re,os,sys,shutil,math
0006 import optparse
0007 
# Rebuild a printable version of the command line; it is echoed to stdout and
# recorded in the header of the output file.  Empty arguments and arguments
# containing a space are wrapped in double quotes.
copyargs = []
for arg in sys.argv:
    if arg == "":
        arg = "\"\""
    if arg.find(" ") != -1:
        arg = "\"%s\"" % arg
    copyargs.append(arg)
commandline = " ".join(copyargs)

prog = sys.argv[0]

# optparse expands "%prog" (not the argparse-style "%(prog)s") in usage text.
usage='./%prog NBLOCKS INFILE OUTFILE [options]\n'+\
  'takes list of files produced by findQualityFiles.py as INFILE,\n'+\
  'groups them into maximum NBLOCKS blocks with approximately similar #events.'


######################################################
# To parse commandline args


parser=optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
  help="debug verbosity level",
  type="int",
  default=0,
  dest="debug")

options,args=parser.parse_args()

# Use the positional arguments left over after option parsing, so that options
# may appear anywhere on the command line without shifting NBLOCKS/INFILE/OUTFILE.
if len(args) < 3:
    parser.error("Too few arguments.")

try:
    NBLOCKS = int(args[0])
except ValueError:
    parser.error("NBLOCKS must be an integer, got %r" % args[0])
if NBLOCKS < 1:
    # NBLOCKS is used as a divisor when computing the events per block.
    parser.error("NBLOCKS must be a positive integer, got %d" % NBLOCKS)
INFILE = args[1]
OUTFILE = args[2]
0043 
0044 
0045 
def makeJobBlock(mylist, evtn):
    """Greedily pick entries of *mylist* to form one block of about *evtn* events.

    The first entry is always taken.  Then, while the running total is below
    *evtn* and unused entries remain, the first unused entry (in list order)
    that keeps the total strictly below *evtn* is added; if no such entry
    exists, the last unused entry is added even though it reaches or exceeds
    *evtn*.  The caller passes the list sorted by decreasing event count, so
    "first that fits" is the biggest one and "last unused" is the smallest.

    Args:
        mylist: sequence of tuples whose element 0 is the number of events.
        evtn: target number of events for the block.

    Returns:
        A tuple ``(block, newlist, n)``: the chosen entries in the order they
        were picked, the unused entries in their original order, and the total
        number of events in ``block``.  An empty *mylist* yields
        ``([], [], 0)``.
    """
    total = len(mylist)
    n = 0
    block = []
    chosen = set()  # indices already placed in the block
    if total:
        n = mylist[0][0]
        block.append(mylist[0])
        chosen.add(0)

    # Index of the last unused entry.  It can only move towards the front as
    # entries get chosen, so it is kept across iterations instead of being
    # recomputed from the end of the list every time.
    last_free = total - 1
    while n < evtn and len(chosen) < total:
        while last_free in chosen:
            last_free -= 1
        # Fall back to the last unused entry unless an earlier one fits.
        pick = last_free
        for i in range(last_free):
            if i not in chosen and n + mylist[i][0] < evtn:
                pick = i
                break
        n += mylist[pick][0]
        block.append(mylist[pick])
        chosen.add(pick)

    # pick up unused elements
    newlist = [entry for i, entry in enumerate(mylist) if i not in chosen]
    print("done makeJobBlock n =",n," len =",len(block))
    return block, newlist, n
0083 
0084 
0085 
# Lines starting with '#' are carried over verbatim into the output header.
comment1RE = re.compile (r'^#.+$')
# A file line has a quoted file name followed by "# <number of events>".
# At least one digit is required so that int() below never sees an empty string.
fileLineRE = re.compile (r'^.*\'(.*)\'.+# (\d+).*$')

if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    sys.exit(1)  # non-zero status: this is a failure

with open(INFILE, "r") as fin:
    lines = fin.readlines()


eventsFiles = []   # (number of events, file name) tuples
ntotal = 0         # total number of events over all files
commentLines=[]

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    match = fileLineRE.match(line)
    if match:
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if len(eventsFiles)==0:
    print("no file description strings found")
    sys.exit(1)  # non-zero status: this is a failure

# Biggest files first: makeJobBlock relies on this ordering.
eventsFiles.sort(reverse=True)

evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
print("Total = ",ntotal, "  per block =", evtPerJob,"(would give total of ", evtPerJob*NBLOCKS, ")", "  list length =",len(eventsFiles))
if eventsFiles[0][0] > evtPerJob:
    print("the biggest #evt is larger then #evt/block:",eventsFiles[0][0],">",evtPerJob)
    print("consider lowering NBLOCKS")


jobsBlocks=[]      # (list of (nevents, file name), number of events in block)
temp = eventsFiles # files not yet assigned to a block

tt = 0             # number of events assigned so far
for j in range(NBLOCKS):
    print(j)
    if len(temp)==0:
        print("done!")
        break
    block, temp, nn = makeJobBlock(temp,evtPerJob)
    tt+=nn
    if len(block)>0:
        jobsBlocks.append((block,nn))
        print(block)
    else:
        print("empty block!")

if len(temp)>0:
    # Can happen e.g. for files with zero events; do not drop them silently.
    print("warning:",len(temp),"file(s) were not assigned to any block:",temp)

print(tt)
print(commandline)


with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### "+commandline+"\n")
    fout.write("### Total #evt= "+str(ntotal)+"  #files ="+str(len(eventsFiles))+"  per job #evt="
               +str(evtPerJob)+" (would give total of "+str(evtPerJob*NBLOCKS)+")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    lastBlock = len(jobsBlocks)-1
    for b, (blockFiles, blockEvents) in enumerate(jobsBlocks):
        fout.write('  [ # job '+str(b)+' with nevt='+str(blockEvents)+'\n')
        lastFile = len(blockFiles)-1
        for i, (nevents, fileName) in enumerate(blockFiles):
            # no trailing comma after the last file of a block
            comma = "" if i==lastFile else ","
            fout.write("    '"+ fileName +"'"+comma+" # "+ str(nevents) + "\n")
        # no trailing comma after the last block
        commax = "" if b==lastBlock else ","
        fout.write('  ]'+commax+'\n')
    fout.write(']\n')