File indexing completed on 2024-04-06 11:56:47
0001
0002
0003 from __future__ import print_function
0004 from builtins import range
0005 import re,os,sys,shutil,math
0006 import optparse
0007
# Rebuild the invoking command line as a single string (it is echoed to the
# console and written into the output-file header further down).
# Empty arguments become "" and arguments containing a space are wrapped in
# double quotes; everything else is copied through unchanged.
copyargs = []
for rawarg in sys.argv:
    if rawarg == "":
        copyargs.append("\"\"")
    elif " " in rawarg:
        copyargs.append("\"%s\"" % rawarg)
    else:
        copyargs.append(rawarg)
commandline = " ".join(copyargs)
0015
prog = sys.argv[0]

# optparse only expands the "%prog" placeholder in a usage string (to the
# basename of sys.argv[0]); the previous "%(prog)s" was never interpolated and
# was shown literally in the help text.
usage = ('./%prog NBLOCKS INFILE OUTFILE [options]\n'
         'takes list of files produced by findQualityFiles.py as INFILE,\n'
         'groups them into maximum NBLOCKS blocks with approximately similar #events.')

parser = optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options, args = parser.parse_args()

# Use the positional arguments left over after option parsing rather than raw
# sys.argv, so that options may appear anywhere on the command line without
# being mistaken for NBLOCKS / INFILE / OUTFILE.
if len(args) < 3:
    # parser.error() prints the usage plus the message to stderr and exits
    # with status 2.
    parser.error("Too few arguments.")

try:
    NBLOCKS = int(args[0])
except ValueError:
    parser.error("NBLOCKS must be an integer, got %r" % args[0])
if NBLOCKS < 1:
    # A non-positive block count would make the per-block event target
    # (total / NBLOCKS) undefined further down.
    parser.error("NBLOCKS must be a positive integer, got %d" % NBLOCKS)
INFILE = args[1]
OUTFILE = args[2]
0043
0044
0045
def makeJobBlock(mylist, evtn):
    """Greedily carve one block of roughly ``evtn`` events out of ``mylist``.

    The block is seeded with ``mylist[0]``.  While its event sum is below
    ``evtn``, the first not-yet-used entry (in list order) that keeps the sum
    strictly below ``evtn`` is added.  If no such entry exists, the last unused
    entry is added instead, even if that overshoots ``evtn``.

    Args:
        mylist: sequence of entries whose element ``[0]`` is an event count.
            The caller in this file passes ``(nevents, filename)`` tuples
            sorted by decreasing ``nevents``.
        evtn: target number of events for the block.

    Returns:
        A tuple ``(block, newlist, n)``: the selected entries in selection
        order, the unselected entries in their original order, and the event
        sum of the block.  An empty ``mylist`` yields ``([], [], 0)``.
    """
    if not mylist:
        return [], [], 0

    size = len(mylist)
    n = mylist[0][0]
    block = [mylist[0]]
    chosen = {0}  # indices already placed in the block (set: O(1) lookups)

    while n < evtn and len(chosen) < size:
        # Index of the last entry not used yet.  It only changes when an entry
        # is picked, so compute it once per pick rather than once per scanned
        # index.
        last_free = size - 1
        while last_free in chosen:
            last_free -= 1

        # Default to the last unused entry; prefer the first earlier unused
        # entry that still keeps the block strictly below the target.
        pick = last_free
        for idx in range(last_free):
            if idx in chosen:
                continue
            if n + mylist[idx][0] < evtn:
                pick = idx
                break

        n += mylist[pick][0]
        block.append(mylist[pick])
        chosen.add(pick)

    # Hand back whatever was not used, preserving the input order.
    newlist = [entry for idx, entry in enumerate(mylist) if idx not in chosen]
    print("done makeJobBlock n =",n," len =",len(block))
    return block, newlist, n
0083
0084
0085
# A comment/header line: starts with '#'.
comment1RE = re.compile(r'^#.+$')
# A file entry: a single-quoted name followed, later on the line, by
# "# <digits>" giving its event count.  At least one digit is required
# (\d+, previously \d*) so that a line with a non-numeric trailing comment is
# skipped instead of crashing in int('').
fileLineRE = re.compile(r'^.*\'(.*)\'.+# (\d+).*$')

if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    sys.exit(1)  # non-zero status: this is a failure, not a normal exit

# The context manager closes the file even if reading raises.
with open(INFILE, "r") as fin:
    lines = fin.readlines()

eventsFiles = []    # (nevents, filename) for every recognised file entry
ntotal = 0          # sum of nevents over all recognised entries
commentLines = []   # '#' lines, copied verbatim into the output header

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)

    # NOTE(review): a line that matched as a comment is still tried as a file
    # entry (original behaviour, kept as is) - confirm whether commented-out
    # entries are really meant to be counted.
    match = fileLineRE.match(line)
    if match:
        nevents = int(match.group(2))
        eventsFiles.append((nevents, str(match.group(1))))
        ntotal += nevents

if not eventsFiles:
    print("no file description strings found")
    sys.exit(1)  # nothing to group: report failure to the caller
0122
0123
0124
0125
0126
0127
0128
0129
0130
# Order the entries by decreasing event count (tuples compare on their first
# element first), so every block is seeded with the largest remaining file.
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal)/NBLOCKS))
print("Total = ",ntotal, " per block =", evtPerJob,"(would give total of ", evtPerJob*NBLOCKS, ")", " list length =",len(eventsFiles))
if evtPerJob < eventsFiles[0][0]:
    print("the biggest #evt is larger then #evt/block:",eventsFiles[0][0],">",evtPerJob)
    print("consider lowering NBLOCKS")

# Peel off at most NBLOCKS blocks; each entry of jobsBlocks is
# (list of (nevents, filename), number of events in the block).
jobsBlocks = []
remaining = eventsFiles
assigned_total = 0
for j in range(NBLOCKS):
    print(j)
    if not remaining:
        # Every file has been assigned before using up all NBLOCKS blocks.
        print("done!")
        break
    block, remaining, block_events = makeJobBlock(remaining, evtPerJob)
    assigned_total += block_events
    if not block:
        print("empty block!")
        continue
    jobsBlocks.append((block, block_events))
    print(block)

print(assigned_total)
print(commandline)
0159 print(commandline)
0160
0161
# Write the grouped file list as a Python fragment defining fileNamesBlocks
# (a list of lists of file names), preceded by '###' header comments.
# The context manager guarantees the file is closed even if a write fails.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### "+commandline+"\n")
    # A space was added after "total of" so the number no longer runs into
    # the text.
    fout.write("### Total #evt= "+str(ntotal)+" #files ="+str(len(eventsFiles))+" per job #evt="
               +str(evtPerJob)+" (would give total of "+str(evtPerJob*NBLOCKS)+")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    last_job = len(jobsBlocks) - 1
    for b, (job_files, job_nevt) in enumerate(jobsBlocks):
        fout.write(' [ # job '+str(b)+' with nevt='+str(job_nevt)+'\n')
        last_file = len(job_files) - 1
        for i, (nevt, fname) in enumerate(job_files):
            # No separator after the final file of a job.
            comma = "" if i == last_file else ","
            fout.write(" '"+ fname +"'"+comma+" # "+ str(nevt) + "\n")
        # No separator after the final job.
        commax = "" if b == last_job else ","
        fout.write(' ]'+commax+'\n')
    fout.write(']\n')
0185 fout.close()