# File indexing completed on 2024-11-25 02:29:04
0001
0002
0003 from builtins import range
0004 import re,os,sys,shutil,math
0005 import optparse
0006
# Rebuild the invoking command line as a single string; it is printed later
# and written into the header of OUTFILE for provenance.
# Empty arguments are shown as "" and arguments containing a space are
# wrapped in double quotes so argument boundaries stay visible.
copyargs = sys.argv[:]
for i, arg in enumerate(copyargs):
    if arg == "":
        copyargs[i] = '""'
    elif " " in arg:
        copyargs[i] = '"%s"' % arg
commandline = " ".join(copyargs)
0014
prog = sys.argv[0]

# Usage text handed to optparse.OptionParser.
# optparse only substitutes the literal token "%prog" (with the basename of
# sys.argv[0]); the argparse-style "%(prog)s" would be printed verbatim.
usage = (
    './%prog NBLOCKS INFILE OUTFILE [options]\n'
    'takes list of files produced by findQualityFiles.py as INFILE,\n'
    'groups them into maximum NBLOCKS blocks with approximately similar #events.'
)
0020
0021
0022
0023
0024
0025
# Command-line interface: three positional arguments plus -v/--verbose.
parser = optparse.OptionParser(usage)

parser.add_option("-v", "--verbose",
                  help="debug verbosity level",
                  type="int",
                  default=0,
                  dest="debug")

options, args = parser.parse_args()

# Take the positionals from the parsed `args`, not from raw sys.argv:
# otherwise an option placed before or between them (e.g. "-v 1 4 in out")
# would be mistaken for NBLOCKS / INFILE / OUTFILE.
if len(args) < 3:
    raise SystemError("Too few arguments.\n\n" + parser.format_help())

NBLOCKS = int(args[0])
INFILE = args[1]
OUTFILE = args[2]

# NBLOCKS is used as a divisor further down; reject non-positive values here
# with a readable message instead of a ZeroDivisionError later.
if NBLOCKS < 1:
    raise SystemError("NBLOCKS must be a positive integer.\n\n" + parser.format_help())
0042
0043
0044
def makeJobBlock(mylist, evtn):
    """Greedily carve one block of about ``evtn`` events out of ``mylist``.

    The first entry is always taken.  Then, while the running total is still
    below ``evtn`` and unused entries remain, the entries are scanned from the
    front and the first unused one that keeps the total strictly below
    ``evtn`` is added.  If none fits, the last unused entry is added instead,
    which lets the total reach or overshoot ``evtn`` and ends the loop.

    Args:
        mylist: sequence of entries whose element ``[0]`` is an event count
            (the caller passes ``(nevents, filename)`` tuples sorted with the
            largest count first).
        evtn: target number of events for the block.

    Returns:
        A tuple ``(block, newlist, n)``: the entries picked (in the order they
        were picked), the entries left over (in their original order), and
        the total event count of ``block``.  An empty ``mylist`` yields
        ``([], [], 0)``.

    Prints a one-line summary to stdout for every non-empty input.
    """
    if not mylist:
        return [], [], 0

    size = len(mylist)
    n = mylist[0][0]
    block = [mylist[0]]
    chosen = {0}  # indices already placed in the block; set for O(1) lookup

    while n < evtn and len(chosen) < size:
        # Index of the last entry not yet used.  It only changes when an
        # entry is picked, so compute it once per pass rather than once per
        # scanned index.
        last_i = size - 1
        while last_i in chosen:
            last_i -= 1

        for i in range(size):
            if i in chosen:
                continue
            # Take the first entry that still fits below the target; the
            # last unused entry is taken unconditionally as a fallback.
            if i == last_i or n + mylist[i][0] < evtn:
                n += mylist[i][0]
                block.append(mylist[i])
                chosen.add(i)
                break

    # Everything not picked is handed back for the next block.
    newlist = [entry for i, entry in enumerate(mylist) if i not in chosen]
    print("done makeJobBlock n =",n," len =",len(block))
    return block, newlist, n
0082
0083
0084
# A comment line: '#' in the first column followed by at least one character.
comment1RE = re.compile(r'^#.+$')
# A file entry: a single-quoted string followed later by "# <digits>".
# group(1) is the quoted text, group(2) the digits.  At least one digit is
# required so that group(2) can always be converted with int().
fileLineRE = re.compile(r'^.*\'(.*)\'.+# (\d+).*$')
0087
0088
# Stop with a readable message when INFILE does not exist.  Exit with a
# non-zero status so that calling scripts can detect the failure.
if not os.access(INFILE, os.F_OK):
    print("Cannot find input file ", INFILE)
    sys.exit(1)

# Read the whole input list; the context manager closes the file even if
# reading raises.
with open(INFILE, "r") as fin:
    lines = fin.readlines()
0096
0097
# Split the input into comment lines (kept verbatim for the output header)
# and file entries, stored as (event count, file name) tuples.
eventsFiles = []
ntotal = 0
commentLines = []

for line in lines:
    if comment1RE.match(line):
        commentLines.append(line)
        # A line starting with '#' is a comment only; do not also count a
        # commented-out file entry as input.
        continue

    match = fileLineRE.match(line)
    # Skip entries whose count capture is empty rather than failing in int().
    if match and match.group(2):
        nevt = int(match.group(2))
        eventsFiles.append((nevt, str(match.group(1))))
        ntotal += nevt

if not eventsFiles:
    print("no file description strings found")
    # Nothing to group is a failure: report it through the exit status.
    sys.exit(1)
0121
0122
0123
0124
0125
0126
0127
0128
0129
# Order the (event count, file name) tuples from largest to smallest, so the
# front-to-back scan in makeJobBlock meets the biggest files first.
eventsFiles.sort(reverse=True)

# Target number of events per block, rounded up.
evtPerJob = int(math.ceil(float(ntotal) / NBLOCKS))
print(
    "Total = ", ntotal,
    " per block =", evtPerJob,
    "(would give total of ", evtPerJob * NBLOCKS, ")",
    " list length =", len(eventsFiles),
)

# After the descending sort the first entry holds the largest event count.
if evtPerJob < eventsFiles[0][0]:
    print("the biggest #evt is larger then #evt/block:", eventsFiles[0][0], ">", evtPerJob)
    print("consider lowering NBLOCKS")
0138
0139
# Build at most NBLOCKS blocks.  Each makeJobBlock call returns one block,
# the entries still unassigned, and the block's event total.
jobsBlocks = []
temp = eventsFiles
tt = 0  # running sum of events over all blocks built so far

for j in range(NBLOCKS):
    print(j)
    if not temp:
        # Every file has been assigned before NBLOCKS was reached.
        print("done!")
        break

    block, temp, nn = makeJobBlock(temp, evtPerJob)
    tt += nn

    if not block:
        print("empty block!")
    else:
        jobsBlocks.append((block, nn))
        print(block)

print(tt)
print(commandline)
0159
0160
# Write the grouped list to OUTFILE as a Python-style nested list named
# fileNamesBlocks, preceded by '###' header lines recording how it was made.
# The context manager closes the file even if a write raises.
with open(OUTFILE, mode="w") as fout:
    fout.write("### job-split file list produced by:\n")
    fout.write("### " + commandline + "\n")
    # A space now separates "total of" from the number, matching the same
    # summary printed to stdout earlier in the script.
    fout.write("### Total #evt= " + str(ntotal) + " #files =" + str(len(eventsFiles)) + " per job #evt="
               + str(evtPerJob) + " (would give total of " + str(evtPerJob * NBLOCKS) + ")\n###\n")
    fout.write("### previously produced by:\n")
    fout.write("".join(commentLines))
    fout.write("\nfileNamesBlocks = [\n")

    last_job = len(jobsBlocks) - 1
    for b, (files, nevt) in enumerate(jobsBlocks):
        fout.write(' [ # job ' + str(b) + ' with nevt=' + str(nevt) + '\n')
        last_file = len(files) - 1
        for i, (count, name) in enumerate(files):
            # No comma after the final file of a block.
            comma = "" if i == last_file else ","
            fout.write(" '" + name + "'" + comma + " # " + str(count) + "\n")
        # No comma after the final block.
        commax = "" if b == last_job else ","
        fout.write(' ]' + commax + '\n')
    fout.write(']\n')
0184 fout.close()