File indexing completed on 2024-11-27 03:18:06
0001
0002 import sys,os,subprocess
0003 from CommonMethods import *
0004
class FileObj:
    """Bookkeeping record for the files that share one run number.

    Attributes (all plain, publicly mutable):
        run       -- run number this record stands for; starts at 0.
        size      -- running total of the sizes of the files; starts at 0.
        fileNames -- names of the files collected for the run; starts empty.
    """

    def __init__(self):
        # A fresh list per instance, so records never share their file names.
        self.fileNames = []
        self.run = self.size = 0
0010
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for ``<non-digits>_<digits>_<digits>_<digits>``;
    the second group of digits is returned as the run number.

    Args:
        fileName: name of the file to inspect.

    Returns:
        The run number as an ``int``, or -1 when the name does not contain
        the expected pattern.
    """
    # Imported locally: this file never imports ``re`` explicitly, so it
    # could otherwise only be in scope through the CommonMethods star import.
    import re

    # Raw string, because ``\D`` and ``\d`` are not valid escapes in a plain
    # string literal.
    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if match is None:
        return -1
    # ``long`` does not exist on Python 3; ``int`` has arbitrary precision.
    return int(match.group(3))
0016
0017
0018
def main():
    """Split the ``.txt`` files of a directory into per-run-range folders.

    Usage: ``splitter fromDir``

    Steps:
      1. List the ``.txt`` files of ``fromDir`` with ``ls`` and group them by
         the run number found in each file name, summing the file sizes per
         run.  Names without a run number all land in the group keyed -1.
      2. Walk the runs in ascending order, collecting them until the
         collected size exceeds 1/13 of the total size (or the last run is
         reached).
      3. For each collected chunk, create ``fromDir/Run<first>_<last>/`` and
         copy the chunk's files into it with ``cp``.

    ``ls`` and ``cp`` are not defined in this file; presumably they come from
    the ``CommonMethods`` star import -- confirm their exact contract there.

    Raises:
        SystemExit: always.  With the usage text when no directory is given,
            otherwise with the message "ok" once the work is done.
        OSError: when a file size cannot be read or a folder cannot be
            created.
    """
    if len(sys.argv) < 2:
        # sys.exit instead of the bare ``exit``: the latter is installed by
        # the ``site`` module and is not guaranteed to be available.
        sys.exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Group the files by run number and accumulate the sizes.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the filesystem directly rather than parsing ``ls -l`` output,
        # which breaks on unusual file names and goes through a shell.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # Target number of chunks; a chunk is closed once it exceeds its share.
    split = 13
    threshold = totalSize / split

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        if dirSize > threshold or run == sortedKeys[-1]:
            label = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + label + "/"
            # exist_ok keeps a re-run over an already split directory working.
            os.makedirs(newDir, exist_ok=True)
            # All files may be empty; avoid dividing by a zero total.
            percent = 100. * dirSize / totalSize if totalSize else 0.0
            print(str(percent) + "% " + label)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    # NOTE(review): a string argument is printed to stderr and produces exit
    # status 1, so a successful run reports failure to the shell.  Kept as-is
    # because callers may rely on it -- confirm before changing.
    sys.exit("ok")
0104
0105
0106
0107
0108
# Script entry point: run the splitter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()