Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-11-27 03:18:06

0001 #!/usr/bin/env python3
0002 import sys,os,subprocess
0003 from CommonMethods import *
0004 
class FileObj:
    """Bookkeeping record for the files that share one run number.

    All attributes are plain public fields that the caller fills in:
    run       -- run number the record belongs to (starts at 0)
    size      -- accumulated size of the collected files (starts at 0)
    fileNames -- names of the collected files (starts empty)
    """

    def __init__(self):
        # A fresh list per instance, so records never share file names.
        self.fileNames = list()
        self.run = self.size = 0
0010 
def getRunNumberFromFileName(fileName):
    """Extract the run number encoded in a file name.

    The name is searched for the pattern
    ``<non-digits>_<digits>_<digits>_<digits>``; the run number is the
    second of the three numeric fields.

    Parameters:
        fileName -- file name (or path) to inspect.

    Returns:
        The run number as an int, or -1 when the name does not contain the
        expected pattern.
    """
    # Imported locally: the visible file never imports `re` explicitly and
    # would otherwise depend on the star import providing it.
    import re

    # Raw string so that \D and \d reach the regex engine untouched.
    match = re.search(r'(\D+)_(\d+)_(\d+)_(\d+)', fileName)
    if not match:
        return -1
    # Python 3 has no `long`; `int` is arbitrary precision.
    return int(match.group(3))
0016                 
0017 
0018 
def main():
    """Split the ``.txt`` files of a directory into run-range subdirectories.

    Usage: ``splitter fromDir``

    The files found in ``fromDir`` are grouped by the run number encoded in
    their names. The groups are walked in increasing run order and
    consecutive runs are copied into a new ``Run<first>_<last>/``
    subdirectory of ``fromDir`` each time the accumulated size exceeds
    1/13 of the total size; the last runs go into a final directory
    regardless of their size.

    ``ls`` and ``cp`` are not defined in this file; they presumably come
    from the ``CommonMethods`` star import. This function assumes ``ls``
    returns file names relative to the given directory -- TODO confirm.

    The function never returns: it always ends through ``sys.exit`` with a
    string argument, which prints the string to stderr and gives exit
    status 1, including the final "ok".

    Raises:
        OSError -- if a file size cannot be read or an output directory
                   cannot be created.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: splitter fromDir")
    sourceDir = sys.argv[1] + '/'

    fileList = ls(sourceDir, ".txt")

    # Group the files by run number and add up the size of each group.
    # Names that do not match the expected pattern are collected together
    # under the -1 key returned by getRunNumberFromFileName.
    fileObjList = {}
    totalSize = 0
    for fileName in fileList:
        runNumber = getRunNumberFromFileName(fileName)
        if runNumber not in fileObjList:
            fileObjList[runNumber] = FileObj()
            fileObjList[runNumber].run = runNumber
        fileObjList[runNumber].fileNames.append(fileName)
        # Ask the OS for the size instead of parsing `ls -l` output, which
        # depends on column padding and breaks on unusual file names.
        fileSize = os.path.getsize(sourceDir + fileName)
        fileObjList[runNumber].size += fileSize
        totalSize += fileSize

    sortedKeys = sorted(fileObjList)

    # A directory is closed once it holds more than 1/split of the total.
    split = 13

    dirSize = 0
    tmpList = []
    for run in sortedKeys:
        dirSize += fileObjList[run].size
        tmpList.append(fileObjList[run])
        # The last run always closes the current directory so that no
        # files are left behind.
        if dirSize > totalSize / split or run == sortedKeys[-1]:
            rangeName = "Run" + str(tmpList[0].run) + "_" + str(tmpList[-1].run)
            newDir = sourceDir + rangeName + "/"
            # An already existing directory is reused; any other failure
            # raises instead of being silently ignored.
            os.makedirs(newDir, exist_ok=True)
            # Guard against all files being empty (totalSize == 0).
            percent = 100. * dirSize / totalSize if totalSize else 0.
            print(str(percent) + "% " + rangeName)
            for runs in tmpList:
                cp(sourceDir, newDir, runs.fileNames)
            tmpList = []
            dirSize = 0

    print(totalSize)
    print(sortedKeys)
    sys.exit("ok")
0104 
0105 
0106 
0107 
0108         
# Run the splitter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()