File indexing completed on 2024-12-01 23:40:44
0001
0002
0003 from builtins import range
0004 import os, sys, stat
0005 from operator import itemgetter
0006
class TreeAnalyzer(object):
    """Collect per-directory and per-file sizes of a directory tree.

    The tables are filled by analyzePath() and summarised by show():

      dirSizes  -- maps a directory path (as yielded by os.walk) to the summed
                   size in bytes of the entries listed directly in it;
                   sub-directories are accounted under their own key.
      fileSizes -- maps the path of every regular file to its size in bytes.
    """

    # A directory is ignored when its path contains any of these substrings.
    # The test is a plain substring match on the whole path, so everything
    # below a matching directory is ignored as well.
    _SKIPPED_PATH_PARTS = (
        'CVS',
        '.glimpse_',
        'Configuration/PyReleaseValidation/data/run/',
    )

    def __init__(self, outFileName):
        """Start with empty size tables and remember where to write results.

        outFileName -- name of the JSON file written by analyzePath(); a
                       pickle copy is written alongside it.  When it is None
                       or empty, analyzePath() does not write any file.
        """
        self.dirSizes = {}
        self.fileSizes = {}
        self.outFileName = outFileName
        print("going to write to:",self.outFileName)

    def analyzePath(self, dirIn):
        """Walk dirIn, accumulate the size tables and write them to disk.

        dirIn -- top directory of the tree to analyse.

        Entries whose size cannot be read (dangling symlinks, files removed
        while the walk is running, permission problems) are reported and
        skipped instead of aborting the whole scan.
        """
        for path, _dirs, files in os.walk(dirIn):

            if any(part in path for part in self._SKIPPED_PATH_PARTS):
                continue

            for name in files:
                if '.glimpse_index' in name:
                    continue
                fileName = os.path.join(path, name)
                try:
                    fileSize = os.path.getsize(fileName)
                except OSError as e:
                    print("skipping unreadable entry:", fileName, str(e))
                    continue
                self.dirSizes[path] = self.dirSizes.get(path, 0) + fileSize
                # Only regular files (or links to them) get a per-file entry;
                # other entries still count towards the directory total.
                if os.path.isfile(fileName):
                    self.fileSizes[fileName] = fileSize

        self._writeResults(os.path.abspath(dirIn))

    def _writeResults(self, topDir):
        """Write [topDir, dirSizes, fileSizes] as JSON and as a pickle."""
        if not self.outFileName:
            print("no output file name given, not writing treeInfo")
            return
        payload = [topDir, self.dirSizes, self.fileSizes]
        self._writeJson(payload)
        self._writePickle(payload)

    def _writeJson(self, payload):
        """Dump payload as JSON into self.outFileName (best effort)."""
        jsonFileName = self.outFileName
        # Deliberately broad: a failed write is reported but must not stop
        # the caller from going on to show() the collected numbers.
        try:
            import json
            with open(jsonFileName, 'w') as jsonFile:
                json.dump(payload, jsonFile)
            print('treeInfo info written to ', jsonFileName)
        except Exception as e:
            print("error writing json file:", str(e))

    def _writePickle(self, payload):
        """Dump payload as a pickle next to the JSON file (best effort)."""
        # Only a trailing '.json' extension is swapped for '.pkl'.  Any other
        # name gets '.pkl' appended, so the pickle can never overwrite the
        # JSON file that was just written.
        base, ext = os.path.splitext(self.outFileName)
        if ext == '.json':
            pklFileName = base + '.pkl'
        else:
            pklFileName = self.outFileName + '.pkl'
        try:
            import pickle
            with open(pklFileName, 'wb') as pklFile:
                pickle.dump(payload, pklFile)
            print('treeInfo info written to ', pklFileName)
        except Exception as e:
            print("error writing pkl file:", str(e))

    def show(self):
        """Print the number of empty files and the ten largest dirs/files.

        Fewer than ten entries are printed when fewer were collected.
        """
        topDirs = sorted(self.dirSizes.items(), key=itemgetter(1), reverse=True)
        topFiles = sorted(self.fileSizes.items(), key=itemgetter(1), reverse=True)

        emptyFiles = [p for p, s in topFiles if s == 0]
        print("found ",len(emptyFiles),"empty files. ")

        print("found ", len(self.dirSizes), 'directories, top 10 are:')
        for entry in topDirs[:10]:
            print(entry)

        print("found ", len(self.fileSizes), 'files, top 10 are:')
        for entry in topFiles[:10]:
            print(entry)
0074
0075
def main():
    """Parse the command line, analyse the requested tree and print a summary.

    Options:
      -c, --checkDir DIR    directory to analyse (default: '.')
      -o, --outFile FILE    output file name handed to TreeAnalyzer
                            (default: None)

    Exits with status 2 when getopt rejects the command line.
    """
    import getopt

    # Only the option parsing can raise GetoptError, so only it is guarded.
    try:
        opts, _positional = getopt.getopt(sys.argv[1:], "c:o:", ['checkDir=', 'outFile='])
    except getopt.GetoptError as e:
        print("unknown option", str(e))
        sys.exit(2)

    checkDir = '.'
    outFile = None
    # When an option is repeated, the last occurrence wins.
    for flag, value in opts:
        if flag in ('-c', "--checkDir"):
            checkDir = value
        elif flag in ('-o', "--outFile"):
            outFile = value

    analyzer = TreeAnalyzer(outFile)
    analyzer.analyzePath(checkDir)
    analyzer.show()
0100
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
0103