File indexing completed on 2024-04-06 12:31:48
0001
0002
0003 from __future__ import print_function
0004 from builtins import range
0005 import os, sys, stat
0006 from operator import itemgetter
0007
class TreeAnalyzer(object):
    """Collect per-directory and per-file sizes of a directory tree.

    After analyzePath() has run, ``dirSizes`` maps each visited directory
    to the summed size (as reported by os.path.getsize) of the entries
    directly inside it, and ``fileSizes`` maps each regular file to its
    size. Directories without any counted entry do not appear in
    ``dirSizes``.
    """

    # A directory whose path contains any of these substrings is skipped.
    _SKIPPED_PATH_PARTS = (
        'CVS',
        '.glimpse_',
        'Configuration/PyReleaseValidation/data/run/',
    )

    def __init__(self, outFileName):
        """Remember the output file name and start with empty size tables.

        outFileName -- name of the JSON file written by analyzePath(); may
                       be None, in which case no result files are written.
        """
        self.dirSizes = {}
        self.fileSizes = {}
        self.outFileName = outFileName
        print("going to write to:",self.outFileName)

    def analyzePath(self, dirIn) :
        """Walk dirIn, accumulate the sizes and write the result files.

        Entries whose size cannot be determined (e.g. dangling symlinks or
        files removed while scanning) are reported and skipped instead of
        aborting the whole scan.
        """
        for (path, dirs, files) in os.walk(dirIn):

            if any(part in path for part in self._SKIPPED_PATH_PARTS):
                continue

            for name in files:
                if '.glimpse_index' in name:
                    continue
                fileName = os.path.join(path, name)
                try:
                    fileSize = os.path.getsize(fileName)
                except OSError as e:
                    print("skipping unreadable entry:", fileName, str(e))
                    continue
                # every sized entry counts towards its directory ...
                self.dirSizes[path] = self.dirSizes.get(path, 0) + fileSize
                # ... but only regular files are listed individually
                if os.path.isfile(fileName):
                    self.fileSizes[fileName] = fileSize

        self._writeResults(dirIn)

    def _pickleFileName(self):
        """Return the pickle file name derived from self.outFileName."""
        pklFileName = self.outFileName.replace('.json', '.pkl')
        if pklFileName == self.outFileName:
            # No '.json' in the name: append the suffix so that the pickle
            # does not overwrite the JSON file written just before.
            pklFileName = self.outFileName + '.pkl'
        return pklFileName

    def _writeResults(self, dirIn):
        """Dump [absolute dirIn, dirSizes, fileSizes] as JSON and as pickle.

        Writing is best effort: a failure is reported on stdout and does
        not prevent the other file from being written.
        """
        if not self.outFileName:
            print("no output file name given, not writing json/pkl files")
            return

        import json
        import pickle

        payload = [os.path.abspath(dirIn), self.dirSizes, self.fileSizes]

        try:
            jsonFileName = self.outFileName
            with open(jsonFileName, 'w') as jsonFile:
                json.dump(payload, jsonFile)
            print('treeInfo info written to ', jsonFileName)
        except Exception as e:
            print("error writing json file:", str(e))

        try:
            pklFileName = self._pickleFileName()
            with open(pklFileName, 'wb') as pklFile:
                pickle.dump(payload, pklFile)
            print('treeInfo info written to ', pklFileName)
        except Exception as e:
            print("error writing pkl file:", str(e))

    def show(self):
        """Print the number of empty files and the (up to) ten largest
        directories and files, largest first."""
        topDirs = sorted(self.dirSizes.items(), key=itemgetter(1), reverse=True)
        topFiles = sorted(self.fileSizes.items(), key=itemgetter(1), reverse=True)

        emptyFiles = [name for name, size in topFiles if size == 0]
        print("found ",len(emptyFiles),"empty files. ")

        # Slicing (instead of indexing 0..9) copes with fewer than 10 entries.
        print("found ", len(self.dirSizes), 'directories, top 10 are:')
        for entry in topDirs[:10]:
            print(entry)

        print("found ", len(self.fileSizes), 'files, top 10 are:')
        for entry in topFiles[:10]:
            print(entry)
0075
0076
def main():
    """Parse the command line, analyze the requested tree and show a summary.

    Options:
      -c, --checkDir DIR   directory to analyze (default: '.')
      -o, --outFile FILE   output file name handed to TreeAnalyzer
                           (default: None)

    Exits with status 2 when an unknown option is given.
    """
    import getopt

    # Only the option parsing can raise GetoptError, so keep the try minimal.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:o:", ['checkDir=', 'outFile='])
    except getopt.GetoptError as e:
        print("unknown option", str(e))
        sys.exit(2)

    checkDir = '.'
    outFile = None
    for opt, arg in opts:
        if opt in ('-c', '--checkDir'):
            checkDir = arg
        elif opt in ('-o', '--outFile'):
            outFile = arg

    ta = TreeAnalyzer(outFile)
    ta.analyzePath(checkDir)
    ta.show()
0101
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()
0104