Skip to content

Instantly share code, notes, and snippets.

@larryhou
Last active March 21, 2020 08:05
Show Gist options
  • Save larryhou/f37a676dc731f3086b1e170e1a807efd to your computer and use it in GitHub Desktop.
Save larryhou/f37a676dc731f3086b1e170e1a807efd to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#encoding: utf-8
class Cluster(object):
    """A named node in a size tree (one node per path component).

    Each node keeps its children in first-seen order, a back-reference to
    its parent, and a ``size`` that is either assigned directly (when
    ``leaf`` is true) or aggregated from its children by ``caculate``.
    """

    def __init__(self, name):
        """Create an empty, parentless, non-leaf node called *name*."""
        self.children = []  # child nodes, in the order they were first requested
        self.parent = None  # type: Cluster
        self.size = 0
        self.name = name  # type: str
        self.framework = False
        self.leaf = False
        self.__map = {}  # type: dict[str, Cluster]

    def get(self, name):
        """Return the child called *name*, creating and attaching it if absent."""
        child = self.__map.get(name)
        if child is None:
            child = self.__map[name] = Cluster(name)
            self.children.append(child)
            child.parent = self
        return child

    def caculate(self):
        """Recompute ``size`` for this subtree and return it.

        A non-leaf node is reset to zero before its children are added up;
        a leaf keeps the size that was assigned to it. (The method name is
        misspelt in the original API and is kept so existing callers work.)
        """
        if not self.leaf:
            self.size = 0
        for child in self.children:
            self.size += child.caculate()
        return self.size

    def summarize(self, threshold=512 << 20, indent=''):
        """Print the subtree below this node as a box-drawing tree.

        Only children whose ``size`` is at least *threshold* are printed
        (and descended into). *indent* is the prefix accumulated from the
        ancestors; callers normally leave it at its default.
        """
        visible = [child for child in self.children if child.size >= threshold]
        last = len(visible) - 1
        for position, child in enumerate(visible):
            closed = position == last
            label = '{} {:12,}'.format(child.name, child.size)
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print('{}{}─{}'.format(indent, '└' if closed else '├', label))
            # The last sibling closes its branch, so nothing is drawn below it.
            child.summarize(threshold, indent + (' ' if closed else '│') + ' ')
def analyize(options, path):
    """Report disk usage under *path* by file extension and by directory.

    A listing of ``<size> <file path>`` records is produced with ``find`` and
    ``stat`` and cached in ``<basename of path>.log`` in the current working
    directory; an existing listing is reused as-is. Files that have a path
    component named ``Library`` are ignored. Two reports are printed:

    1. one line per extension (bytes, cumulative percent, percent, extension,
       file count), largest first, stopping once the cumulative percentage
       reaches ``options.extension_threshold``;
    2. a tree of the entries whose aggregated size is at least
       ``options.size_threshold`` MiB.

    Args:
        options: object exposing ``extension_threshold`` (percent) and
            ``size_threshold`` (MiB, integer).
        path: directory to scan.

    Raises:
        subprocess.CalledProcessError: if ``find`` exits with a non-zero status.
        ValueError: if a non-blank record does not start with an integer size.

    NOTE: ``stat -f <format>`` is the BSD/macOS spelling; GNU coreutils
    ``stat`` uses ``-c`` for the format instead.
    """
    import os
    import subprocess

    realpath = os.path.abspath(path)
    basename = os.path.basename(realpath) + '.log'
    if not os.path.exists(basename):
        # An argument list (no shell) passes paths containing spaces or shell
        # metacharacters through verbatim, and unlike `assert os.system(...)`
        # the command still runs under `python -O`.
        command = ['find', path, '-type', 'f',
                   '-exec', 'stat', '-f', '%10z %N', '{}', '+']
        # Write to a scratch file first so that a failed or interrupted scan
        # is never mistaken for a complete cached listing on the next run.
        partial = basename + '.partial'
        with open(partial, 'w') as fp:
            subprocess.check_call(command, stdout=fp)
        os.rename(partial, basename)

    root = Cluster('ROOT')
    data = {}  # type: dict[str, list[int]]  # extension -> [total bytes, file count]
    total = 0
    with open(basename, 'r') as fp:
        for line in fp:
            record = line.strip()  # the size column is left-padded with spaces
            if not record:
                continue
            size_field, _, file_path = record.partition(' ')
            size = int(size_field)
            components = file_path.split('/')
            if 'Library' in components:
                continue
            node = root
            for component in components:
                node = node.get(component)
            node.size = size
            node.leaf = True
            # A file name without a dot is counted under its whole name.
            extension = node.name.split('.')[-1]
            stats = data.setdefault(extension, [0, 0])
            stats[0] += size
            stats[1] += 1
            total += size

    # Largest [bytes, count] pair first; key= works on Python 2 and 3 and,
    # unlike the former cmp function, is well-defined for ties.
    extensions = sorted(data, key=data.get, reverse=True)
    accumulation = 0.0
    for extension in extensions:
        size, count = data[extension]
        # Guard against a listing that contains only empty files.
        percent = 100.0 * size / total if total else 0.0
        accumulation += percent
        print('{:14,} {:5.2f}% {:5.2f}% {} #{:,}'.format(size, accumulation, percent, extension, count))
        if accumulation >= options.extension_threshold:
            break

    root.caculate()
    if root.size > 0:
        # The first path component recorded becomes the top of the printed tree.
        target = root.children[0]
        print(target.name)
        target.summarize(threshold=options.size_threshold << 20)
def main(argv=None):
    """Parse command-line options and run the analysis.

    Args:
        argv: optional list of argument strings (without the program name).
            When ``None``, argparse reads ``sys.argv[1:]``.

    Options:
        --path / -p                 directory to analyse (required)
        --extension-threshold / -x  cumulative percentage at which the
                                    per-extension report stops (default 99)
        --size-threshold / -s       minimum size in MB for an entry to appear
                                    in the tree report (default 1024)
    """
    import argparse

    arguments = argparse.ArgumentParser()
    # Without a path there is nothing to scan; let argparse report the
    # omission instead of failing later on a None path.
    arguments.add_argument('--path', '-p', required=True)
    arguments.add_argument('--extension-threshold', '-x', type=float, default=99, help='单位:百分比')
    arguments.add_argument('--size-threshold', '-s', type=int, default=1024, help='单位:MB')
    options = arguments.parse_args(argv)
    analyize(options, path=options.path)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment