Skip to content

Instantly share code, notes, and snippets.

@fish2000
Created October 15, 2010 22:21
Show Gist options
  • Save fish2000/629073 to your computer and use it in GitHub Desktop.
Save fish2000/629073 to your computer and use it in GitHub Desktop.
directory-recursive file suffix frequency histograms
First example: no args, lists all suffixes, use defaults for: directory (current working directory), sort order (alphabetical), graph character ('-'), graph width (50 chars).
Second example: -n (sort by file count), -t 20 (list only the top 20 suffixes), --width=35 (scale graph width to 35 chars), --imatch="^\w{3,4}$" (case-insensitive regexp -- only print suffixes that are between 3 and 4 chars in length), -g \* (specify a new graph character). Unflagged arguments are assumed to be directories to be scanned.
See the source for all options.
OTUS-ASIO:face fish$ alias sufs='/wherever/you/put/getfilesuffixes.py'
OTUS-ASIO:face fish$ sufs
>>> /this/current/directory (total 47846)
=== 21 -
=== css 60 -
=== gif 728 -
=== gz 1 -
=== htm 1 -
=== html 56 -
=== icc 282 -
=== ico 1 -
=== jpeg 76 -
=== jpg 42181 ---------------------------------------------------
=== JPG 686 -
=== js 146 -
=== less 1 -
=== md 3 -
=== pdf 4 -
=== php 4 -
=== png 3331 ----
=== psd 7 -
=== svg 27 -
=== svn-base 93 -
=== swf 2 -
=== tif 84 -
=== tmproj 1 -
=== txt 7 -
=== zip 20 -
OTUS-ASIO:face fish$ sufs -n -t 20 --width=35 --imatch="^\w{3,4}$" -g \* ~/Screenshots/
>>> /Users/fish/Screenshots (total 28088)
=== jpg 23768 ************************************
=== JPG 2005 ***
=== png 1462 ***
=== dwg 249 *
=== html 163 *
=== pdf 102 *
=== doc 44 *
=== mpg 30 *
=== DWG 24 *
=== xls 17 *
=== gif 13 *
=== mp3 10 *
=== wmv 8 *
=== psd 7 *
=== rtf 6 *
=== txt 2 *
=== PCT 2 *
=== PDF 2 *
=== aif 2 *
=== inv 1 *
#!/usr/bin/env python
# encoding: utf-8
"""
getfilesuffixes.py
Created by FI$H 2000 on 2010-10-15.
Copyright (c) 2010 OST, LLC. All rights reserved.
"""
import sys, os, getopt, re, string
# Usage text reported for -h/--help; the %s placeholder is filled with the
# base name of this script file.
help_message = '''
Prints a list of all the file suffixes found in each DIR, with counts.
Defaults to the current directory with no args.
$ %s DIR [DIR DIR etc ...]
''' % os.path.basename(__file__)

# Maps each scanned directory path to a {suffix: file count} dict.
dirs = dict()
# Default suffixes left out of the listing; -s/--skip adds more.
skips = ['DS_Store','hgignore']
# Suffixes to list exclusively (empty means no restriction); -o/--only adds entries.
onlies = []
class Usage(Exception):
    """Signals that the run should stop and a usage/help message be shown.

    The message is available both as ``err.msg`` and through the normal
    exception protocol (``str(err)``, ``err.args``).
    """

    def __init__(self, msg):
        # Pass the message on to Exception as well; without this, str(err)
        # is empty and tracebacks/logs lose the reason.
        super(Usage, self).__init__(msg)
        self.msg = msg
def getmesomesuffixes(herp):
    """Count file suffixes under directory *herp*, recursively.

    Counts are accumulated into the module-level ``dirs[herp]`` dict
    (created here if it does not exist yet) and that dict is also returned.

    The suffix is whatever follows the last '.' in the file name, so a
    dot-file such as ``.DS_Store`` yields ``DS_Store`` and a name ending in
    '.' yields the empty string. Names without any '.' are not counted.
    """
    # setdefault keeps counts seeded by the caller and avoids a KeyError
    # when the function is called for a directory not registered in advance.
    counts = dirs.setdefault(herp, {})
    for _wdir, _wsubdirs, wfiles in os.walk(herp):
        for wfile in wfiles:
            # One pass: `dot` is empty exactly when the name contains no '.'.
            _stem, dot, suf = wfile.rpartition('.')
            if dot:
                counts[suf] = counts.get(suf, 0) + 1
    return counts
def printsuf(ssuf, ssufcount, smaxcount, gchar="-", width=50):
    """Print one histogram row: suffix, file count and a proportional bar.

    ssuf      -- suffix label, right-aligned in a 20-character column
    ssufcount -- number of files carrying that suffix
    smaxcount -- count the bar is scaled against (the largest in the listing)
    gchar     -- string repeated to draw the bar
    width     -- bar length, before the extra character, for a suffix whose
                 count equals smaxcount
    """
    # A zero maximum would divide by zero; fall back to the minimum bar.
    share = float(ssufcount) / float(smaxcount) if smaxcount else 0.0
    # The +1 guarantees that every listed suffix shows at least one character.
    bar = gchar * (int(share * width) + 1)
    # Single parenthesised argument: valid on both Python 2 and Python 3.
    print("===\t\t\t%20s\t %3s\t %s" % (ssuf, ssufcount, bar))
def _int_option(option, value):
    """Convert an option value to int, reporting bad input as a Usage error."""
    try:
        return int(value)
    except ValueError:
        raise Usage("option %s needs an integer, got %r" % (option, value))


def _regex_option(option, value, flags=0):
    """Compile an option value as a regexp, reporting bad patterns as a Usage error."""
    try:
        return re.compile(str(value), flags)
    except re.error as problem:
        raise Usage("option %s: invalid regexp %r (%s)" % (option, value, problem))


def _parse_options(argv):
    """Parse the command line into a settings dict and the list of directory arguments."""
    settings = {
        "numerical": False,
        "top": 0,
        "match": None,
        "exclude": None,
        "graphchar": "-",
        "width": 50,
        # Work on copies so that repeated main() calls do not pile options
        # onto the module-level defaults.
        "skips": set(skips),
        "onlies": set(onlies),
    }
    try:
        opts, args = getopt.getopt(argv[1:], "hnt:s:o:M:X:m:x:g:w:", [
            "help",
            "numerical-sort",
            "top=",
            "skip=",
            "only=",
            "match=",
            "exclude=",
            "imatch=",
            "iexclude=",
            "graph-character=",
            "width=",
        ])
    except getopt.error as msg:
        raise Usage(str(msg))

    for option, value in opts:
        if option in ("-h", "--help"):
            raise Usage(help_message)
        # getopt always reports the full long name, so compare against
        # "--numerical-sort" (abbreviations such as --numerical are expanded).
        elif option in ("-n", "--numerical-sort"):
            settings["numerical"] = True
        elif option in ("-t", "--top"):
            settings["top"] = _int_option(option, value)
        elif option in ("-s", "--skip"):
            settings["skips"].update(str(value).split(','))
        elif option in ("-o", "--only"):
            settings["onlies"].update(str(value).split(','))
        elif option in ("-M", "--match"):
            settings["match"] = _regex_option(option, value)
        elif option in ("-X", "--exclude"):
            settings["exclude"] = _regex_option(option, value)
        elif option in ("-m", "--imatch"):
            settings["match"] = _regex_option(option, value, re.IGNORECASE)
        elif option in ("-x", "--iexclude"):
            settings["exclude"] = _regex_option(option, value, re.IGNORECASE)
        elif option in ("-g", "--graph-character"):
            # An empty value keeps the default instead of raising IndexError.
            settings["graphchar"] = str(value)[:1] or settings["graphchar"]
        elif option in ("-w", "--width"):
            requested = _int_option(option, value)
            if requested > 0:
                settings["width"] = requested
    return settings, args


def _is_listed(suf, settings):
    """Decide whether a suffix passes the only/skip lists and the match/exclude regexps."""
    if settings["onlies"]:
        # An explicit --only list replaces the skip list entirely.
        if suf not in settings["onlies"]:
            return False
    elif suf in settings["skips"]:
        return False
    if settings["exclude"] is not None and settings["exclude"].search(suf):
        return False
    if settings["match"] is not None and not settings["match"].search(suf):
        return False
    return True


def main(argv=None):
    """Command-line entry point: print a suffix histogram for each directory.

    argv -- full argument vector including the program name; defaults to
            sys.argv. Non-option arguments are directories to scan; with
            none given the current working directory is used. Arguments
            that are not directories are reported on stderr and skipped.

    Options: -h/--help, -n/--numerical-sort, -t/--top N, -s/--skip LIST,
    -o/--only LIST, -M/--match RE, -X/--exclude RE, -m/--imatch RE,
    -x/--iexclude RE, -g/--graph-character C, -w/--width N. LIST is
    comma-separated; a --top value of 0 or less means "no limit".

    Returns 0 after a normal run and 2 when a usage message (including the
    one for --help) was written to stderr.
    """
    if argv is None:
        argv = sys.argv
    # Name used as prefix for messages; follows the argv actually in use.
    prog = os.path.basename((argv or sys.argv)[0])

    try:
        settings, args = _parse_options(argv)

        # Resolve the directories for this call, keeping argument order
        # and dropping duplicates.
        targets = []
        for arg in (args or [os.getcwd()]):
            path = os.path.abspath(arg)
            if not os.path.isdir(path):
                sys.stderr.write("%s: skipping %s (not a directory)\n" % (prog, path))
                continue
            if path not in targets:
                targets.append(path)

        for path in targets:
            dirs[path] = dict()
            getmesomesuffixes(path)

        if settings["numerical"]:
            # Largest count first; ties fall back to alphabetical order.
            def sort_key(item):
                return (-item[1], item[0].lower(), item[0])
        else:
            # Case-insensitive alphabetical order, made total by the
            # original spelling so that e.g. "JPG"/"jpg" sort reproducibly.
            def sort_key(item):
                return (item[0].lower(), item[0])

        print("")
        for path in targets:
            ranked = sorted(dirs[path].items(), key=sort_key)
            counts = [count for _suf, count in ranked]
            # Bars are scaled against the largest count in the directory
            # (never less than 1), before any filtering is applied.
            maxcount = max([1] + counts)
            total = sum(counts)
            print(">>>\t\t\t%s (total %s)" % (path, total))

            shown = 0
            for suf, sufcount in ranked:
                if not _is_listed(suf, settings):
                    continue
                printsuf(suf, sufcount, maxcount,
                         settings["graphchar"], settings["width"])
                shown += 1
                if settings["top"] > 0 and shown >= settings["top"]:
                    break
            print("")
    except Usage as err:
        sys.stderr.write("%s: %s\n" % (prog, err.msg))
        sys.stderr.write("\t for help use --help\n")
        return 2
    return 0
# Run the command-line tool only when executed as a script, and hand
# main()'s return value to the shell as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment