Created
October 15, 2010 22:21
-
-
Save fish2000/629073 to your computer and use it in GitHub Desktop.
directory-recursive file suffix frequency histograms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
First example: no args, lists all suffixes, using the defaults for: directory (current working directory), sort order (alphabetical), graph character ('-'), graph width (50 chars).
Second example: -n (sort by file count), -t 20 (list only the top 20 suffixes), --width=35 (scale graph width to 35 chars), --imatch="^\w{3,4}$" (case-insensitive regexp -- only print suffixes that are between 3 and 4 chars in length), -g \* (specify a new graph character). Unflagged arguments are assumed to be directories to be scanned.
See the source for all options.
OTUS-ASIO:face fish$ alias sufs='/wherever/you/put/getfilesuffixes.py' | |
OTUS-ASIO:face fish$ sufs | |
>>> /this/current/directory (total 47846) | |
=== 21 - | |
=== css 60 - | |
=== gif 728 - | |
=== gz 1 - | |
=== htm 1 - | |
=== html 56 - | |
=== icc 282 - | |
=== ico 1 - | |
=== jpeg 76 - | |
=== jpg 42181 --------------------------------------------------- | |
=== JPG 686 - | |
=== js 146 - | |
=== less 1 - | |
=== md 3 - | |
=== pdf 4 - | |
=== php 4 - | |
=== png 3331 ---- | |
=== psd 7 - | |
=== svg 27 - | |
=== svn-base 93 - | |
=== swf 2 - | |
=== tif 84 - | |
=== tmproj 1 - | |
=== txt 7 - | |
=== zip 20 - | |
OTUS-ASIO:face fish$ sufs -n -t 20 --width=35 --imatch="^\w{3,4}$" -g \* ~/Screenshots/ | |
>>> /Users/fish/Screenshots (total 28088) | |
=== jpg 23768 ************************************ | |
=== JPG 2005 *** | |
=== png 1462 *** | |
=== dwg 249 * | |
=== html 163 * | |
=== pdf 102 * | |
=== doc 44 * | |
=== mpg 30 * | |
=== DWG 24 * | |
=== xls 17 * | |
=== gif 13 * | |
=== mp3 10 * | |
=== wmv 8 * | |
=== psd 7 * | |
=== rtf 6 * | |
=== txt 2 * | |
=== PCT 2 * | |
=== PDF 2 * | |
=== aif 2 * | |
=== inv 1 * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
getfilesuffixes.py | |
Created by FI$H 2000 on 2010-10-15. | |
Copyright (c) 2010 OST, LLC. All rights reserved. | |
""" | |
import sys, os, getopt, re, string | |
# Usage text printed for -h/--help; the single %s below is filled in with
# this script's file name.
help_message = '''
Prints a list of all the file suffixes found in each DIR, with counts.
Defaults to the current directory with no args.

    $ %s [OPTIONS] DIR [DIR DIR etc ...]

Options:
    -h, --help                    show this message
    -n                            sort by file count, largest first
                                  (default: alphabetical)
    -t N, --top=N                 stop after listing N suffixes
    -s A,B, --skip=A,B            extra suffixes to leave out (comma-separated);
                                  not consulted when --only is given
    -o A,B, --only=A,B            list only these suffixes (comma-separated)
    -M RE, --match=RE             list only suffixes matching regexp RE
    -m RE, --imatch=RE            same as --match, ignoring case
    -X RE, --exclude=RE           leave out suffixes matching regexp RE
    -x RE, --iexclude=RE          same as --exclude, ignoring case
    -g C, --graph-character=C     character used to draw the bars (default: -)
    -w N, --width=N               width of the longest bar (default: 50)
''' % os.path.basename(__file__)

# Maps each scanned directory path to its {suffix: file count} tally.
dirs = dict()

# Suffixes left out of the listing by default (extended by -s/--skip).
skips = ['DS_Store', 'hgignore']

# When non-empty, only these suffixes are listed (filled by -o/--only).
onlies = []
class Usage(Exception):
    """Raised for command-line usage problems and for the --help request.

    The text (or wrapped exception) is kept on ``msg`` for the handler in
    ``main`` and is also passed to ``Exception`` so that ``str(err)`` and
    ``err.args`` carry it instead of being empty.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg
def getmesomesuffixes(herp):
    """Tally file suffixes under *herp* into the module-level ``dirs`` table.

    Walks *herp* recursively with ``os.walk`` and, for every file name that
    contains a dot, adds one to ``dirs[herp][suffix]``, where the suffix is
    the text after the last dot. A name ending in a dot is counted under the
    empty suffix; a dot-file such as ``.DS_Store`` is counted under
    ``DS_Store``. Names without a dot are ignored.

    The entry for *herp* is created on demand, so callers need not register
    the directory in ``dirs`` beforehand.
    """
    counts = dirs.setdefault(herp, {})
    for _wdir, _wsubdirs, wfiles in os.walk(herp):
        for wfile in wfiles:
            # rpartition yields an empty separator when there is no dot.
            _stem, dot, suf = wfile.rpartition('.')
            if dot:
                counts[suf] = counts.get(suf, 0) + 1
#printsuf = lambda ssuf, ssufcount, smaxcount: print "===\t\t\t%20s\t %3s\t %s" % (ssuf, ssufcount, "-" * (int(float(float(ssufcount) / float(smaxcount)) * 50) + 1)) | |
def printsuf(ssuf, ssufcount, smaxcount, gchar="-", width=50):
    """Print one histogram row: suffix, file count and a proportional bar.

    ssuf       -- suffix label (right-aligned in a 20-character column)
    ssufcount  -- number of files carrying that suffix
    smaxcount  -- largest count in the listing; it maps to a full-width bar
    gchar      -- character the bar is drawn with
    width      -- length of the bar for ``smaxcount``, before the extra
                  character that every row gets

    The bar is ``int(ssufcount / smaxcount * width) + 1`` characters long.
    A ``smaxcount`` of zero yields the one-character minimum bar instead of
    raising ZeroDivisionError.
    """
    if smaxcount:
        scaled = int(float(ssufcount) / float(smaxcount) * width)
    else:
        scaled = 0
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("===\t\t\t%20s\t %3s\t %s" % (ssuf, ssufcount, gchar * (scaled + 1)))
def _parse_int(option, value):
    """Convert an option value to int, reporting bad input as a Usage error."""
    try:
        return int(value)
    except ValueError:
        raise Usage("option %s expects an integer, got %r" % (option, value))


def _compile_regexp(option, value, flags=0):
    """Compile an option value as a regexp, reporting bad patterns as Usage."""
    try:
        return re.compile(value, flags)
    except re.error as err:
        raise Usage("option %s: invalid regexp %r (%s)" % (option, value, err))


def _suffix_passes(suf, match, exclude):
    """Return True when *suf* survives the optional match/exclude regexps."""
    if exclude is not None and exclude.search(suf):
        return False
    if match is not None and not match.search(suf):
        return False
    return True


def main(argv=None):
    """Parse the command line, scan each directory and print its histogram.

    argv -- full argument vector including the program name; defaults to
            ``sys.argv``. Non-option arguments are directories to scan; with
            none given the current working directory is used.

    Returns None on success and 2 after a usage error (or --help), in which
    case the message is written to stderr.
    """
    numerical = False
    top = 0
    match = None
    exclude = None
    graphchar = "-"
    width = 50
    # Work on copies so repeated calls do not accumulate -s/-o values in the
    # module-level defaults.
    skip_set = set(skips)
    only_set = set(onlies)
    prog = sys.argv[0].split("/")[-1]
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(argv[1:], "hnt:s:o:M:X:m:x:g:w:", [
                "help",
                "numerical-sort",
                "top=",
                "skip=",
                "only=",
                "match=",
                "exclude=",
                "imatch=",
                "iexclude=",
                "graph-character=",
                "width=",
            ])
        except getopt.error as msg:
            raise Usage(msg)

        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)
            # getopt reports long options by their full declared name, so the
            # name tested here must be "--numerical-sort".
            elif option in ("-n", "--numerical-sort"):
                numerical = True
            elif option in ("-t", "--top"):
                top = _parse_int(option, value)
            elif option in ("-s", "--skip"):
                skip_set.update(value.split(','))
            elif option in ("-o", "--only"):
                only_set.update(value.split(','))
            elif option in ("-M", "--match"):
                match = _compile_regexp(option, value)
            elif option in ("-X", "--exclude"):
                exclude = _compile_regexp(option, value)
            elif option in ("-m", "--imatch"):
                match = _compile_regexp(option, value, re.IGNORECASE)
            elif option in ("-x", "--iexclude"):
                exclude = _compile_regexp(option, value, re.IGNORECASE)
            elif option in ("-g", "--graph-character"):
                if not value:
                    raise Usage("option %s expects a character" % option)
                graphchar = value[0]
            elif option in ("-w", "--width"):
                ww = _parse_int(option, value)
                # Non-positive widths are ignored and the default is kept.
                if ww > 0:
                    width = ww

        if not args:
            args = [os.getcwd()]

        # Keep command-line order and drop duplicates.
        targets = []
        for arg in args:
            durr = os.path.abspath(arg)
            if not os.path.isdir(durr):
                sys.stderr.write("%s: skipping %s (not a directory)\n" % (prog, durr))
                continue
            if durr not in targets:
                targets.append(durr)

        for durr in targets:
            # Start from an empty tally so an earlier call cannot inflate it.
            dirs[durr] = dict()
            getmesomesuffixes(durr)

        print("")
        for durr in targets:
            counts = dirs[durr]
            if numerical:
                # Largest count first; ties fall back to the suffix name.
                sufs = sorted(counts.items(),
                              key=lambda item: (-item[1], item[0].lower(), item[0]))
            else:
                sufs = sorted(counts.items(),
                              key=lambda item: (item[0].lower(), item[0]))

            # Bars are scaled against the largest count of the whole tally
            # (never less than 1), including suffixes that end up filtered.
            maxcount = max([1] + [count for _suf, count in sufs])
            total = sum(count for _suf, count in sufs)
            print(">>>\t\t\t%s (total %s)" % (durr, total))

            shown = 0
            for suf, sufcount in sufs:
                if only_set:
                    # An --only list replaces the skip list entirely.
                    if suf not in only_set:
                        continue
                elif suf in skip_set:
                    continue
                if not _suffix_passes(suf, match, exclude):
                    continue
                printsuf(suf, sufcount, maxcount, graphchar, width)
                shown += 1
                # top <= 0 means "no limit".
                if top > 0 and shown >= top:
                    break
            print("")
    except Usage as err:
        sys.stderr.write("%s: %s\n" % (prog, err.msg))
        sys.stderr.write("\t for help use --help\n")
        return 2
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment