esoergel · March 4, 2019 05:22
diff --git a/line_counts.py b/line_counts.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Usage:
    $ find . | grep py$ | python line_counts.py
 """
 from __future__ import unicode_literals
 import subprocess
 import numpy

 def parse(line):
     """Parse one line of ``wc -l`` output into a ``(count, filename)`` pair.

     Returns ``None`` for anything that is not a per-file row: an empty or
     whitespace-only line, a line holding a count but no filename, and the
     ``total`` summary row. Raises ``ValueError`` if the count field is not
     an integer.
     """
     if not line:
         return None
     # Split on the first run of whitespace only, so a filename that itself
     # contains spaces stays in one piece instead of breaking the unpacking.
     fields = line.strip().split(None, 1)
     if len(fields) != 2:
         return None
     count, filename = fields
     # NOTE: a real file literally named "total" cannot be told apart from
     # the summary row here and is skipped as well.
     if filename == 'total':
         return None
     return (int(count), filename)

 res = subprocess.check_output(['xargs', 'wc', '-l'])
 counts_by_file = filter(None, map(parse, res.split('\n')))

 print "Biggest 5 files:"
 biggest = sorted(counts_by_file)[-5:]
 for count, filename in biggest:
    print " ", count, filename

 line_counts = [count for count, _ in counts_by_file]
 median = int(numpy.median(line_counts))
 std = int(numpy.round(numpy.std(line_counts)))

 print "matching files:", len(line_counts)
 print "total lines:", numpy.sum(line_counts)

 print "min:", numpy.min(line_counts)
 print "median:", median
 print "+1σ:", median + std
 print "max:", numpy.max(line_counts)

 # I like weighted average best - you're more likely to be working in a file
 # with longer lines, so that gives you the best idea how long files will be in your day-to-day
 print "weighted avg:", int(numpy.round(numpy.average(line_counts, weights=line_counts)))
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	"""
	Usage:
	    $ find . | grep py$ | python line_counts.py
	"""
	from __future__ import unicode_literals
	import subprocess
	import numpy

	def parse(line):
	    """Parse one line of ``wc -l`` output into a ``(count, filename)`` pair.

	    Returns ``None`` for anything that is not a per-file row: an empty or
	    whitespace-only line, a line holding a count but no filename, and the
	    ``total`` summary row. Raises ``ValueError`` if the count field is not
	    an integer.
	    """
	    if not line:
	        return None
	    # Split on the first run of whitespace only, so a filename that itself
	    # contains spaces stays in one piece instead of breaking the unpacking.
	    fields = line.strip().split(None, 1)
	    if len(fields) != 2:
	        return None
	    count, filename = fields
	    # NOTE: a real file literally named "total" cannot be told apart from
	    # the summary row here and is skipped as well.
	    if filename == 'total':
	        return None
	    return (int(count), filename)

	res = subprocess.check_output(['xargs', 'wc', '-l'])
	counts_by_file = filter(None, map(parse, res.split('\n')))

	print "Biggest 5 files:"
	biggest = sorted(counts_by_file)[-5:]
	for count, filename in biggest:
	print " ", count, filename

	line_counts = [count for count, _ in counts_by_file]
	median = int(numpy.median(line_counts))
	std = int(numpy.round(numpy.std(line_counts)))

	print "matching files:", len(line_counts)
	print "total lines:", numpy.sum(line_counts)

	print "min:", numpy.min(line_counts)
	print "median:", median
	print "+1σ:", median + std
	print "max:", numpy.max(line_counts)

	# I like weighted average best - you're more likely to be working in a file
	# with longer lines, so that gives you the best idea how long files will be in your day-to-day
	print "weighted avg:", int(numpy.round(numpy.average(line_counts, weights=line_counts)))