alesker · September 4, 2023 07:46
diff --git a/gitstats.py b/gitstats.py
 #!/usr/bin/env python
 """Report how many non-blank lines each author owns according to git blame.

 Usage: gitstats.py [path] [ext1,ext2,...] [excluded1,excluded2,...]

 path      tree path handed to ``git ls-tree`` (default: ``.``)
 ext...    comma-separated file extensions to include (default: every file)
 excluded  comma-separated regular expressions (Python ``re`` syntax);
           any path matching one of them is skipped
 """

 import collections
 import subprocess
 import re
 import operator
 import sys
 import os.path

 # One line of ``git blame`` output: hash, optional file name,
 # "(author date line-no)", then the source text.
 # Group 2 is the author and group 5 the source text; group 4 is only the
 # timezone sign nested inside the date group.
 BLAME_LINE_RE = re.compile(
     r"(\^?[0-9A-Fa-f]{7,8}).+\((.*) +"
     r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} (\+|-)\d{4}) +[0-9]+\) (.*)"
 )


 def run_git(args):
     """Run ``git`` with the argument list *args* and return its stdout lines.

     No shell is involved, so file names need no quoting.  stderr is not
     captured, so diagnostics are never mistaken for data.
     """
     proc = subprocess.run(
         ["git"] + list(args),
         stdout=subprocess.PIPE,
         universal_newlines=True,
         errors="replace",  # blame output may contain undecodable bytes
     )
     return proc.stdout.split('\n')


 def filter_paths(paths, extensions, excluded_paths):
     """Return the entries of *paths* selected by the command-line filters.

     A path is kept when it ends in one of *extensions* (every path is kept
     when *extensions* is empty) and matches none of the regular expressions
     in *excluded_paths*.  Empty paths and empty exclusion patterns are
     ignored.
     """
     extensions_filter = re.compile(
         '|'.join(r'\.' + ext + '$' for ext in extensions))
     exclusions = [re.compile(pattern) for pattern in excluded_paths if pattern]
     selected = []
     for candidate in paths:
         if not candidate or not extensions_filter.search(candidate):
             continue
         if any(exclusion.search(candidate) for exclusion in exclusions):
             continue
         selected.append(candidate)
     return selected


 def parse_blame_author(line):
     """Return the author of one ``git blame`` output line.

     Returns None when the line does not look like blame output or when the
     blamed source line is empty or whitespace only.
     """
     match = BLAME_LINE_RE.search(line)
     if match is None:
         return None
     if not match.group(5).strip():
         return None
     return match.group(2).strip()


 def count_authors(blame_lines, counts=None):
     """Add the authors found in *blame_lines* to *counts* and return it.

     *counts* maps author name to number of non-blank lines; a new
     ``collections.Counter`` is created when it is omitted.
     """
     if counts is None:
         counts = collections.Counter()
     for line in blame_lines:
         author = parse_blame_author(line)
         if author is not None:
             counts[author] += 1
     return counts


 def main(argv):
     """Print per-author line counts for the tree selected by *argv*."""
     path = argv[1] if len(argv) > 1 else '.'
     extensions = argv[2].split(',') if len(argv) > 2 else []
     excluded_paths = argv[3].split(',') if len(argv) > 3 else []

     print("Path: " + path)

     tracked = run_git(['ls-tree', '-r', '--name-only', 'HEAD', path])
     counts = collections.Counter()
     for filename in filter_paths(tracked, extensions, excluded_paths):
         # Skip, rather than stop at, an entry that is not a regular file
         # in the working tree; the remaining files must still be counted.
         if not os.path.isfile(filename):
             continue
         # -C -C enables copy/paste detection across files.
         blamelist = run_git(['blame', '-w', '-C', '-C', '--', filename])
         count_authors(blamelist, counts)

     sum_lines = sum(counts.values())
     ranking = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
     for name, count in ranking:
         print('{0:>25}: {1:5} {2:6.2%}'.format(name, count, count / float(sum_lines)))

     print('\n{0:>25}: {1:5}'.format("Total lines", sum_lines))


 if __name__ == "__main__":
     main(sys.argv)
	#!/usr/bin/env python
	"""Report how many non-blank lines each author owns according to git blame.

	Usage: gitstats.py [path] [ext1,ext2,...] [excluded1,excluded2,...]

	path      tree path handed to ``git ls-tree`` (default: ``.``)
	ext...    comma-separated file extensions to include (default: every file)
	excluded  comma-separated regular expressions (Python ``re`` syntax);
	          any path matching one of them is skipped
	"""

	import collections
	import subprocess
	import re
	import operator
	import sys
	import os.path

	# One line of ``git blame`` output: hash, optional file name,
	# "(author date line-no)", then the source text.
	# Group 2 is the author and group 5 the source text; group 4 is only the
	# timezone sign nested inside the date group.
	BLAME_LINE_RE = re.compile(
	    r"(\^?[0-9A-Fa-f]{7,8}).+\((.*) +"
	    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} (\+|-)\d{4}) +[0-9]+\) (.*)"
	)


	def run_git(args):
	    """Run ``git`` with the argument list *args* and return its stdout lines.

	    No shell is involved, so file names need no quoting.  stderr is not
	    captured, so diagnostics are never mistaken for data.
	    """
	    proc = subprocess.run(
	        ["git"] + list(args),
	        stdout=subprocess.PIPE,
	        universal_newlines=True,
	        errors="replace",  # blame output may contain undecodable bytes
	    )
	    return proc.stdout.split('\n')


	def filter_paths(paths, extensions, excluded_paths):
	    """Return the entries of *paths* selected by the command-line filters.

	    A path is kept when it ends in one of *extensions* (every path is kept
	    when *extensions* is empty) and matches none of the regular expressions
	    in *excluded_paths*.  Empty paths and empty exclusion patterns are
	    ignored.
	    """
	    extensions_filter = re.compile(
	        '|'.join(r'\.' + ext + '$' for ext in extensions))
	    exclusions = [re.compile(pattern) for pattern in excluded_paths if pattern]
	    selected = []
	    for candidate in paths:
	        if not candidate or not extensions_filter.search(candidate):
	            continue
	        if any(exclusion.search(candidate) for exclusion in exclusions):
	            continue
	        selected.append(candidate)
	    return selected


	def parse_blame_author(line):
	    """Return the author of one ``git blame`` output line.

	    Returns None when the line does not look like blame output or when the
	    blamed source line is empty or whitespace only.
	    """
	    match = BLAME_LINE_RE.search(line)
	    if match is None:
	        return None
	    if not match.group(5).strip():
	        return None
	    return match.group(2).strip()


	def count_authors(blame_lines, counts=None):
	    """Add the authors found in *blame_lines* to *counts* and return it.

	    *counts* maps author name to number of non-blank lines; a new
	    ``collections.Counter`` is created when it is omitted.
	    """
	    if counts is None:
	        counts = collections.Counter()
	    for line in blame_lines:
	        author = parse_blame_author(line)
	        if author is not None:
	            counts[author] += 1
	    return counts


	def main(argv):
	    """Print per-author line counts for the tree selected by *argv*."""
	    path = argv[1] if len(argv) > 1 else '.'
	    extensions = argv[2].split(',') if len(argv) > 2 else []
	    excluded_paths = argv[3].split(',') if len(argv) > 3 else []

	    print("Path: " + path)

	    tracked = run_git(['ls-tree', '-r', '--name-only', 'HEAD', path])
	    counts = collections.Counter()
	    for filename in filter_paths(tracked, extensions, excluded_paths):
	        # Skip, rather than stop at, an entry that is not a regular file
	        # in the working tree; the remaining files must still be counted.
	        if not os.path.isfile(filename):
	            continue
	        # -C -C enables copy/paste detection across files.
	        blamelist = run_git(['blame', '-w', '-C', '-C', '--', filename])
	        count_authors(blamelist, counts)

	    sum_lines = sum(counts.values())
	    ranking = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
	    for name, count in ranking:
	        print('{0:>25}: {1:5} {2:6.2%}'.format(name, count, count / float(sum_lines)))

	    print('\n{0:>25}: {1:5}'.format("Total lines", sum_lines))


	if __name__ == "__main__":
	    main(sys.argv)