Created
February 2, 2018 20:47
-
-
Save lukecampbell/7bd6955996bd704d0aa1899e088b60cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
from argparse import ArgumentParser | |
import sys | |
import os | |
import csv | |
# File-name endings that is_source_file() treats as source code.
source_file_suffixes = [
    '.java', '.js', '.c', '.cpp', '.cc', '.sql',
    '.groovy', '.py', '.sh', '.rb', '.ksh', '.zsh',
]
def find_source_files(path):
    '''
    Collects the source code files found anywhere below a directory

    Each result is the walked directory joined with the file name, in the
    order os.walk reports them.

    :param str path: Root Directory
    :returns: List of paths for which is_source_file() is true
    '''
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(path)
        for name in names
        if is_source_file(name)
    ]
def is_source_file(filename):
    '''
    Tells whether a file name ends with one of the known source suffixes

    :param str filename: Name of the file
    :returns: True when the name ends with an entry of
              source_file_suffixes, False otherwise
    '''
    # str.endswith accepts a tuple and checks every candidate in one call.
    return filename.endswith(tuple(source_file_suffixes))
def get_sloc(filepath):
    '''
    Returns the number of newline characters in a file

    The file is read as raw bytes in 1 MiB chunks, so no text decoding is
    involved and the whole file is never held in memory at once. A final
    line without a trailing newline is not counted.

    :param str filepath: Path to the file
    :returns: Number of 0x0A bytes in the file (0 for an empty file)
    :raises IOError: If the file cannot be opened or read (OSError on
                     Python 3, where IOError is an alias of it)
    '''
    chunk_size = 1 << 20
    newline_count = 0
    # The with-block closes the file even when a read fails part-way.
    with open(filepath, 'rb') as stream:
        chunk = stream.read(chunk_size)
        while chunk:
            # bytes.count behaves the same on Python 2 and 3; iterating the
            # buffer and calling ord() on each item breaks on Python 3,
            # where the items are already ints.
            newline_count += chunk.count(b'\n')
            chunk = stream.read(chunk_size)
    return newline_count
def get_summary(paths, ignore_rules):
    '''
    Builds the per-file SLOC report for a set of directory trees

    :param list paths: A list of paths
    :param list ignore_rules: A list of patterns to ignore
    :returns: List of (filename, sloc) tuples for every source file that is
              not ignored, followed by a final ('total', sum) tuple
    '''
    # Gather every candidate first, then filter and count.
    candidates = []
    for path in paths:
        candidates += find_source_files(path)

    rows = []
    for candidate in candidates:
        if not should_ignore(candidate, ignore_rules):
            rows.append((candidate, get_sloc(candidate)))

    grand_total = sum(count for _, count in rows)
    rows.append(('total', grand_total))
    return rows
def should_ignore(filename, ignore_rules):
    '''
    Tells whether a file matches any of the ignore patterns

    A pattern matches when it occurs anywhere in the file name as a plain
    substring.

    :param str filename: The file
    :param list ignore_rules: List of ignore patterns
    :returns: True if at least one pattern is contained in filename
    '''
    return any(pattern in filename for pattern in ignore_rules)
def get_longest_line(lines):
    '''
    Finds the length of the longest entry in an iterable of lines

    :param list lines: A list of lines
    :returns: Length of the longest line, or 0 when there are no lines
    '''
    # Seeding with 0 keeps the result defined for an empty input.
    return max([0] + [len(text) for text in lines])
def main():
    '''
    Produce summary statistics for source lines of code for a directory
    '''
    # The docstring above doubles as the --help description, so it is kept
    # to a single sentence. Returns 0, which the caller uses as exit status.
    #
    # It is passed by keyword because the first positional parameter of
    # ArgumentParser is ``prog`` (the program name), not the description.
    parser = ArgumentParser(description=main.__doc__)
    parser.add_argument('-o', '--output', help='Output CSV file')
    parser.add_argument('-i', '--ignore', action='append', help='Ignore matching')
    parser.add_argument('-v', '--verbose', action='store_true', help='Turn on verbose output')
    # nargs='*' lets the default apply when no path is given; the default
    # is a list because get_summary iterates over the paths.
    parser.add_argument('paths', nargs='*', default=['.'], help='Directory to scan')
    args = parser.parse_args()

    # --ignore is None when the option was never supplied.
    summary = get_summary(args.paths, args.ignore or [])

    if args.output:
        # The csv module does its own newline handling: Python 3 wants the
        # file opened with newline='', Python 2 wants binary mode.
        if sys.version_info[0] >= 3:
            csvfile = open(args.output, 'w', newline='')
        else:
            csvfile = open(args.output, 'wb')
        with csvfile:
            csv.writer(csvfile).writerows(summary)

    # Print the table when asked to, or when no CSV file was requested.
    if args.verbose or not args.output:
        linelen = get_longest_line(row[0] for row in summary)
        for row in summary:
            print(' '.join([row[0].ljust(linelen), str(row[1])]))
    return 0
# Run the command-line tool only when executed as a script, and hand the
# value returned by main() to the shell as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment