Last active
August 16, 2020 16:59
-
-
Save Shihab-Shahriar/2ef4e95bab7faf4cc27aa4bf5503d09a to your computer and use it in GitHub Desktop.
Count total lines of Python code in a file or folder, excluding comments or empty lines.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import StringIO | |
import tokenize, sys, os | |
import argparse | |
# Folder-name prefixes to skip: the directory walk does not descend into any
# folder whose name starts with one of these. The -s command-line option
# appends further prefixes to this tuple.
pref_skips = ('__', '.')
# From https://stackoverflow.com/a/2962727/4553309 | |
def remove_comnts(source):
    """Return ``source`` with comments and docstrings removed.

    The text is rebuilt from its token stream. The gap in front of every
    token is padded back with spaces, so the remaining code keeps its
    original line and column layout. A line that held only a comment or a
    docstring therefore survives as an empty or whitespace-only line.

    Args:
        source: Python source code as a single string.

    Returns:
        The source text without comment tokens and without string literals
        that begin a statement (docstrings). String literals that are part
        of an expression, including ones on a continuation line inside
        brackets, are kept.

    Errors raised by ``tokenize`` for text it cannot tokenize are not caught
    here and propagate to the caller.
    """
    pieces = []  # collected fragments, joined once at the end
    # Seeding with INDENT makes a string that is the very first token of the
    # file (a module docstring) be treated like one that opens a block.
    prev_type = tokenize.INDENT
    last_line = -1
    last_col = 0
    depth = 0  # current nesting level of (), [] and {}

    for tok in tokenize.generate_tokens(StringIO(source).readline):
        start_line, start_col = tok.start
        end_line, end_col = tok.end

        # Re-create the horizontal whitespace that preceded this token.
        if start_line > last_line:
            last_col = 0
        if start_col > last_col:
            pieces.append(" " * (start_col - last_col))

        if tok.type == tokenize.OP:
            if tok.string in ("(", "[", "{"):
                depth += 1
            elif tok.string in (")", "]", "}"):
                depth -= 1

        if tok.type == tokenize.COMMENT:
            keep = False
        elif tok.type == tokenize.STRING:
            # A string directly after INDENT/NEWLINE opens a statement, and
            # one at column 0 opens a top-level statement: both are dropped
            # as docstrings. A string inside brackets can never be a
            # docstring, so it is always kept, even when a continuation
            # line places it at column 0.
            opens_statement = prev_type in (tokenize.INDENT, tokenize.NEWLINE)
            keep = depth > 0 or (not opens_statement and start_col > 0)
        else:
            keep = True

        if keep:
            pieces.append(tok.string)

        prev_type = tok.type
        last_col = end_col
        last_line = end_line

    return "".join(pieces)
def fileLOC(f):
    """Return the number of lines of code in the Python file at path ``f``.

    Comments and docstrings are stripped with ``remove_comnts`` first; every
    remaining line that contains something other than whitespace is counted.

    Args:
        f: Path of the Python source file to measure.

    Returns:
        The count of non-blank lines left after stripping, as an ``int``.
    """
    # tokenize.open decodes the file with the encoding declared by its BOM or
    # coding cookie (UTF-8 by default) instead of the platform default, and
    # the ``with`` block closes the handle.
    with tokenize.open(f) as handle:
        source = handle.read()
    stripped = remove_comnts(source)
    # remove_comnts pads removed comments/docstrings with spaces, so a line
    # that held only an indented comment is whitespace-only rather than
    # empty; strip() keeps such lines from being counted as code.
    return sum(1 for line in stripped.split("\n") if line.strip())
def main(path, individual=True):
    """Print line-of-code counts for ``path``.

    If ``path`` is a regular file, the literal text ``PATH`` is printed,
    followed by that file's count. Otherwise ``path`` is walked as a
    directory tree: folders whose names start with a prefix in
    ``pref_skips`` are not entered, every ``.py`` file found is counted, and
    the total is printed.

    Args:
        path: File or folder to measure.
        individual: When true, also print one line per file with its count
            before the total.
    """
    if not os.path.isfile(path):
        # From https://stackoverflow.com/a/13454267/4553309
        py_files = []
        for root, subdirs, names in os.walk(path, topdown=True):
            # Assigning through the slice prunes the walk in place, so the
            # skipped folders are never visited.
            subdirs[:] = [sub for sub in subdirs
                          if not sub.startswith(pref_skips)]
            py_files.extend(os.path.join(root, name)
                            for name in names if name.endswith('.py'))

        # Count every file before printing anything.
        length = {}
        for py_file in py_files:
            length[py_file] = fileLOC(py_file)

        if individual:
            for py_file in py_files:
                print(py_file, " ", length[py_file])
        print("Total LOC: ", sum(length.values()))
        return

    print("PATH")
    print(fileLOC(path))
if __name__ == '__main__':
    # Command-line entry point: read the target path and options, then count.
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="Path to either file or folder")
    parser.add_argument("-v", "--verbose", help="Print LOC of each file",
                        action="store_true")
    parser.add_argument("-s", action='append', nargs='+', type=str,
                        help="Skip folders that start with specified prefixes")
    args = parser.parse_args()
    if args.s:
        # action='append' combined with nargs='+' produces one list per -s
        # occurrence; flatten all of them so that "-s build -s test" works
        # the same as "-s build test" instead of dropping the later groups.
        pref_skips += tuple(prefix for group in args.s for prefix in group)
    main(args.path, individual=args.verbose)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
python pyLOC.py path_to_file_or_folder
python pyLOC.py path_to_folder -v
: print the LOC of each file inside the folder, recursively
python pyLOC.py path_to_folder -s build test
: skip folders whose names start with `build` or `test`. Folders whose names start with `__` or `.` are skipped automatically.