Skip to content

Instantly share code, notes, and snippets.

@japhib
Created April 8, 2022 19:41
Show Gist options
  • Select an option

  • Save japhib/d3371a129d3f3fbf4954850d4fe860e6 to your computer and use it in GitHub Desktop.

Select an option

Save japhib/d3371a129d3f3fbf4954850d4fe860e6 to your computer and use it in GitHub Desktop.
Script (in Python) for checking to make sure all public functions in Elixir files in a project are alphabetized! Use it in CI to enforce that certain files have all their functions alphabetized: `python3 check-alphabetized.py --checkall lib`
#!/usr/bin/env python3
import os
import re
import sys
from functools import total_ordering
from pprint import pprint
def printUsage():
    """
    Prints the command-line help text to stdout: a short description of the
    tool followed by the three supported commands (--scan, --check, --checkall).
    """
    usage_text = """
A tool for checking to make sure that public functions in an Elixir file or project are
in alphabetical order.
Usage:
--scan <dir> Does a full scan of all files in the provided directory
and reports on which ones are mostly alphabetized.
--check <files> Checks the specified file(s) to make sure all public functions
are in alphabetical order.
--checkall <dir> Checks all files in the provided directory, and the ones
that contain the "# check_alphabetized" magic comment, it will
make sure they are alphabetized, and return an error if not.
"""
    print(usage_text)
def main():
    """
    Command-line entry point.

    Reads the command from sys.argv[1] and dispatches to the matching handler.
    Prints the usage text and exits with status 1 when fewer than two arguments
    are given; prints an error and exits with status 1 for an unknown command.
    """
    argv = sys.argv
    if len(argv) < 3:
        printUsage()
        sys.exit(1)

    command = argv[1]
    # --scan and --checkall take a single directory; --check takes every
    # remaining argument as a file to check.
    handlers = {
        "--scan": lambda: scan_dir(argv[2]),
        "--check": lambda: check_files(argv[2:]),
        "--checkall": lambda: check_dir(argv[2]),
    }

    handler = handlers.get(command)
    if handler is None:
        print("Unknown command: {}".format(command))
        sys.exit(1)
    handler()
def scan_dir(dir):
    """
    Scans every Elixir file under `dir` and prints a report line for each module
    with more than 5 functions that is worth looking at:

    - "-"  the module is marked for checking but is not alphabetized
    - "++" the module is not marked for checking but is fully alphabetized
    - "+"  the module is not marked for checking and is almost alphabetized
           (at most 5 functions, and under 40% of them, are out of order)
    """
    for filename in all_ex_files_in_dir(dir):
        for module_name, module in get_modules(filename).items():
            funs = module["funs"]
            # Modules with no or few functions are not interesting
            if len(funs) <= 5:
                continue

            out_of_order = distance_from_sorted(funs)

            if module["check"]:
                # Already opted in: only report when the ordering is broken
                if out_of_order:
                    print("- {} in {} is NOT alphabetized! The following functions are out of order:\n\t{}"
                          .format(module_name, filename, '\n\t'.join([str(f) for f in out_of_order])))
                continue

            # From here on the module is not currently being checked
            if not out_of_order:
                print("++ {} in {} is alphabetized!".format(module_name, filename))
            elif len(out_of_order) <= 5 and len(out_of_order) / len(funs) < .4:
                print("+ {} in {} is ALMOST alphabetized! The following functions are out of order:\n\t{}"
                      .format(module_name, filename, '\n\t'.join([str(f) for f in out_of_order])))
def check_files(files):
    """
    Checks each of the given files and prints, for every module found, whether
    its public functions are alphabetized, not alphabetized (listing the
    offending functions), or whether the module has no functions at all.
    """
    print("Checking files: \n\t{}".format('\n\t'.join(files)))

    for filename in files:
        for module_name, module in get_modules(filename).items():
            funs = module["funs"]
            if len(funs) == 0:
                print("{} in {} has no functions".format(module_name, filename))
                continue

            out_of_order = distance_from_sorted(funs)
            if not out_of_order:
                print("{} in {} is alphabetized!".format(module_name, filename))
                continue

            print("{} in {} is NOT alphabetized! The following functions are out of order:\n\t{}"
                  .format(module_name, filename, '\n\t'.join([str(f) for f in out_of_order])))
def check_dir(dir):
    """
    Checks every Elixir file under `dir`. Modules that carry the
    "# check_alphabetized" magic comment and have at least one function must be
    alphabetized; each one that is not gets reported.

    Exits the process with status 1 if any checked module failed, otherwise
    prints how many modules were checked.
    """
    failed = False
    checked_count = 0

    for filename in all_ex_files_in_dir(dir):
        for module_name, module in get_modules(filename).items():
            funs = module["funs"]
            # Skip modules without functions and modules that did not opt in
            if not funs or not module["check"]:
                continue

            checked_count += 1
            out_of_order = distance_from_sorted(funs)
            if not out_of_order:
                continue

            failed = True
            print("{}: module {} is not alphabetized! The following functions are out of order:\n\t{}"
                  .format(filename, module_name, '\n\t'.join([str(f) for f in out_of_order])))

    if not failed:
        print("Checked {} modules".format(checked_count))
        return

    print("""
!!! Found non-alphabetized files, failing!
Note: this script checks all files containing the magic comment:
# check_alphabetized
to make sure that all the public functions are in alphabetical order.
""")
    sys.exit(1)
def all_ex_files_in_dir(dir):
    """
    Recursively collects the paths of all `.ex` files under `dir`, skipping
    anything whose path below `dir` contains "test".

    Returns a list of paths, each one prefixed with `dir` as given by the caller.
    """
    filenames = []
    for subdir, _dirs, files in os.walk(dir):
        for file in files:
            if not file.endswith('.ex'):
                continue
            filename = os.path.join(subdir, file)
            # Apply the "test" filter only to the part of the path below `dir`.
            # Matching against the full path would silently drop every file when
            # the project itself lives under a directory whose name contains
            # "test" (e.g. /tmp/test_project/lib).
            relative_path = os.path.relpath(filename, dir)
            if 'test' not in relative_path:
                filenames.append(filename)
    return filenames
@total_ordering
class ModuleFun:
    """
    A function found in a module: its name plus the line number it was found on.

    Instances order and compare by (name, line_number), are hashable, and print
    as "name:line_number".
    """

    def __init__(self, name, line_number):
        self.name = name
        self.line_number = line_number

    def _is_valid_operand(self, other):
        # Duck-typed: anything exposing both attributes can be compared against
        return hasattr(other, "name") and hasattr(other, "line_number")

    @staticmethod
    def _key_of(fun):
        # The tuple that drives equality, ordering and hashing
        return (fun.name, fun.line_number)

    def __repr__(self) -> str:
        return "{}:{}".format(*self._key_of(self))

    def __eq__(self, other) -> bool:
        if self._is_valid_operand(other):
            return self._key_of(self) == self._key_of(other)
        return NotImplemented

    def __lt__(self, other) -> bool:
        if self._is_valid_operand(other):
            return self._key_of(self) < self._key_of(other)
        return NotImplemented

    def __hash__(self) -> int:
        return hash(self._key_of(self))
def get_modules(filename):
    """
    Reads an Elixir source file and collects, per module, the public functions
    and callbacks in the order they are declared.

    A line starting with "defmodule " opens a new module; every later
    "def <name>" / "@callback <name>" line is attributed to it until the next
    "defmodule ". Consecutive declarations with the same name (multiple clauses
    of one function) are recorded once. A "# check_alphabetized" comment line
    marks the current module as one that must be alphabetized.

    Returns a dict mapping module name to
    {"check": bool, "funs": [ModuleFun, ...]}, where each ModuleFun carries the
    function name and its 1-based line number in the file.
    """
    # Elixir source is UTF-8; don't depend on the platform's default encoding
    with open(filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Get list of modules/functions in file
    curr_module = None
    modules = {}
    last_function_name = None
    # Number lines from 1 so reported positions match what editors show
    for line_number, line in enumerate(lines, start=1):
        line = line.strip()

        # Check for module declarations
        if line.startswith("defmodule "):
            curr_module = line.split()[1]
            modules[curr_module] = {"check": False, "funs": []}
            # Clause de-duplication must not leak across modules: otherwise a
            # module whose first function has the same name as the previous
            # module's last function would lose that function.
            last_function_name = None
            continue

        # Nothing below applies until we are inside a module
        if not curr_module:
            continue

        # Check for "# check_alphabetized" magic comment
        if line == "# check_alphabetized":
            modules[curr_module]["check"] = True
            continue

        # Check for (public) function declarations and callbacks
        regex_match = re.search(r"^(?:def|@callback) (\w+)", line)
        if regex_match is None:
            # Not a declaration, or one without a plain word name
            # (e.g. an operator definition such as "def +value") -- skip it
            continue

        function_name = regex_match.group(1)
        if function_name != last_function_name:
            modules[curr_module]["funs"].append(ModuleFun(function_name, line_number))
        last_function_name = function_name

    return modules
def distance_from_sorted(orig_list):
    """
    Examines a list to see how close it is to being sorted, by walking it in
    lockstep with a sorted copy of itself and collecting every item that has to
    be skipped over (in either list) to bring the two back in step.

    Returns the out-of-order items as a list ordered by their line_number; the
    list is empty when orig_list is already sorted.
    """
    expected = sorted(orig_list)

    pos_orig = 0
    pos_sorted = 0
    misplaced = set()

    while pos_orig < len(orig_list) and pos_sorted < len(expected):
        if orig_list[pos_orig] == expected[pos_sorted]:
            # Both lists agree here, move on
            pos_orig += 1
            pos_sorted += 1
            continue

        # The lists diverge. Find how many items have to be dropped from the
        # front of each remaining tail before they line up again.
        offsets = synchronize_list(orig_list[pos_orig:], expected[pos_sorted:])
        if offsets is None:
            # The tails never line up -- everything left is out of order
            misplaced.update(orig_list[pos_orig:])
            break

        resume_orig = pos_orig + offsets[0]
        resume_sorted = pos_sorted + offsets[1]
        misplaced.update(orig_list[pos_orig:resume_orig])
        misplaced.update(expected[pos_sorted:resume_sorted])
        pos_orig = resume_orig
        pos_sorted = resume_sorted

    report = list(misplaced)
    report.sort(key=lambda f: f.line_number)
    return report
def synchronize_list(listA, listB):
    """
    Figures out how many elements you need to delete from the beginning of listA
    and listB so that their first elements match again.

    Candidate pairs are tried in order of increasing larger offset, so the
    match that keeps the larger of the two deletions smallest is returned.

    Returns a tuple of ints: (delete_from_A, delete_from_B), or None if the
    lists have no element in common (including when either list is empty).
    """
    len_a = len(listA)
    len_b = len(listB)

    # Search up to the length of the LONGER list. Stopping at len(listA) would
    # miss a match that sits deeper in listB than listA is long, e.g.
    # (["c"], ["b", "c"]) must yield (0, 1) rather than None.
    for i in range(max(len_a, len_b)):
        for j in range(i + 1):
            # Offset i into listA, offset j into listB
            if i < len_a and j < len_b and listA[i] == listB[j]:
                return (i, j)
            # Same pair of offsets with the roles of the lists swapped
            if i != j and j < len_a and i < len_b and listA[j] == listB[i]:
                return (j, i)
    return None
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
@japhib
Copy link
Author

japhib commented Apr 8, 2022

I'm mainly posting this because I'm proud of the algorithm I wrote here to identify specific items in a list that are out of order. See `distance_from_sorted` and `synchronize_list`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment