Created
April 8, 2022 19:41
-
-
Save japhib/d3371a129d3f3fbf4954850d4fe860e6 to your computer and use it in GitHub Desktop.
Script (in Python) for checking to make sure all public functions in Elixir files in a project are alphabetized! Use it in CI to enforce that certain files have all their functions alphabetized: `python3 check-alphabetized.py --checkall lib`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import re | |
| import sys | |
| from functools import total_ordering | |
| from pprint import pprint | |
def printUsage():
    """
    Writes the command-line help text (tool description plus the three
    supported commands) to standard output.
    """
    usage_text = """
A tool for checking to make sure that public functions in an Elixir file or project are
in alphabetical order.
Usage:
--scan <dir> Does a full scan of all files in the provided directory
and reports on which ones are mostly alphabetized.
--check <files> Checks the specified file(s) to make sure all public functions
are in alphabetical order.
--checkall <dir> Checks all files in the provided directory, and the ones
that contain the "# check_alphabetized" magic comment, it will
make sure they are alphabetized, and return an error if not.
"""
    print(usage_text)
def main():
    """
    Command-line entry point: reads the command and its target(s) from
    sys.argv and dispatches to the matching scan/check routine.

    Prints the usage text and exits with status 1 when fewer than two
    arguments are given; prints an error and exits with status 1 when the
    command is not recognized.
    """
    argv = sys.argv
    if len(argv) < 3:
        printUsage()
        sys.exit(1)

    command = argv[1]
    first_target = argv[2]

    if command == "--scan":
        scan_dir(first_target)
        return
    if command == "--check":
        # --check accepts any number of files, so pass everything after the command
        check_files(argv[2:])
        return
    if command == "--checkall":
        check_dir(first_target)
        return

    print("Unknown command: {}".format(command))
    sys.exit(1)
def scan_dir(dir):
    """
    Walks every Elixir file under `dir` and prints a report line for each
    module with more than 5 functions that is worth looking at:

    - modules carrying the magic comment that are NOT alphabetized ("-")
    - modules without the magic comment that are fully alphabetized ("++")
    - modules without the magic comment that are nearly alphabetized ("+")
    """
    for filename in all_ex_files_in_dir(dir):
        for module_name, module in get_modules(filename).items():
            funs = module["funs"]
            # if there are no or few functions, skip it
            if len(funs) <= 5:
                continue

            out_of_order = distance_from_sorted(funs)

            if module["check"]:
                # Already opted in to checking: only violations are interesting
                if out_of_order:
                    offenders = '\n\t'.join(str(f) for f in out_of_order)
                    print("- {} in {} is NOT alphabetized! The following functions are out of order:\n\t{}"
                          .format(module_name, filename, offenders))
                continue

            # Not opted in yet: point out modules that could be with little effort
            if not out_of_order:
                print("++ {} in {} is alphabetized!".format(module_name, filename))
            elif len(out_of_order) <= 5 and len(out_of_order) / len(funs) < 0.4:
                offenders = '\n\t'.join(str(f) for f in out_of_order)
                print("+ {} in {} is ALMOST alphabetized! The following functions are out of order:\n\t{}"
                      .format(module_name, filename, offenders))
def check_files(files):
    """
    Prints, for every module in each of the given files, whether its public
    functions are alphabetized, listing the offending functions when they
    are not. Modules without any functions are reported as such.
    """
    print("Checking files: \n\t{}".format('\n\t'.join(files)))

    for filename in files:
        for module_name, module in get_modules(filename).items():
            funs = module["funs"]
            if not funs:
                print("{} in {} has no functions".format(module_name, filename))
                continue

            out_of_order = distance_from_sorted(funs)
            if not out_of_order:
                print("{} in {} is alphabetized!".format(module_name, filename))
                continue

            offenders = '\n\t'.join(str(f) for f in out_of_order)
            print("{} in {} is NOT alphabetized! The following functions are out of order:\n\t{}"
                  .format(module_name, filename, offenders))
def check_dir(dir):
    """
    Enforces alphabetical order for every module under `dir` that carries the
    "# check_alphabetized" magic comment and has at least one function.

    Prints each violating module with its out-of-order functions. If there was
    any violation, prints an explanation and exits with status 1; otherwise
    prints how many modules were checked.
    """
    modules_checked = 0
    any_failures = False

    for filename in all_ex_files_in_dir(dir):
        for module_name, module in get_modules(filename).items():
            # Only modules that opted in and actually have functions are checked
            if not module["funs"] or not module["check"]:
                continue

            modules_checked += 1
            out_of_order = distance_from_sorted(module["funs"])
            if not out_of_order:
                continue

            any_failures = True
            offenders = '\n\t'.join(str(f) for f in out_of_order)
            print("{}: module {} is not alphabetized! The following functions are out of order:\n\t{}"
                  .format(filename, module_name, offenders))

    if not any_failures:
        print("Checked {} modules".format(modules_checked))
        return

    print("""
!!! Found non-alphabetized files, failing!
Note: this script checks all files containing the magic comment:
# check_alphabetized
to make sure that all the public functions are in alphabetical order.
""")
    sys.exit(1)
def all_ex_files_in_dir(dir):
    """
    Recursively collects the paths of all `.ex` files under `dir`, skipping
    any file whose path *below* `dir` contains the substring 'test'.

    The 'test' filter is deliberately applied to the path relative to `dir`
    only. Applying it to the full path would silently exclude every file when
    the project itself happens to live under a directory with "test" in its
    name, and the check would then pass without looking at anything.

    Returns a list of paths, each prefixed with `dir` as given. A directory
    that does not exist yields an empty list.
    """
    filenames = []
    for subdir, _dirs, files in os.walk(dir):
        for file in files:
            if not file.endswith('.ex'):
                continue
            filename = os.path.join(subdir, file)
            # Only the part of the path inside the scanned directory decides
            # whether this counts as a test file.
            if 'test' in os.path.relpath(filename, dir):
                continue
            filenames.append(filename)
    return filenames
@total_ordering
class ModuleFun:
    """
    A function found in a module: its name together with the line number it
    was found on. Instances compare, sort and hash by (name, line_number) and
    print as "name:line_number".
    """

    def __init__(self, name, line_number):
        self.line_number = line_number
        self.name = name

    def _is_valid_operand(self, other):
        # Duck-typed: anything exposing both attributes can be compared against
        return all(hasattr(other, attr) for attr in ("name", "line_number"))

    def _sort_key(self):
        # Name first, so sorting is alphabetical; line number only breaks ties
        return (self.name, self.line_number)

    def __repr__(self) -> str:
        return "{}:{}".format(self.name, self.line_number)

    def __eq__(self, other) -> bool:
        if self._is_valid_operand(other):
            return self.name == other.name and self.line_number == other.line_number
        return NotImplemented

    def __lt__(self, other) -> bool:
        if self._is_valid_operand(other):
            return self._sort_key() < (other.name, other.line_number)
        return NotImplemented

    def __hash__(self) -> int:
        return hash(self._sort_key())
def get_modules(filename):
    """
    Scans an Elixir source file line by line and collects, per module, the
    public functions (`def ...`) and callbacks (`@callback ...`) in the order
    they appear.

    Returns a dict mapping module name to
        {"check": bool, "funs": [ModuleFun, ...]}
    where "check" is True if the module contains a line that is exactly the
    "# check_alphabetized" magic comment. Line numbers stored on each
    ModuleFun are 1-based, matching what editors display.

    Consecutive declarations with the same name (multiple clauses of one
    function) are recorded once, at the first clause.

    Note: this is a line-based scan, not a parser. Declarations are attributed
    to the most recent `defmodule` line, so functions that follow a nested
    module are counted as belonging to that nested module.
    """
    # Elixir source is UTF-8; don't depend on the platform's default encoding
    with open(filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Matches the name of a public function or callback declaration
    declaration = re.compile(r"^(?:def|@callback) (\w+)")

    # Get list of modules/functions in file
    curr_module = None
    modules = {}
    last_function_name = None
    for line_number, line in enumerate(lines, start=1):
        line = line.strip()

        # Check for module declarations
        if line.startswith("defmodule "):
            curr_module = line.split()[1]
            modules[curr_module] = {"check": False, "funs": []}
            # Clause de-duplication must not carry over from the previous
            # module, or a same-named first function would be dropped.
            last_function_name = None
            continue

        # Anything before the first module is ignored
        if not curr_module:
            continue

        # Check for "# check_alphabetized" magic comment
        if line == "# check_alphabetized":
            modules[curr_module]["check"] = True
            continue

        # Check for (public) function declarations and callbacks. Lines such
        # as operator definitions ("def +(a)") have no plain name to
        # alphabetize, so they are skipped rather than crashing the scan.
        found = declaration.search(line)
        if found is None:
            continue

        function_name = found.group(1)
        if function_name != last_function_name:
            modules[curr_module]["funs"].append(ModuleFun(function_name, line_number))
            last_function_name = function_name

    return modules
def distance_from_sorted(orig_list):
    """
    Compares a list against its sorted form and collects the items that are
    not where sorting would put them.

    Both lists are walked in lockstep. Wherever they disagree,
    synchronize_list() says how many items to drop from each side until they
    line up again, and every dropped item counts as out of order.

    Returns the out-of-order items as a list sorted by their line_number
    (empty if orig_list is already sorted).
    """
    expected = sorted(orig_list)
    misplaced = set()

    pos_orig = 0
    pos_expected = 0
    while pos_orig < len(orig_list) and pos_expected < len(expected):
        if orig_list[pos_orig] == expected[pos_expected]:
            # Both sides agree, move on
            pos_orig += 1
            pos_expected += 1
            continue

        # Disagreement: find the nearest point where the two remainders line
        # up again, swallowing items from either side to get there.
        rest_orig = orig_list[pos_orig:]
        rest_expected = expected[pos_expected:]
        sync = synchronize_list(rest_orig, rest_expected)
        if sync is None:
            # No matches -- everything left in the original is out of order
            misplaced.update(rest_orig)
            break

        drop_orig, drop_expected = sync
        misplaced.update(rest_orig[:drop_orig])
        misplaced.update(rest_expected[:drop_expected])
        pos_orig += drop_orig
        pos_expected += drop_expected

    return sorted(misplaced, key=lambda f: f.line_number)
def synchronize_list(listA, listB):
    """
    Figures out how many elements you need to delete from the beginning of listA
    and listB so that their first elements match again.

    Candidate pairs are tried in order of increasing distance from the front
    (the larger of the two offsets), so the returned pair is the nearest point
    at which the lists line up. The search covers the full length of BOTH
    lists, so a match that sits deeper in listB than listA is long is found.

    Returns a tuple of ints: (delete_from_A, delete_from_B), or None if the
    lists share no element (which includes either list being empty).
    """
    len_a = len(listA)
    len_b = len(listB)

    for i in range(max(len_a, len_b)):
        for j in range(i + 1):
            # Drop i items from listA and j items from listB
            if i < len_a and j < len_b and listA[i] == listB[j]:
                return (i, j)
            # Try again, swapping the roles of listA and listB
            if i != j and j < len_a and i < len_b and listA[j] == listB[i]:
                return (j, i)

    return None
# Run the command-line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm mainly posting this because I'm proud of the algorithm I wrote here to identify the specific items in a list that are out of order. See
`distance_from_sorted` and `synchronize_list`.