Created
April 10, 2021 21:42
-
-
Save j9ac9k/7cddc9025bb7040f888bf1e9c9fdde1c to your computer and use it in GitHub Desktop.
Generate word lists from Python modules, useful for spell checkers such as cSpell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import importlib
import inspect
from collections import deque
from typing import Any, Iterable, List, Set, Tuple
def get_nested_members(module, predicate=None) -> List[Tuple[str, Any]]:
    """Return the public ``(name, value)`` members of *module*.

    Args:
        module: Object to inspect; ``main`` passes imported modules.
        predicate: Optional filter forwarded unchanged to
            ``inspect.getmembers``; when given, only members for which it
            returns a true value are considered.

    Returns:
        The ``(name, value)`` pairs produced by ``inspect.getmembers`` whose
        name does not start with an underscore, in the order
        ``inspect.getmembers`` yields them.
    """
    return [
        (name, value)
        for name, value in inspect.getmembers(module, predicate=predicate)
        if not name.startswith("_")
    ]
def write_keywords(words: Set[str], module):
    """Write *words*, sorted, to ``<module.__name__>.txt``, one per line.

    Args:
        words: Words to save; anything ``sorted`` accepts.
        module: Object whose ``__name__`` names the output file; ``main``
            passes the top-level module it imported.

    The path is relative, so the file is created (or overwritten) in the
    current working directory. The file always ends with a newline.
    """
    ordered = sorted(words)
    # Explicit UTF-8: identifiers may be non-ASCII and the result should not
    # depend on the platform's locale encoding.
    with open(f"{module.__name__}.txt", "wt", encoding="utf-8") as words_file:
        words_file.write("\n".join(ordered))
        words_file.write("\n")
def make_cspell_words(words: Iterable[str], min_length: int = 4) -> Set[str]:
    """Turn identifiers into a set of lower-cased words.

    Identifiers containing underscores are split on ``_`` and each component
    is processed in turn. Anything shorter than *min_length* characters
    (including the empty strings produced by consecutive underscores) is
    dropped.

    Args:
        words: Identifiers to process. The input is copied, so the caller's
            collection is left untouched.
        min_length: Minimum length a word needs to be kept; defaults to 4.

    Returns:
        The set of surviving words, lower-cased.
    """
    # Work on a private stack so the caller's list is not emptied.
    pending = list(words)
    processed = set()
    while pending:
        word = pending.pop()
        if "_" in word:
            # Components are pushed back and filtered on a later iteration.
            pending.extend(word.split("_"))
            continue
        if len(word) < min_length:
            continue
        processed.add(word.lower())
    return processed
def main(args):
    """Collect spell-checker words from a module tree and write them out.

    Imports ``args.module`` and walks, breadth-first, the modules reachable
    from it as public attributes, staying inside the ``args.module``
    namespace. For every visited module it gathers the public member names
    and the parameter names of its public functions/methods, normalises them
    with ``make_cspell_words`` and finally hands the result to
    ``write_keywords``.

    Args:
        args: Namespace with a ``module`` attribute holding the dotted name
            of the module to scan.
    """
    root_name = args.module
    top_level_module = importlib.import_module(root_name)
    words = {root_name}
    frontier = deque([top_level_module])
    already_seen = set()

    def is_function_or_method(obj) -> bool:
        # ``inspect.isfunction or inspect.ismethod`` would evaluate to just
        # ``inspect.isfunction``; both checks have to be applied per object.
        return inspect.isfunction(obj) or inspect.ismethod(obj)

    while frontier:
        module = frontier.popleft()
        module_name = module.__name__
        # Match the root itself or a dotted descendant, so that scanning
        # ``foo`` does not wander into an unrelated ``foobar`` package.
        in_namespace = (
            module_name == root_name or module_name.startswith(root_name + ".")
        )
        if module in already_seen or not in_namespace:
            continue
        already_seen.add(module)

        # get modules and add to the frontier
        frontier.extend(
            submodule
            for _, submodule in get_nested_members(module, predicate=inspect.ismodule)
        )

        # add as many key-words as we can identify
        extracted_words = [name for name, _ in get_nested_members(module)]
        words |= make_cspell_words(extracted_words)

        # get as many kwargs as we can get to
        kwargs = []
        for _, func_or_method in get_nested_members(
            module, predicate=is_function_or_method
        ):
            try:
                kwargs.extend(inspect.signature(func_or_method).parameters)
            except ValueError:
                # no signature can be provided
                pass
            except TypeError:
                # object is not supported
                print(f"TypeError for {func_or_method}")
        words |= make_cspell_words(kwargs)

    write_keywords(words, top_level_module)
if __name__ == "__main__":
    # Command-line entry point: the module to scan is given with -m/--module.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--module",
        required=True,
        help="Required Module to Extract keywords from",
    )
    main(parser.parse_args())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment