dictionary_compact.json: https://github.com/matthewreagan/WebstersEnglishDictionary/blob/master/dictionary_compact.json
$ pip install rich
dictionary_compact.json: https://github.com/matthewreagan/WebstersEnglishDictionary/blob/master/dictionary_compact.json
$ pip install rich
| import re | |
| import json | |
| import random | |
| from rich.console import Console | |
| from rich.markup import escape | |
# Load the dictionary once at import time. analyze() indexes it by word and
# tokenizes the looked-up value as text, so it is used as a mapping from a
# word to its definition string.
# JSON text is UTF-8 by specification; pass the encoding explicitly so the
# file is not decoded with the platform's locale encoding.
with open('dictionary_compact.json', 'r', encoding='utf-8') as json_file:
    dictionary = json.load(json_file)

# Tokenizer: a "word" is a run of letters a-z and apostrophes.
# NOTE(review): callers lower-case their text before matching, so IGNORECASE
# looks redundant here; with Unicode patterns it also lets [a-z] match a few
# non-ASCII letters (see the `re` docs) -- confirm whether that is intended.
WORD_RE = re.compile(r"[a-z']+", re.UNICODE | re.IGNORECASE)

# Words that were found in the dictionary (filled in by analyze()).
known_words: set[str] = set()
# Words that were looked up but have no dictionary entry (filled in by analyze()).
unknown_words: set[str] = set()

# Shared Rich console used for the status spinner and all printed output.
console = Console()
def debug(*args, **kwargs):
    """Debug-output hook that is currently switched off.

    Accepts any positional and keyword arguments and ignores them, so calls
    to it can stay in place at no cost. Uncomment the line below to forward
    the arguments to ``console.print`` instead.
    """
    # console.print(*args, **kwargs)
    return None
def analyze(sentence: str):
    """Collect every word needed to understand *sentence*, transitively.

    Each word of ``sentence`` is looked up in the module-level ``dictionary``.
    A word that has an entry is added to ``known_words`` and the words of its
    definition are queued for the same treatment; a word without an entry is
    added to ``unknown_words``. Nothing is returned: the results are the
    mutations of those two module-level sets.

    Args:
        sentence: Text to analyze. It is lower-cased and then tokenized with
            ``WORD_RE``.
    """
    # Iterative traversal: ``pending`` is a stack of words still to look up.
    pending = WORD_RE.findall(sentence.lower())
    processed = 0
    with console.status("Analyzing...") as status:
        while pending:
            processed += 1
            # Refresh the spinner text only every 100 items to keep the loop cheap.
            if processed % 100 == 0:
                status.update(f"[bold blue]Total: {processed}[reset] [green]|[reset] [bold]Analyzing...[reset] {len(pending)} words left + {len(unknown_words)} unknown words")

            # Every token on the stack came from already lower-cased text,
            # so no further case normalization is needed here.
            word = pending.pop()
            if word in known_words or word in unknown_words:
                continue

            if word not in dictionary:
                # Missing words are recorded instead of raising, so one
                # unknown word does not abort the whole analysis.
                debug(f"Storing '{word}' in unknown_words, total: {len(unknown_words)}")
                unknown_words.add(word)
                continue

            known_words.add(word)
            definition = dictionary[word]
            debug("\n[bold red]" + word)
            debug(escape(definition))

            # Push in reverse so the first word of the definition is popped
            # (and therefore processed) first.
            for definition_word in reversed(WORD_RE.findall(definition.lower())):
                if definition_word not in known_words:
                    pending.append(definition_word)
# Script entry: read one sentence, analyze it, then report the totals and a
# few random examples from each set.
sentence = input("Enter a sentence: ")
analyze(sentence)

# Sort into lists: random.sample() needs a sequence, not a set.
known_words = sorted(known_words)
unknown_words = sorted(unknown_words)

# The sentence is user input, so escape it before embedding it in Rich markup;
# otherwise square brackets in the input would be parsed as markup tags.
console.print(f":sparkles: To totally understand [bold blue]'{escape(sentence)}'[reset], you need to know: [green bold]{len(known_words)}[reset] words, plus [red bold]{len(unknown_words)}[reset] unknown words.")

# random.sample() raises ValueError when k exceeds the population size, so cap
# the sample at the number of words actually available (possibly zero).
known_sample = random.sample(known_words, k=min(5, len(known_words)))
unknown_sample = random.sample(unknown_words, k=min(5, len(unknown_words)))
console.print(f":bulb: Random known words: {known_sample}", highlight=True)
console.print(f":fire: Random unknown words: {unknown_sample}", highlight=True)