Created
February 1, 2021 22:47
-
-
Save pdarragh/3474c6936ad3ead25fbe79bcad8459a0 to your computer and use it in GitHub Desktop.
New York Times Spelling Bee Solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""This module provides functions for solving the New York Times Spelling Bee | |
puzzles using a given dictionary file. | |
""" | |
import re | |
from itertools import groupby | |
from pathlib import Path | |
from typing import List, Pattern, Tuple | |
DEFAULT_DICT_FILE = Path('/usr/share/dict/words') | |
class Char(str): | |
"""A subclass of `str` that is exactly one character in length.""" | |
def __init__(self, value): | |
if not (isinstance(value, str) and len(value) == 1): | |
raise TypeError(f"not a single character: {value}") | |
super().__init__() | |
def build_regex(required_letter: Char, other_letters: List[Char]) -> Pattern: | |
"""Constructs a regular expression pattern that will match any word that | |
contains the required letter at least once, the other letters any number of | |
times, no other letters, and is at least 5 characters in length. | |
""" | |
all_chars = {required_letter, *other_letters} | |
char_class = '[' + ''.join(char for char in all_chars) + ']' | |
no_rl_class = '[' + ''.join(char for char in other_letters) + ']' | |
base_pattern = '|'.join(["{rl}{cls}{{4,}}", | |
"{nrl}{rl}{cls}{{3,}}", | |
"{nrl}{{2}}{rl}{cls}{{2,}}", | |
"{nrl}{{3}}{rl}{cls}+", | |
"{nrl}{{4,}}{rl}{cls}*"]) | |
filled_pattern = base_pattern.format( | |
rl=required_letter, | |
nrl=no_rl_class, | |
cls=char_class) | |
pattern = re.compile(filled_pattern) | |
return pattern | |
def lookup(pattern: Pattern, dict_file: Path) -> List[str]: | |
"""Iterates over the given dictionary file, attempting to match each line | |
to the given pattern. Whitespace is trimmed from the ends of each line, but | |
otherwise the line must match the pattern fully to be considered a match. | |
""" | |
results = [] | |
with open(dict_file, 'r') as df: | |
index = 0 | |
for line in df: | |
index += 1 | |
if match := pattern.fullmatch(line.strip()): | |
results.append(match.group()) | |
return results | |
def group_results(results: List[str]) -> List[Tuple[int, List[str]]]: | |
"""Groups the results by length.""" | |
groupings = groupby(sorted(results, key=len), key=len) | |
expanded = [(n, list(rs)) for (n, rs) in groupings] | |
return expanded | |
if __name__ == '__main__': | |
import argparse | |
parser = argparse.ArgumentParser() | |
parser.add_argument('required_letter', type=Char, | |
help="the letter that must be used at least once") | |
parser.add_argument('other_letters', action='extend', nargs='+', type=Char, | |
help="the other letters that may optionally be used") | |
parser.add_argument('-d', '--dict-file', type=Path, default=DEFAULT_DICT_FILE, | |
help="the file to use as a dictionary") | |
args = parser.parse_args() | |
pattern = build_regex(args.required_letter, args.other_letters) | |
results = lookup(pattern, args.dict_file) | |
grouped = group_results(results) | |
print(f"Found {len(results)} result{'s' if len(results) > 1 else ''}:") | |
for (n, group) in grouped: | |
print(f" Length {n} ({len(group)} result{'s' if len(group) > 1 else ''}):") | |
for result in group: | |
print(" " + result) | |
print(f"Regex used: {pattern.pattern}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment