Last active
May 21, 2022 07:42
-
-
Save dansanderson/f11577824cccc41747a913f5865cde5e to your computer and use it in GitHub Desktop.
Finds New York Times Spelling Bee puzzles in a word list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# A tool to generate New York Times Spelling Bee puzzles from a word list. | |
# | |
# A Bee is a set of seven unique letters that can be used to form words. One | |
# of the letters is required to appear at least once in every word, and each | |
# word must be at least four letters long. Letters can be repeated. A Bee must | |
# have at least one word in its word list that uses all seven letters at least | |
# once (a "pangram"). | |
# | |
# Usage: | |
# python3 beefinder.py <word-list-file> <required-letter> >data.json | |
# | |
# The tool outputs the Bees in JSON format, as follows: | |
# | |
# { "s": { // The required letter | |
# "abcdehs": [ // A Bee, with the letter bank as the key | |
# "aahs", // Answers in this Bee from the word list | |
# "hahas", | |
# "hahs", | |
# ... | |
# ], ... | |
# }} | |
import collections | |
import itertools | |
import json | |
import os | |
import sys | |
ORDA = ord('a') | |
def make_wordbank(word_seq): | |
"""Makes a word bank from a sequence of words. | |
A word bank contains sets of letters and the words from the word list that | |
can be made from them. Words shorter than 4 letters are ignored, as are | |
words that require letter banks larger than 7 letters. A given letter bank | |
may be smaller than 7 letters. | |
The result is a map of letter banks to lists of words. The letter bank is | |
*not* a string: it's a 26-bit integer, where each bit represents a letter | |
the bank, starting with 'a' as the least significant bit. This | |
representation makes it easy and fast to identify members and make Bees | |
(see bees()). Use bee_str() to convert this number to a string of letters. | |
Args: | |
word_seq: A sequence of words. | |
Returns: | |
The word bank, as a mapping of numeric letter banks to lists of words. | |
""" | |
bank = collections.defaultdict(list) | |
for word in word_seq: | |
# Word must be 4 letters or longer | |
if len(word) < 4: | |
continue | |
bankbits = 0 | |
abort_word = False | |
for ltr in word: | |
if ltr < 'a' or ltr > 'z': | |
abort_word = True | |
break | |
bankbits |= 1 << ord(ltr)-ORDA | |
if abort_word: | |
continue | |
# Letterbank must be at most 7 letters | |
b = bankbits | |
c = 0 | |
while b > 0: | |
c += b & 1 | |
b >>= 1 | |
if c > 7: | |
continue | |
bank[bankbits].append(word) | |
return bank | |
def words_from_file(pth): | |
"""A generator that yields words from a word list. | |
Args: | |
pth: The file path. | |
Yields: | |
A word. (Actually a full line with the newline removed.) | |
""" | |
with open(pth) as fh: | |
for line in fh: | |
word = line.strip() | |
yield word | |
def bees(bank, required_letter='s'): | |
"""Generates Bee puzzles from a word bank. | |
This scans all combinations of seven letters where one of the seven is | |
the given required letter, and queries the word bank for words that can be | |
made with the required letter and any of the six other letters. | |
Args: | |
bank: The word bank, in the form returned by make_wordbank(). | |
required_letter: The required letter, as a lowercase single character | |
string. The default is 's'. | |
Returns: | |
The Bees, as a mapping of numeric letter banks to answer lists. (See | |
make_wordbank() for a description of numeric letter banks.) | |
""" | |
bees = collections.defaultdict(list) | |
reqnum = ord(required_letter)-ORDA | |
reqbit = 1 << reqnum | |
for banknums in itertools.combinations(range(25), 6): | |
bankbits = reqbit | |
for num in banknums: | |
if num >= reqnum: | |
num += 1 | |
bankbits |= 1 << num | |
for b in bank: | |
if bankbits & b == b and b & reqbit: | |
bees[bankbits].extend(bank[b]) | |
# Filter out bees that don't have pangrams | |
to_delete = [] | |
for b in bees: | |
has_pangram = False | |
for word in bees[b]: | |
if len(set(word)) == 7: | |
has_pangram = True | |
break | |
if not has_pangram: | |
to_delete.append(b) | |
for b in to_delete: | |
del bees[b] | |
return bees | |
def bee_str(bee): | |
"""Converts a 26-bit numeric letter bank to a string letter bank.""" | |
ltrs = [] | |
for n in range(26): | |
if bee & (1 << n): | |
ltrs.append(chr(ORDA + n)) | |
return ''.join(ltrs) | |
def do_usage(progname=None): | |
print( | |
f'Makes New York Times Spelling Bee puzzles.\n\n' | |
f'Usage: {progname or "python3 beefinder.py"} <word-list-file> ' | |
f'<letter> >data.json\n' | |
f' word-list-file: The path to the word list file.\n' | |
f' letter: The required Bee letter. Must be between a and z.\n\n' | |
f'Output is a JSON file of all Bee puzzles based on the word list.', | |
file=sys.stderr) | |
sys.exit(1) | |
def main(args): | |
if len(args) != 3: | |
print('Missing required arguments.', file=sys.stderr) | |
do_usage(args[0]) | |
wordlist_fname = args[1] | |
if not os.path.isfile(wordlist_fname): | |
print(f'"{args[1]}" is not a file.', file=sys.stderr) | |
do_usage(args[0]) | |
reqltr = args[2] | |
if len(reqltr) != 1 or reqltr < 'a' or reqltr > 'z': | |
print( | |
f'"{args[2]}" must be a letter between a and z.', | |
file=sys.stderr) | |
do_usage(args[0]) | |
bank = make_wordbank(words_from_file(wordlist_fname)) | |
bs = bees(bank, required_letter=reqltr) | |
bs_as_letters = {} | |
for b in bs: | |
bs_as_letters[bee_str(b)] = bs[b] | |
print(json.dumps({reqltr: bs_as_letters})) | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment