Last active
January 23, 2022 15:08
-
-
Save shadda/f0b974108eae8afeb1ca80598e68b49e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Wordle helper: narrow a word list down to the remaining candidates.

Edit the configuration constants below after each guess, then run the
script. It downloads a JSON word list, keeps the valid five-letter words,
applies the letter constraints, and prints the surviving candidates.
"""
import sys
import re
import json
import urllib.request

#Word dictionary. JSON list.
dictionary_url = "https://gist.githubusercontent.com/shadda/bd47678f63b421d070c13ee16eaeb1c9/raw/e532576f1919c13652605897e2898139b7835aa3/dictionary.json"

#Trim out any words in our dictionary that are invalid
bchars = r'[a-zA-Z]{5}'

#Letters you know are not in the word
nletters = 'welshbngovadmxu'

#Letters you know should be in the word
wletters = 'ip'

#\w = any letter. adjust as needed.
wpattern = r'^' + \
    r'\w' + \
    r'\w' + \
    r'\w' + \
    r'\w' + \
    r'\w' + \
    r'$'

#Any additional expressions: 0-based position -> letters that are known
#NOT to be at that position (letters that are in the word, but elsewhere).
mletters = {1: 'i', 2: 'p', 3: 'i', 4: 'p'}


def load_dictionary(url, timeout=30):
    """Download and decode the JSON document at *url*.

    Args:
        url: Address of the JSON word list.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The decoded JSON value (the script expects a list of words).

    Raises:
        urllib.error.URLError: If the request fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    with urllib.request.urlopen(url, timeout=timeout) as rp:
        return json.load(rp)


def normalize_words(raw, valid=bchars):
    """Return the sorted, de-duplicated, lowercased valid words in *raw*.

    Args:
        raw: Iterable of dictionary entries. Entries that are not strings
            are skipped instead of raising.
        valid: Regular expression that a word must match in full.

    Returns:
        A sorted list of unique lowercase five-letter words.
    """
    # fullmatch validates the whole word, not just a prefix of it.
    is_valid = re.compile(valid).fullmatch
    return sorted({
        x.lower()
        for x in raw
        if isinstance(x, str) and len(x) == 5 and is_valid(x)
    })


def filter_candidates(words, absent='', present='', pattern=r'^\w{5}$',
                      misplaced=None):
    """Keep the words that satisfy every known constraint.

    Args:
        words: Iterable of lowercase candidate words.
        absent: Letters known not to occur anywhere in the word.
        present: Letters known to occur somewhere in the word.
        pattern: Regular expression for the known fixed letter positions.
        misplaced: Mapping of 0-based position to the letters that must not
            appear at that position. ``None`` means no such constraint.

    Returns:
        A list of the words that pass all filters, in their original order.
    """
    positional = re.compile(pattern)
    misplaced = misplaced or {}
    result = []
    for word in words:
        #Filter words that contain letters we know aren't in the word
        if any(ch in absent for ch in word):
            continue
        #Filter for words that definitely contain the letters we're interested in
        if not all(ch in present_ch_word(word) for ch in present):
            continue
        #Filter based on the fixed letter positions we know
        if not positional.match(word):
            continue
        #Reject letters sitting in a position already ruled out for them.
        #The length guard keeps short words from raising IndexError.
        if any(pos < len(word) and word[pos] in letters
               for pos, letters in misplaced.items()):
            continue
        result.append(word)
    return result


def present_ch_word(word):
    """Return the set of letters in *word* for repeated membership tests."""
    return set(word)


if __name__ == "__main__":
    #Remove any duplicate entries and make everything lowercase
    data = normalize_words(load_dictionary(dictionary_url))
    round4 = filter_candidates(
        data,
        absent=nletters,
        present=wletters,
        pattern=wpattern,
        misplaced=mletters,
    )
    print(round4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment