Created
January 26, 2016 05:51
-
-
Save ChrisBeaumont/b79c5f5419c205ef20d9 to your computer and use it in GitHub Desktop.
Wheel of fortune puzzle decoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import choice | |
from collections import defaultdict | |
import os | |
import pickle | |
import requests | |
from soupy import Soupy, Q | |
# Load the whole cartridge image into memory.  The context manager closes the
# file handle as soon as the bytes have been read.
with open('wheel.nes', 'rb') as _rom_file:
    ROM = _rom_file.read()

# Pickle file used by build_clueset() to cache its result between runs.
CLUESET_PATH = 'clues.pickle'

# Byte offsets into ROM; ROM[CLUE_START:CLUE_STOP] is the encoded puzzle table.
CLUE_START, CLUE_STOP = 0x109d0, 0x14618

# Layout limits enforced by wordwrap().
MAX_LINE_LEN = 11
MAX_LINES = 4

# Category labels used as the first element of every clue key.
TITLE = 'Title'
PHRASE = 'TV Quote'
PERSON = 'Person'
THING = 'Thing'
PLACE = 'Place'
EVENT = 'Event'
LANDMARK = 'Landmark'
# Maps a category name as it appears in the scraped puzzle tables to one of
# the category labels defined above.  Scraped categories that are not listed
# here are ignored by clean_new_clues().  Two keys carry a leading
# non-breaking space (\xa0).
CLEAN_CATS = {
    "Star & Role": PERSON,
    "Proper Name": PERSON,
    "On the Map": LANDMARK,
    "Title": TITLE,
    "Things": THING,
    "TV Quote": PHRASE,
    "Character": PERSON,
    "Song Lyrics": TITLE,
    "Food & Drink": THING,
    "\xa0Phrase": PHRASE,
    "Proper Names": PERSON,
    "Husband & Wife": PERSON,
    "Movie Title": TITLE,
    "Fun & Games": THING,
    "Living Things": THING,
    "Events": EVENT,
    "Same Letter": THING,
    "Same Name": THING,
    "Movie Quote": PHRASE,
    "Fictional Place": PLACE,
    "Person": PERSON,
    "Next Line Please": PHRASE,
    "Rhyme Time": PHRASE,
    "Clue": PHRASE,
    "Who Is It?": PERSON,
    "\xa0Thing": THING,
    "Fictional Character": PERSON,
    "Headline": TITLE,
    "College Life": THING,
    "On the Menu": THING,
    "Living Thing": THING,
    "Slogan": PHRASE,
    "Places": PLACE,
    "Title/Author": TITLE,
    "Thing": THING,
    "Before & After": PHRASE,
    "Place": PLACE,
    "Around the House": THING,
    "Phrase": PHRASE,
    "Landmark": LANDMARK,
    "Best Seller": TITLE,
    "Fictional Characters": PERSON,
    "In the Kitchen": THING,
    "Song Title": TITLE,
    "People": PERSON,
    "Song/Artist": TITLE,
    "Fictional Family": PERSON,
    "Classic TV": TITLE,
    "Slang": PHRASE,
    "Show Biz": THING,
    "Quotation": PHRASE,
    "TV Title": TITLE,
    "Event": EVENT,
    "Occupation": PERSON,
}
def parse_clues(data):
    """
    Traverse a block of encoded puzzles, yielding each decoded puzzle.

    Every byte holds a 7-bit ASCII character; the high bit (0x80) is a
    signal flag.  A flagged byte that directly follows another flagged byte
    closes the current puzzle.  Otherwise a flagged byte followed by an
    unflagged byte ends a line, and a ``'\\n'`` is inserted after it.

    :param data: indexable sequence of ints, e.g. ``bytes``
    :return: generator of decoded puzzles as ``str``, lines joined by newlines

    Bytes left over after the last complete puzzle are silently dropped.
    """
    size = len(data)

    def flagged(index):
        # Neighbours outside the buffer count as unflagged.  This stops
        # index -1 from wrapping round to the last byte when pos == 0, and
        # stops an IndexError when the buffer ends on a lone flagged byte.
        return 0 <= index < size and bool(data[index] & 0x80)

    word = []
    for pos, char in enumerate(data):
        if not char & 0x80:
            # ordinary character in the middle of a line
            word.append(char)
            continue

        # strip the signal bit to recover the ascii value
        word.append(char & ~0x80)

        if flagged(pos - 1):
            # second flagged byte in a row: end of clue
            yield bytes(word).decode('ascii')
            word = []
        elif not flagged(pos + 1):
            # isolated flagged byte: end of line
            word.append(ord('\n'))
        # else: second-to-last byte of the clue, nothing extra to emit
def wordwrap(clue):
    """
    Re-flow a clue so that no line is longer than MAX_LINE_LEN characters.

    Existing newlines are treated as ordinary whitespace.  Words are packed
    greedily onto the current line, separated by single spaces.

    :param clue: the puzzle text
    :return: the re-wrapped text, lines joined with newlines
    :raises ValueError: if one word is longer than MAX_LINE_LEN, or the
        wrapped text needs more than MAX_LINES lines
    """
    rows = [[]]
    for token in clue.replace('\n', ' ').split():
        if len(token) > MAX_LINE_LEN:
            raise ValueError("Word Too Long: %s" % token)
        current = rows[-1]
        # Every word already on the row costs its letters plus one space
        # (the space that would separate it from the next word).
        used = sum(len(placed) + 1 for placed in current)
        if used + len(token) > MAX_LINE_LEN:
            rows.append([token])
        else:
            current.append(token)
    if len(rows) > MAX_LINES:
        raise ValueError("Too many lines: %s" % rows)
    return '\n'.join(' '.join(row) for row in rows)
def encode(clue):
    """
    Encode a puzzle into the byte format read by parse_clues().

    Each character is stored as its ASCII code.  A newline is not stored as
    a byte of its own; it sets the high bit (0x80) on the character before
    it.  The last two bytes of the puzzle both get the high bit set to mark
    the end of the clue.

    :param clue: puzzle text; lines separated by ``'\\n'``
    :return: the encoded puzzle as ``bytes``
    :raises ValueError: if the clue starts with a newline, has fewer than
        two characters, or contains a non-ASCII character.  Such a character
        would collide with the 0x80 signal bit.
    """
    result = []
    for c in clue:
        if c == '\n':
            if not result:
                # there is no previous character to carry the line flag
                raise ValueError("Clue starts with a newline: %r" % (clue,))
            result[-1] |= 0x80
        else:
            code = ord(c)
            if code >= 0x80:
                # the high bit is reserved for the line/clue markers
                raise ValueError(
                    "Non-ASCII character %r in clue: %r" % (c, clue))
            result.append(code)
    if len(result) < 2:
        # the end-of-clue marker needs two bytes to flag
        raise ValueError("Clue too short to encode: %r" % (clue,))
    result[-1] |= 0x80
    result[-2] |= 0x80
    return bytes(result)
def bday(n):
    """
    Birthday-problem threshold for a collection of ``n`` items.

    Returns the smallest number of uniform random draws (with replacement)
    for which the probability that all draws are distinct falls below 0.5.
    Falls through and returns None when ``n`` is negative.
    """
    all_distinct = 1.0
    for drawn in range(n + 1):
        # chance that draw number drawn+1 misses the `drawn` items seen so far
        miss = 1.0 * (n - drawn) / n
        all_distinct *= miss
        if all_distinct < 0.5:
            return drawn + 1
    return None
def scrape():
    """
    Download the season 1-30 compendium pages and yield their puzzles.

    Prints each season number before fetching it.  Every table row is
    expected to hold exactly four cells; the first two are yielded as a
    ``(puzzle, category)`` tuple and the other two are discarded.
    """
    template = "https://sites.google.com/site/wheeloffortunepuzzlecompendium/home/compendium/season-%i-compendium"
    for season in range(1, 31):
        print(season)
        page = requests.get(template % season)
        dom = Soupy(page.text, 'html5')
        # Walk down to the table body, then collect the text of every cell
        # of every row.
        table_body = dom.find('td', 'sites-tile-name-content-1').find('tbody')
        cell_text = Q.find_all('td').each(Q.text)
        rows = table_body.find_all('tr').each(cell_text).val()
        for puzzle, category, _, _ in rows:
            yield puzzle, category
def rollup(pairs):
    """
    Group ``(key, value)`` pairs by key.

    :param pairs: iterable of 2-tuples; keys and values must be hashable
    :return: dict mapping each key to a list of its distinct values, in
        set iteration order
    """
    buckets = {}
    for key, val in pairs:
        buckets.setdefault(key, set()).add(val)
    return {key: list(members) for key, members in buckets.items()}
def clean_new_clues(new_data):
    """
    Filter and encode scraped puzzles.

    :param new_data: iterable of ``(clue, category)`` pairs
    :return: generator of ``((clean_category, encoded_length), encoded)``

    Pairs whose category is not in CLEAN_CATS are skipped, and so are clues
    for which wordwrap() or encode() raises ValueError.
    """
    for raw_clue, raw_cat in new_data:
        if raw_cat not in CLEAN_CATS:
            continue
        try:
            packed = encode(str(wordwrap(raw_clue)))
            key = (CLEAN_CATS[raw_cat], len(packed))
            yield key, packed
        except ValueError:
            # clue does not fit the game's layout; drop it
            continue
def build_clueset():
    """
    Build, or load from cache, the collection of puzzles.

    If CLUESET_PATH exists, its pickled contents are returned unchanged.
    Otherwise the puzzles are decoded from ROM, new puzzles are scraped from
    the web, and the result is pickled to CLUESET_PATH before returning.

    :return: dict with two entries:
        ``'old_clues'``: list of ``((category, length), encoded_clue)`` for
        the ROM puzzles, in ROM order;
        ``'all_clues'``: dict mapping ``(category, length)`` to a list of
        the distinct encoded clues (ROM and scraped) with that key.
    :raises AssertionError: if re-encoding the decoded ROM puzzles does not
        reproduce the original bytes
    """
    if os.path.exists(CLUESET_PATH):
        # NOTE: unpickling can execute arbitrary code.  Only load a cache
        # file that this script wrote itself.
        with open(CLUESET_PATH, 'rb') as infile:
            return pickle.load(infile)

    clues = list(parse_clues(ROM[CLUE_START:CLUE_STOP]))
    encoded = list(map(encode, clues))
    # Sanity check: decode followed by encode must round-trip byte-for-byte.
    assert b''.join(encoded) == ROM[CLUE_START: CLUE_STOP], \
        "re-encoded clues do not match the ROM clue table"

    # Index ranges into `encoded` for each category.  Entry 0 is left out;
    # the resamplers prepend their own 'WHEEL\nOF\nFORTUNE' entry instead.
    old_cats = [
        (PLACE, (1, 167)),
        (PERSON, (167, 358)),
        (THING, (358, 516)),
        (LANDMARK, (516, 537)),
        (PHRASE, (537, 752)),
        (TITLE, (752, 899)),
        (EVENT, (899, 1002)),
    ]
    old_clues = [
        ((cat, len(clue)), clue)
        for cat, (start, stop) in old_cats
        for clue in encoded[start:stop]
    ]

    new_clues_dirty = scrape()
    new_clues = list(clean_new_clues(new_clues_dirty))

    # Pool ROM and scraped clues together, keyed by (category, length).
    all_clues = rollup(
        (key, clue)
        for clueset in [old_clues, new_clues]
        for key, clue in clueset
    )
    result = {
        'old_clues': old_clues,
        'all_clues': all_clues,
    }
    with open(CLUESET_PATH, 'wb') as outfile:
        pickle.dump(result, outfile)
    return result
def resample_clues():
    """
    Build a replacement clue table with randomly chosen puzzles.

    Each ROM puzzle is replaced by a random puzzle drawn from the pool that
    shares its ``(category, length)`` key.  The table starts with the
    encoded 'WHEEL/OF/FORTUNE' puzzle.

    :return: the new clue table as ``bytes``
    """
    clueset = build_clueset()
    originals = clueset['old_clues']
    pool = clueset['all_clues']
    pieces = [encode('WHEEL\nOF\nFORTUNE')]
    # Only the key matters here; the original clue itself is not reused.
    pieces.extend(choice(pool[key]) for key, _ in originals)
    return b''.join(pieces)
def single_char_replace(char):
    """
    Build a clue table in which every letter A-Z is replaced by ``char``.

    All other characters, including line breaks, are kept.  The table starts
    with the encoded, unmodified 'WHEEL/OF/FORTUNE' puzzle.

    :param char: a single-character string to substitute for each letter
    :return: the new clue table as ``bytes``
    """
    clueset = build_clueset()
    old_clues = clueset['old_clues']
    result = [encode('WHEEL\nOF\nFORTUNE')]
    # Map exactly the 26 code points 'A'..'Z' (inclusive) to the replacement.
    # A dict table leaves every other character untouched.
    replacement = ord(char)
    trans = {code: replacement for code in range(ord('A'), ord('Z') + 1)}
    for _, encoded_clue in old_clues:
        # each stored entry decodes to exactly one puzzle
        [text] = parse_clues(encoded_clue)
        result.append(encode(text.translate(trans)))
    return b''.join(result)
def strip_newlines():
    """
    Build a clue table in which every line break is replaced by a space.

    The table starts with the encoded, unmodified 'WHEEL/OF/FORTUNE' puzzle.

    :return: the new clue table as ``bytes``
    """
    originals = build_clueset()['old_clues']
    pieces = [encode('WHEEL\nOF\nFORTUNE')]
    for _, packed in originals:
        # each stored entry decodes to exactly one puzzle
        [text] = parse_clues(packed)
        flattened = ' '.join(text.split('\n'))
        pieces.append(encode(flattened))
    return b''.join(pieces)
def new_rom(rom, out_path, resampler):
    """
    Write a copy of ``rom`` with its clue table swapped out.

    :param rom: the original ROM image (iterable of byte values)
    :param out_path: path of the file to write
    :param resampler: zero-argument callable returning the replacement for
        ``rom[CLUE_START:CLUE_STOP]``
    """
    original = list(rom)
    replacement = list(resampler())
    # splice: everything before the table, the new table, everything after
    image = original[:CLUE_START] + replacement + original[CLUE_STOP:]
    with open(out_path, 'wb') as outfile:
        outfile.write(bytes(image))
if __name__ == "__main__":
    # (output path, clue-table generator) pairs, written in this order.
    jobs = [
        ('wheel2.nes', resample_clues),
        ('wheelx.nes', lambda: single_char_replace('X')),
        ('wheelquote.nes', lambda: single_char_replace("'")),
        ('allspace.nes', lambda: single_char_replace(" ")),
        ('no_newlines.nes', strip_newlines),
    ]
    for out_path, resampler in jobs:
        new_rom(ROM, out_path, resampler)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment