Created
February 17, 2016 01:35
-
-
Save ChrisBeaumont/2653846258f64d6910d2 to your computer and use it in GitHub Desktop.
wheel of infinite fortune
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pickle | |
from collections import defaultdict | |
from random import choice | |
import requests | |
from soupy import Soupy, Q | |
# Raw bytes of the original cartridge image, read once at import time.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it to the garbage collector.
with open('wheel.nes', 'rb') as _rom_file:
    ROM = _rom_file.read()

# Pickle file used to cache the combined clue set between runs.
CLUESET_PATH = 'clues.pickle'

# Byte offsets [CLUE_START, CLUE_STOP) of the encoded puzzle table in ROM.
CLUE_START, CLUE_STOP = 0x109d0, 0x14618

# Layout limits enforced by wordwrap(): characters per line, lines per puzzle.
MAX_LINE_LEN = 11
MAX_LINES = 4

# Canonical category labels that scraped categories are normalised to.
TITLE = 'Title'
PHRASE = 'TV Quote'  # NOTE(review): label text differs from the name -- confirm intended
PERSON = 'Person'
THING = 'Thing'
PLACE = 'Place'
EVENT = 'Event'
LANDMARK = 'Landmark'
# Maps category names as they appear in the scraped data (including the
# variants with a leading non-breaking space, '\xa0') to one of the canonical
# category labels. Categories missing from this table are skipped by
# clean_new_clues().
CLEAN_CATS = {
    'Star & Role': PERSON,
    'Proper Name': PERSON,
    'On the Map': LANDMARK,
    'Title': TITLE,
    'Things': THING,
    'TV Quote': PHRASE,
    'Character': PERSON,
    'Song Lyrics': TITLE,
    'Food & Drink': THING,
    '\xa0Phrase': PHRASE,
    'Proper Names': PERSON,
    'Husband & Wife': PERSON,
    'Movie Title': TITLE,
    'Fun & Games': THING,
    'Living Things': THING,
    'Events': EVENT,
    'Same Letter': THING,
    'Same Name': THING,
    'Movie Quote': PHRASE,
    'Fictional Place': PLACE,
    'Person': PERSON,
    'Next Line Please': PHRASE,
    'Rhyme Time': PHRASE,
    'Clue': PHRASE,
    'Who Is It?': PERSON,
    '\xa0Thing': THING,
    'Fictional Character': PERSON,
    'Headline': TITLE,
    'College Life': THING,
    'On the Menu': THING,
    'Living Thing': THING,
    'Slogan': PHRASE,
    'Places': PLACE,
    'Title/Author': TITLE,
    'Thing': THING,
    'Before & After': PHRASE,
    'Place': PLACE,
    'Around the House': THING,
    'Phrase': PHRASE,
    'Landmark': LANDMARK,
    'Best Seller': TITLE,
    'Fictional Characters': PERSON,
    'In the Kitchen': THING,
    'Song Title': TITLE,
    'People': PERSON,
    'Song/Artist': TITLE,
    'Fictional Family': PERSON,
    'Classic TV': TITLE,
    'Slang': PHRASE,
    'Show Biz': THING,
    'Quotation': PHRASE,
    'TV Title': TITLE,
    'Event': EVENT,
    'Occupation': PERSON,
}
def parse_clues(data):
    """
    Traverse a block of encoded puzzles, yielding each decoded puzzle.

    Each byte holds a 7-bit ASCII character; the high bit (0x80) is a
    marker. A marked byte whose predecessor is also marked ends the clue.
    A marked byte followed by an unmarked byte ends a line, which is decoded
    as a '\\n' after the character. A marked byte followed by another marked
    byte is simply the second-to-last character of the clue.

    :param data: bytes-like object holding zero or more encoded clues
    :returns: generator of decoded clues (str), lines separated by '\\n'
    :raises IndexError: if data ends immediately after a marked byte that
        does not terminate a clue (truncated input)
    """
    word = []
    for pos, char in enumerate(data):
        if not char & 0x80:
            # plain character, no marker
            word.append(char)
            continue

        # strip the marker to recover the ascii value
        word.append(char & 0x7F)

        # The first byte has no predecessor. Without the pos > 0 guard,
        # data[-1] wraps around to the final byte (always marked), so a
        # one-character first line would be misread as a complete clue.
        if pos > 0 and data[pos - 1] & 0x80:  # end of clue
            yield bytes(word).decode('ascii')
            word = []
        elif not data[pos + 1] & 0x80:  # newline
            word.append(ord('\n'))
def wordwrap(clue, max_line_len=None, max_lines=None):
    """
    Greedily re-flow a clue so that it fits the puzzle board.

    Existing line breaks and runs of whitespace are discarded; words are
    then packed left to right, starting a new line whenever the next word
    (plus its separating space) would overflow the current one.

    :param clue: puzzle text, possibly already containing newlines
    :param max_line_len: maximum characters per line
        (defaults to MAX_LINE_LEN)
    :param max_lines: maximum number of lines (defaults to MAX_LINES)
    :returns: the words joined by single spaces, lines joined by '\\n'
    :raises ValueError: if a single word is longer than a line, or the
        wrapped clue needs more than max_lines lines
    """
    if max_line_len is None:
        max_line_len = MAX_LINE_LEN
    if max_lines is None:
        max_lines = MAX_LINES

    lines = [[]]
    # split() with no argument already treats '\n' as a separator
    for word in clue.split():
        if len(word) > max_line_len:
            raise ValueError("Word Too Long: %s" % word)
        current = lines[-1]
        # letters already on the line + one space per existing word
        if len(word) + sum(map(len, current)) + len(current) > max_line_len:
            lines.append([word])
        else:
            current.append(word)

    if len(lines) > max_lines:
        raise ValueError("Too many lines: %s" % (lines,))
    return '\n'.join(' '.join(line) for line in lines)
def encode(clue):
    """
    Encode a clue into the byte format read by parse_clues().

    Each character becomes one byte holding its ASCII code. A newline is
    not stored as a byte of its own: it sets the high bit (0x80) on the
    preceding character instead. The final two bytes always get the high
    bit, which marks the end of the clue.

    :param clue: ASCII text with lines separated by '\\n'
    :returns: the encoded clue as bytes
    :raises ValueError: if the clue contains a non-ASCII character (its
        code would collide with the marker bit), starts with a newline, or
        has fewer than two characters (no room for the end-of-clue marker)
    """
    result = []
    for c in clue:
        if c == '\n':
            if not result:
                raise ValueError("Clue starts with a newline: %r" % (clue,))
            result[-1] |= 0x80
        else:
            code = ord(c)
            # codes >= 0x80 would be indistinguishable from marked bytes
            if code >= 0x80:
                raise ValueError(
                    "Non-ASCII character %r in clue: %r" % (c, clue))
            result.append(code)

    if len(result) < 2:
        raise ValueError("Clue too short to encode: %r" % (clue,))

    # two consecutive marked bytes terminate the clue
    result[-1] |= 0x80
    result[-2] |= 0x80
    return bytes(result)
def bday(n):
    """
    Birthday-problem threshold: the number of uniform draws from N
    possibilities at which the chance of a repeat first exceeds 0.5.

    Returns None when n is negative (no draws are simulated).
    """
    no_repeat = 1.0
    # remaining counts down n, n-1, ..., 0 while draws counts up from 1
    for draws, remaining in enumerate(range(n, -1, -1), start=1):
        no_repeat *= 1.0 * remaining / n
        if no_repeat < 0.5:
            return draws
def scrape():
    """
    Download the season 1 through 30 compendium pages and yield a
    (puzzle, category) tuple for every table row found.

    Prints each season number before fetching it. Every row is expected to
    have exactly four cells; the last two are ignored.
    """
    url = "https://sites.google.com/site/wheeloffortunepuzzlecompendium/home/compendium/season-%i-compendium"
    for season in range(1, 31):
        print(season)
        page = requests.get(url % season)
        dom = Soupy(page.text, 'html5')

        # text of every <td> in a row
        row_cells = Q.find_all('td').each(Q.text)
        table_body = dom.find('td', 'sites-tile-name-content-1').find('tbody')
        rows = table_body.find_all('tr').each(row_cells).val()

        for puzzle, category, _, _ in rows:
            yield puzzle, category
def rollup(pairs):
    """
    Group (key, value) pairs by key.

    Returns a plain dict mapping each key to a list of the distinct values
    seen for it. Duplicates are dropped via a set, so values must be
    hashable and the order inside each list is unspecified.
    """
    grouped = {}
    for key, val in pairs:
        grouped.setdefault(key, set()).add(val)
    return dict((key, list(members)) for key, members in grouped.items())
def clean_new_clues(new_data):
    """
    Turn (clue, category) pairs into ((category, length), encoded) entries.

    Pairs whose category is not listed in CLEAN_CATS are skipped, as are
    clues for which wrapping or encoding raises ValueError. The yielded key
    combines the canonical category with the encoded byte length.
    """
    for clue, cat in new_data:
        if cat not in CLEAN_CATS:
            continue
        try:
            payload = encode(str(wordwrap(clue)))
            yield (CLEAN_CATS[cat], len(payload)), payload
        except ValueError:
            # clue cannot be laid out or encoded; drop it
            continue
def build_clueset():
    """
    Load the clue set from the pickle cache, or build and cache it.

    The result is a dict with two entries:

    * 'old_clues': list of ((category, encoded_length), encoded_clue) for
      the clues found in the ROM, in ROM order
    * 'all_clues': dict mapping (category, encoded_length) to a list of the
      distinct encoded clues (ROM plus scraped) sharing that key

    :raises ValueError: if re-encoding the decoded ROM clues does not
        reproduce the ROM bytes exactly
    """
    if os.path.exists(CLUESET_PATH):
        # NOTE: unpickling can execute arbitrary code; only load a cache
        # file that this script wrote itself.
        with open(CLUESET_PATH, 'rb') as infile:
            return pickle.load(infile)

    rom_block = ROM[CLUE_START:CLUE_STOP]
    clues = list(parse_clues(rom_block))
    encoded = list(map(encode, clues))
    # Round-trip sanity check. An explicit raise (rather than assert) keeps
    # the check active when Python runs with -O.
    if b''.join(encoded) != rom_block:
        raise ValueError(
            "decoded clues do not re-encode to the original ROM bytes")

    # (start, stop) index ranges into the decoded clue list per category.
    # Index 0 is not covered; presumably it is the title entry that the
    # resamplers re-insert themselves -- TODO confirm.
    old_cats = [
        (PLACE, (1, 167)),
        (PERSON, (167, 358)),
        (THING, (358, 516)),
        (LANDMARK, (516, 537)),
        (PHRASE, (537, 752)),
        (TITLE, (752, 899)),
        (EVENT, (899, 1002)),
    ]

    old_clues = [
        ((cat, len(clue)), clue)
        for cat, (start, stop) in old_cats
        for clue in encoded[start:stop]
    ]

    new_clues_dirty = scrape()
    new_clues = list(clean_new_clues(new_clues_dirty))

    all_clues = rollup(
        (key, clue)
        for clueset in [old_clues, new_clues]
        for key, clue in clueset
    )

    result = {
        'old_clues': old_clues,
        'all_clues': all_clues,
    }

    with open(CLUESET_PATH, 'wb') as outfile:
        pickle.dump(result, outfile)

    return result
def resample_clues():
    """
    Build a replacement clue table in which every ROM clue is swapped for a
    randomly chosen clue with the same (category, encoded length) key.

    The table starts with the encoded 'WHEEL / OF / FORTUNE' entry.
    """
    clueset = build_clueset()
    rom_entries = clueset['old_clues']
    pool = clueset['all_clues']

    pieces = [encode('WHEEL\nOF\nFORTUNE')]
    pieces.extend(choice(pool[key]) for key, _ in rom_entries)
    return b''.join(pieces)
def resample_random():
    """
    Build a replacement clue table in which each letter A-Z of every ROM
    clue is replaced through one random letter-to-letter mapping.

    The mapping is drawn once per call, with an independent random letter
    for each source letter, so it need not be a permutation. Characters
    other than A-Z (spaces, punctuation, newlines) are left unchanged. The
    table starts with the encoded 'WHEEL / OF / FORTUNE' entry.
    """
    clueset = build_clueset()
    old_clues = clueset['old_clues']

    result = [encode('WHEEL\nOF\nFORTUNE')]

    chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # The table covers exactly the 26 letters. The previous list-based
    # table assigned 26 items to the 25-element slice [ord('A'):ord('Z')],
    # which shifted the mapping of every character after 'Z' by one.
    trans = str.maketrans(chars, ''.join(choice(chars) for _ in chars))

    for _, clue in old_clues:
        [decoded] = parse_clues(clue)
        result.append(encode(decoded.translate(trans)))
    return b''.join(result)
def single_char_replace(char):
    """
    Build a replacement clue table in which every letter A-Z of every ROM
    clue is replaced by the single character `char`.

    Characters other than A-Z (spaces, punctuation, newlines) are left
    unchanged. The table starts with the encoded 'WHEEL / OF / FORTUNE'
    entry.

    :param char: one-character string substituted for each letter
    """
    clueset = build_clueset()
    old_clues = clueset['old_clues']

    result = [encode('WHEEL\nOF\nFORTUNE')]

    # Map exactly the 26 letter code points. The previous list-based table
    # assigned 26 items to the 25-element slice [ord('A'):ord('Z')], which
    # shifted the mapping of every character after 'Z' by one.
    trans = dict.fromkeys(range(ord('A'), ord('Z') + 1), ord(char))

    for _, clue in old_clues:
        [decoded] = parse_clues(clue)
        result.append(encode(decoded.translate(trans)))
    return b''.join(result)
def strip_newlines():
    """
    Build a replacement clue table in which each line break inside a ROM
    clue is turned into a space.

    The table starts with the encoded 'WHEEL / OF / FORTUNE' entry.

    NOTE(review): a newline costs no byte in the encoded form while a space
    costs one, so the returned table is longer than the ROM clues it was
    built from -- confirm this is intended.
    """
    rom_entries = build_clueset()['old_clues']

    pieces = [encode('WHEEL\nOF\nFORTUNE')]
    for _, raw in rom_entries:
        (text,) = parse_clues(raw)
        flattened = ' '.join(text.split('\n'))
        pieces.append(encode(flattened))
    return b''.join(pieces)
def new_rom(rom, out_path, resampler):
    """
    Write a copy of `rom` to `out_path` with the clue table replaced.

    `resampler` is called with no arguments and must return the bytes that
    take the place of rom[CLUE_START:CLUE_STOP].
    """
    patched = bytearray(rom)
    patched[CLUE_START:CLUE_STOP] = resampler()
    with open(out_path, 'wb') as outfile:
        outfile.write(bytes(patched))
if __name__ == "__main__":
    # (output file, clue-table generator) pairs, written in this order
    jobs = [
        ('random.nes', resample_random),
        ('wheel2.nes', resample_clues),
        ('wheelx.nes', lambda: single_char_replace('X')),
        ('wheelquote.nes', lambda: single_char_replace("'")),
        ('allspace.nes', lambda: single_char_replace(" ")),
        ('no_newlines.nes', strip_newlines),
    ]
    for out_path, resampler in jobs:
        new_rom(ROM, out_path, resampler)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, I tried testing this and seem to have run across a bug where it doesn't translate ampersands (or maybe plus signs) correctly. I would love to fix it, but sadly my Python isn't all that great.
edit: this might work on line 126