ChrisBeaumont · January 26, 2016 05:51
diff --git a/wheel.py b/wheel.py
 from random import choice
 from collections import defaultdict
 import os
 import pickle

 import requests
 from soupy import Soupy, Q

 # Read the whole cartridge image once at import time; the context manager
 # closes the file handle right away instead of leaving it open.
 with open('wheel.nes', 'rb') as _rom_file:
    ROM = _rom_file.read()
 # Cache file for the combined clue set (see build_clueset).
 CLUESET_PATH = 'clues.pickle'
 # Byte offsets of the encoded clue table inside ROM, used as ROM[START:STOP].
 CLUE_START, CLUE_STOP = 0x109d0, 0x14618
 # Limits enforced by wordwrap() on a wrapped clue.
 MAX_LINE_LEN = 11
 MAX_LINES = 4

 # Category labels.  They are only used as grouping keys (the first element
 # of the (category, length) tuples built elsewhere in this file).
 # NOTE(review): PHRASE's text is 'TV Quote' rather than 'Phrase' -- looks
 # harmless since the value is just a key, but confirm it is intentional.
 TITLE = 'Title'
 PHRASE = 'TV Quote'
 PERSON = 'Person'
 THING = 'Thing'
 PLACE = 'Place'
 EVENT = 'Event'
 LANDMARK = 'Landmark'
 # Maps a scraped category name to one of the labels above; clean_new_clues()
 # skips any category that is not listed here.  The '\xa0'-prefixed keys match
 # names that begin with a non-breaking space.
 CLEAN_CATS = {
    u'Star & Role': PERSON,
    u'Proper Name': PERSON,
    u'On the Map': LANDMARK,
    u'Title': TITLE,
    u'Things': THING,
    u'TV Quote': PHRASE,
    u'Character': PERSON,
    u'Song Lyrics': TITLE,
    u'Food & Drink': THING,
    u'\xa0Phrase': PHRASE,
    u'Proper Names': PERSON,
    u'Husband & Wife': PERSON,
    u'Movie Title': TITLE,
    u'Fun & Games': THING,
    u'Living Things': THING,
    u'Events': EVENT,
    u'Same Letter': THING,
    u'Same Name': THING,
    u'Movie Quote': PHRASE,
    u'Fictional Place': PLACE,
    u'Person': PERSON,
    u'Next Line Please': PHRASE,
    u'Rhyme Time': PHRASE,
    u'Clue': PHRASE,
    u'Who Is It?': PERSON,
    u'\xa0Thing': THING,
    u'Fictional Character': PERSON,
    u'Headline': TITLE,
    u'College Life': THING,
    u'On the Menu': THING,
    u'Living Thing': THING,
    u'Slogan': PHRASE,
    u'Places': PLACE,
    u'Title/Author': TITLE,
    u'Thing': THING,
    u'Before & After': PHRASE,
    u'Place': PLACE,
    u'Around the House': THING,
    u'Phrase': PHRASE,
    u'Landmark': LANDMARK,
    u'Best Seller': TITLE,
    u'Fictional Characters': PERSON,
    u'In the Kitchen': THING,
    u'Song Title': TITLE,
    u'People': PERSON,
    u'Song/Artist': TITLE,
    u'Fictional Family': PERSON,
    u'Classic TV': TITLE,
    u'Slang': PHRASE,
    u'Show Biz': THING,
    u'Quotation': PHRASE,
    u'TV Title': TITLE,
    u'Event': EVENT,
    u'Occupation': PERSON
 }


 def parse_clues(data):
    """
    Traverse a block of encoded puzzles, yielding each decoded puzzle.

    Each byte of ``data`` is an ASCII code whose high bit (0x80) acts as a
    signal flag.  One flagged byte ends a line; two flagged bytes in a row
    end the puzzle.  Puzzles are yielded as ``str`` with a newline character
    between lines.

    Bytes that follow the last complete puzzle are dropped.  IndexError is
    raised if ``data`` ends on a lone line-ending byte.
    """
    word = []
    for pos, char in enumerate(data):
        if not char & 0x80:
            word.append(char)
            continue

        # signal character: strip the flag to recover the ascii value
        word.append(char & ~0x80)

        # The first byte has no predecessor; data[-1] would wrap around to
        # the last byte of the block and falsely report a clue boundary.
        if pos > 0 and data[pos - 1] & 0x80:  # end of clue
            yield bytes(word).decode('ascii')
            word = []
        elif not data[pos + 1] & 0x80:  # newline
            word.append(ord('\n'))


 def wordwrap(clue, max_line_len=None, max_lines=None):
    """
    Greedily re-flow a clue onto lines of bounded width.

    Existing newlines count as ordinary whitespace.  Words are packed left
    to right, starting a new line whenever the next word (plus its
    separating space) would overflow the current line.

    clue: text to wrap.
    max_line_len: widest allowed line; defaults to MAX_LINE_LEN.
    max_lines: most lines allowed; defaults to MAX_LINES.

    Returns the wrapped text with lines joined by newlines.
    Raises ValueError if a single word is wider than a line, or if the clue
    needs more than ``max_lines`` lines.
    """
    if max_line_len is None:
        max_line_len = MAX_LINE_LEN
    if max_lines is None:
        max_lines = MAX_LINES

    lines = [[]]
    # split() with no argument already treats newlines as separators
    for word in clue.split():
        if len(word) > max_line_len:
            raise ValueError("Word Too Long: %s" % word)

        current = lines[-1]
        # width after appending = letters so far + one space per placed word
        if len(word) + sum(map(len, current)) + len(current) > max_line_len:
            lines.append([word])
        else:
            current.append(word)

    if len(lines) > max_lines:
        raise ValueError("Too many lines: %s" % lines)

    return '\n'.join(' '.join(line) for line in lines)


 def encode(clue):
    """
    Encode a clue into the ROM's byte format.

    Every character becomes its ASCII code.  A newline emits no byte of its
    own; it sets the high bit (0x80) of the preceding byte instead.  The
    final two bytes both get the high bit, marking the end of the clue.

    Returns the encoded clue as ``bytes``.
    Raises ValueError if the clue contains a non-ASCII character (its code
    would collide with the 0x80 flag), starts with a newline, or encodes
    to fewer than two bytes.
    """
    result = []
    for c in clue:
        if c == '\n':
            if not result:
                raise ValueError("Clue starts with a newline: %r" % clue)
            result[-1] |= 0x80
            continue
        code = ord(c)
        if code >= 0x80:
            raise ValueError("Non-ASCII character %r in clue: %r" % (c, clue))
        result.append(code)

    # two bytes are needed to carry the end-of-clue marker
    if len(result) < 2:
        raise ValueError("Clue too short to encode: %r" % clue)

    result[-1] |= 0x80
    result[-2] |= 0x80
    return bytes(result)


 def bday(n):
    """
    Birthday-problem threshold for ``n`` equally likely values.

    Returns the smallest number of draws for which the probability that all
    draws are distinct drops below one half.  Falls through (returning None)
    when ``n`` is negative.
    """
    no_collision = 1.0
    for draws in range(1, n + 2):
        # chance that this draw misses all of the previous draws - 1 values
        no_collision *= float(n - draws + 1) / n
        if no_collision < 0.5:
            return draws


 def scrape():
    """
    Yield ``(puzzle, category)`` pairs scraped from the online compendium.

    Fetches the pages for seasons 1 through 30 in order, printing each
    season number as progress.  Every table row is unpacked into exactly
    four cells, of which the first two are used.

    Raises requests.RequestException on a network failure, a timeout or a
    non-success HTTP status.
    """
    url = "https://sites.google.com/site/wheeloffortunepuzzlecompendium/home/compendium/season-%i-compendium"
    for i in range(1, 31):
        print(i)
        # Without a timeout requests waits forever on a stalled connection.
        response = requests.get(url % i, timeout=30)
        # Fail loudly instead of parsing an error page as puzzle data.
        response.raise_for_status()
        dom = Soupy(response.text, 'html5')
        data = (
            dom
            .find('td', 'sites-tile-name-content-1')
            .find('tbody')
            .find_all('tr')
            .each(Q.find_all('td').each(Q.text))
            .val()
        )
        yield from ((puzzle, category) for puzzle, category, _, _ in data)


 def rollup(pairs):
    """
    Group values by key, dropping duplicate values.

    pairs: iterable of ``(key, value)`` tuples; values must be hashable.
    Returns a plain dict mapping each key to a list of its distinct values
    (the list order is whatever the intermediate set yields).
    """
    grouped = {}
    for key, val in pairs:
        grouped.setdefault(key, set()).add(val)
    return {key: list(vals) for key, vals in grouped.items()}


 def clean_new_clues(new_data):
    """
    Filter scraped puzzles down to the ones that fit the ROM format.

    new_data: iterable of ``(clue, category)`` pairs.

    Yields ``((category_label, encoded_length), encoded_clue)`` for every
    pair whose category appears in CLEAN_CATS.  Puzzles that are blank, or
    that wordwrap()/encode() reject with ValueError, are skipped.
    """
    for clue, cat in new_data:
        if cat not in CLEAN_CATS:
            continue
        try:
            wrapped = str(wordwrap(clue))
            # encode() marks the end of a clue on its last two bytes; a
            # blank table cell would otherwise abort the whole run with an
            # IndexError instead of being skipped.
            if len(wrapped) < 2:
                continue
            encoded = encode(wrapped)
        except ValueError:
            # best effort: the clue does not fit the board, so drop it
            continue
        yield (CLEAN_CATS[cat], len(encoded)), encoded


 def build_clueset():
    """
    Return the clue set, building and caching it on first use.

    If CLUESET_PATH exists it is unpickled and returned as is.  Otherwise
    the clues are decoded from ROM, merged with freshly scraped clues, and
    the result is pickled to CLUESET_PATH before being returned.

    Returns a dict with two entries:
      'old_clues': list of ``((category, length), encoded_clue)`` for the
                   ROM's own clues, in ROM order.
      'all_clues': dict mapping ``(category, length)`` to a list of the
                   distinct encoded clues (ROM and scraped) with that key.
    """
    if os.path.exists(CLUESET_PATH):
        # NOTE: unpickling can execute arbitrary code; only load a cache
        # file that this script wrote itself.
        with open(CLUESET_PATH, 'rb') as infile:
            return pickle.load(infile)

    clues = list(parse_clues(ROM[CLUE_START:CLUE_STOP]))
    encoded = list(map(encode, clues))
    # sanity check: decoding then re-encoding must reproduce the ROM bytes
    assert b''.join(encoded) == ROM[CLUE_START: CLUE_STOP]

    # Index ranges of each category within the decoded clue list.  Index 0
    # is left out; the resamplers emit 'WHEEL / OF / FORTUNE' in its place
    # (presumably the title text -- confirm).
    old_cats = [
        (PLACE, (1, 167)),
        (PERSON, (167, 358)),
        (THING, (358, 516)),
        (LANDMARK, (516, 537)),
        (PHRASE, (537, 752)),
        (TITLE, (752, 899)),
        (EVENT, (899, 1002)),
    ]
    old_clues = [
        ((cat, len(clue)), clue)
        for cat, (start, stop) in old_cats
        for clue in encoded[start:stop]
    ]

    new_clues_dirty = scrape()
    new_clues = list(clean_new_clues(new_clues_dirty))

    all_clues = rollup(
        (key, clue)
        for clueset in [old_clues, new_clues]
        for key, clue in clueset
    )

    result = {
        'old_clues': old_clues,
        'all_clues': all_clues,
    }

    with open(CLUESET_PATH, 'wb') as outfile:
        pickle.dump(result, outfile)

    return result


 def resample_clues():
    """
    Build a replacement clue block with every clue drawn at random.

    The block opens with the fixed 'WHEEL / OF / FORTUNE' entry.  After
    that, each original clue is replaced by a random pick from the clues
    filed under the same ``(category, length)`` key.
    """
    clueset = build_clueset()
    pool = clueset['all_clues']
    header = encode('WHEEL\nOF\nFORTUNE')
    picks = [choice(pool[key]) for key, _ in clueset['old_clues']]
    return b''.join([header] + picks)


 def single_char_replace(char):
    """
    Build a clue block in which every capital letter A-Z becomes ``char``.

    char: the single replacement character.

    All other characters (spaces, punctuation, line breaks) are left as
    they are.  The block opens with the fixed 'WHEEL / OF / FORTUNE' entry,
    followed by the transformed ROM clues in their original order.
    """
    clueset = build_clueset()
    old_clues = clueset['old_clues']
    result = [encode('WHEEL\nOF\nFORTUNE')]

    # range() excludes its end, hence the + 1 so that 'Z' is covered too.
    # Codes missing from the table are left untouched by str.translate.
    replacement = ord(char)
    trans = {code: replacement for code in range(ord('A'), ord('Z') + 1)}

    for _, clue in old_clues:
        # each stored entry decodes to exactly one clue
        [clue] = parse_clues(clue)
        new_clue = encode(clue.translate(trans))
        result.append(new_clue)

    return b''.join(result)


 def strip_newlines():
    """
    Build a clue block with each ROM clue's line breaks turned into spaces.

    The block opens with the fixed 'WHEEL / OF / FORTUNE' entry.
    """
    old_clues = build_clueset()['old_clues']
    blocks = [encode('WHEEL\nOF\nFORTUNE')]
    # NOTE(review): a space takes a byte of its own while a line break does
    # not (see encode), so the result can be longer than the original
    # block -- presumably acceptable for this experiment; confirm.
    for _, raw in old_clues:
        (text,) = parse_clues(raw)
        blocks.append(encode(text.replace('\n', ' ')))
    return b''.join(blocks)


 def new_rom(rom, out_path, resampler):
    """
    Write a copy of ``rom`` with its clue table replaced.

    rom: the original ROM contents.
    out_path: file the patched ROM is written to.
    resampler: zero-argument callable returning the new clue-table bytes.
    """
    patched = bytearray(rom)
    # slice assignment swaps the clue table for the resampled bytes
    patched[CLUE_START:CLUE_STOP] = resampler()
    with open(out_path, 'wb') as outfile:
        outfile.write(bytes(patched))


 if __name__ == "__main__":
    # (output path, resampler) pairs, written in this order
    jobs = [
        ('wheel2.nes', resample_clues),
        ('wheelx.nes', lambda: single_char_replace('X')),
        ('wheelquote.nes', lambda: single_char_replace("'")),
        ('allspace.nes', lambda: single_char_replace(" ")),
        ('no_newlines.nes', strip_newlines),
    ]
    for out_path, resampler in jobs:
        new_rom(ROM, out_path, resampler)
	from random import choice
	from collections import defaultdict
	import os
	import pickle

	import requests
	from soupy import Soupy, Q

	# Read the whole cartridge image once at import time; the context manager
	# closes the file handle right away instead of leaving it open.
	with open('wheel.nes', 'rb') as _rom_file:
	    ROM = _rom_file.read()
	# Cache file for the combined clue set (see build_clueset).
	CLUESET_PATH = 'clues.pickle'
	# Byte offsets of the encoded clue table inside ROM, used as ROM[START:STOP].
	CLUE_START, CLUE_STOP = 0x109d0, 0x14618
	# Limits enforced by wordwrap() on a wrapped clue.
	MAX_LINE_LEN = 11
	MAX_LINES = 4

	# Category labels.  They are only used as grouping keys (the first element
	# of the (category, length) tuples built elsewhere in this file).
	# NOTE(review): PHRASE's text is 'TV Quote' rather than 'Phrase' -- looks
	# harmless since the value is just a key, but confirm it is intentional.
	TITLE = 'Title'
	PHRASE = 'TV Quote'
	PERSON = 'Person'
	THING = 'Thing'
	PLACE = 'Place'
	EVENT = 'Event'
	LANDMARK = 'Landmark'
	# Maps a scraped category name to one of the labels above; clean_new_clues()
	# skips any category that is not listed here.  The '\xa0'-prefixed keys match
	# names that begin with a non-breaking space.
	CLEAN_CATS = {
	u'Star & Role': PERSON,
	u'Proper Name': PERSON,
	u'On the Map': LANDMARK,
	u'Title': TITLE,
	u'Things': THING,
	u'TV Quote': PHRASE,
	u'Character': PERSON,
	u'Song Lyrics': TITLE,
	u'Food & Drink': THING,
	u'\xa0Phrase': PHRASE,
	u'Proper Names': PERSON,
	u'Husband & Wife': PERSON,
	u'Movie Title': TITLE,
	u'Fun & Games': THING,
	u'Living Things': THING,
	u'Events': EVENT,
	u'Same Letter': THING,
	u'Same Name': THING,
	u'Movie Quote': PHRASE,
	u'Fictional Place': PLACE,
	u'Person': PERSON,
	u'Next Line Please': PHRASE,
	u'Rhyme Time': PHRASE,
	u'Clue': PHRASE,
	u'Who Is It?': PERSON,
	u'\xa0Thing': THING,
	u'Fictional Character': PERSON,
	u'Headline': TITLE,
	u'College Life': THING,
	u'On the Menu': THING,
	u'Living Thing': THING,
	u'Slogan': PHRASE,
	u'Places': PLACE,
	u'Title/Author': TITLE,
	u'Thing': THING,
	u'Before & After': PHRASE,
	u'Place': PLACE,
	u'Around the House': THING,
	u'Phrase': PHRASE,
	u'Landmark': LANDMARK,
	u'Best Seller': TITLE,
	u'Fictional Characters': PERSON,
	u'In the Kitchen': THING,
	u'Song Title': TITLE,
	u'People': PERSON,
	u'Song/Artist': TITLE,
	u'Fictional Family': PERSON,
	u'Classic TV': TITLE,
	u'Slang': PHRASE,
	u'Show Biz': THING,
	u'Quotation': PHRASE,
	u'TV Title': TITLE,
	u'Event': EVENT,
	u'Occupation': PERSON
	}


	def parse_clues(data):
	    """
	    Traverse a block of encoded puzzles, yielding each decoded puzzle.

	    Each byte of ``data`` is an ASCII code whose high bit (0x80) acts as a
	    signal flag.  One flagged byte ends a line; two flagged bytes in a row
	    end the puzzle.  Puzzles are yielded as ``str`` with a newline character
	    between lines.

	    Bytes that follow the last complete puzzle are dropped.  IndexError is
	    raised if ``data`` ends on a lone line-ending byte.
	    """
	    word = []
	    for pos, char in enumerate(data):
	        if not char & 0x80:
	            word.append(char)
	            continue

	        # signal character: strip the flag to recover the ascii value
	        word.append(char & ~0x80)

	        # The first byte has no predecessor; data[-1] would wrap around to
	        # the last byte of the block and falsely report a clue boundary.
	        if pos > 0 and data[pos - 1] & 0x80:  # end of clue
	            yield bytes(word).decode('ascii')
	            word = []
	        elif not data[pos + 1] & 0x80:  # newline
	            word.append(ord('\n'))


	def wordwrap(clue, max_line_len=None, max_lines=None):
	    """
	    Greedily re-flow a clue onto lines of bounded width.

	    Existing newlines count as ordinary whitespace.  Words are packed left
	    to right, starting a new line whenever the next word (plus its
	    separating space) would overflow the current line.

	    clue: text to wrap.
	    max_line_len: widest allowed line; defaults to MAX_LINE_LEN.
	    max_lines: most lines allowed; defaults to MAX_LINES.

	    Returns the wrapped text with lines joined by newlines.
	    Raises ValueError if a single word is wider than a line, or if the clue
	    needs more than ``max_lines`` lines.
	    """
	    if max_line_len is None:
	        max_line_len = MAX_LINE_LEN
	    if max_lines is None:
	        max_lines = MAX_LINES

	    lines = [[]]
	    # split() with no argument already treats newlines as separators
	    for word in clue.split():
	        if len(word) > max_line_len:
	            raise ValueError("Word Too Long: %s" % word)

	        current = lines[-1]
	        # width after appending = letters so far + one space per placed word
	        if len(word) + sum(map(len, current)) + len(current) > max_line_len:
	            lines.append([word])
	        else:
	            current.append(word)

	    if len(lines) > max_lines:
	        raise ValueError("Too many lines: %s" % lines)

	    return '\n'.join(' '.join(line) for line in lines)


	def encode(clue):
	    """
	    Encode a clue into the ROM's byte format.

	    Every character becomes its ASCII code.  A newline emits no byte of its
	    own; it sets the high bit (0x80) of the preceding byte instead.  The
	    final two bytes both get the high bit, marking the end of the clue.

	    Returns the encoded clue as ``bytes``.
	    Raises ValueError if the clue contains a non-ASCII character (its code
	    would collide with the 0x80 flag), starts with a newline, or encodes
	    to fewer than two bytes.
	    """
	    result = []
	    for c in clue:
	        if c == '\n':
	            if not result:
	                raise ValueError("Clue starts with a newline: %r" % clue)
	            result[-1] |= 0x80
	            continue
	        code = ord(c)
	        if code >= 0x80:
	            raise ValueError("Non-ASCII character %r in clue: %r" % (c, clue))
	        result.append(code)

	    # two bytes are needed to carry the end-of-clue marker
	    if len(result) < 2:
	        raise ValueError("Clue too short to encode: %r" % clue)

	    result[-1] |= 0x80
	    result[-2] |= 0x80
	    return bytes(result)


	def bday(n):
	    """
	    Birthday-problem threshold for ``n`` equally likely values.

	    Returns the smallest number of draws for which the probability that all
	    draws are distinct drops below one half.  Falls through (returning None)
	    when ``n`` is negative.
	    """
	    prob = 1.0
	    for i in range(n + 1):
	        # running product: chance that draw i + 1 misses the i before it
	        prob *= 1.0 * (n - i) / n
	        if prob < 0.5:
	            return i + 1


	def scrape():
	    """
	    Yield ``(puzzle, category)`` pairs scraped from the online compendium.

	    Fetches the pages for seasons 1 through 30 in order, printing each
	    season number as progress.  Every table row is unpacked into exactly
	    four cells, of which the first two are used.

	    Raises requests.RequestException on a network failure, a timeout or a
	    non-success HTTP status.
	    """
	    url = "https://sites.google.com/site/wheeloffortunepuzzlecompendium/home/compendium/season-%i-compendium"
	    for i in range(1, 31):
	        print(i)
	        # Without a timeout requests waits forever on a stalled connection.
	        response = requests.get(url % i, timeout=30)
	        # Fail loudly instead of parsing an error page as puzzle data.
	        response.raise_for_status()
	        dom = Soupy(response.text, 'html5')
	        data = (
	            dom
	            .find('td', 'sites-tile-name-content-1')
	            .find('tbody')
	            .find_all('tr')
	            .each(Q.find_all('td').each(Q.text))
	            .val()
	        )
	        yield from ((puzzle, category) for puzzle, category, _, _ in data)


	def rollup(pairs):
	    """
	    Group values by key, dropping duplicate values.

	    pairs: iterable of ``(key, value)`` tuples; values must be hashable.
	    Returns a plain dict mapping each key to a list of its distinct values
	    (the list order is whatever the intermediate set yields).
	    """
	    result = defaultdict(set)
	    for key, val in pairs:
	        result[key].add(val)
	    return {key: list(val) for key, val in result.items()}


	def clean_new_clues(new_data):
	    """
	    Filter scraped puzzles down to the ones that fit the ROM format.

	    new_data: iterable of ``(clue, category)`` pairs.

	    Yields ``((category_label, encoded_length), encoded_clue)`` for every
	    pair whose category appears in CLEAN_CATS.  Puzzles that are blank, or
	    that wordwrap()/encode() reject with ValueError, are skipped.
	    """
	    for clue, cat in new_data:
	        if cat not in CLEAN_CATS:
	            continue
	        try:
	            wrapped = str(wordwrap(clue))
	            # encode() marks the end of a clue on its last two bytes; a
	            # blank table cell would otherwise abort the whole run with an
	            # IndexError instead of being skipped.
	            if len(wrapped) < 2:
	                continue
	            encoded = encode(wrapped)
	        except ValueError:
	            # best effort: the clue does not fit the board, so drop it
	            continue
	        yield (CLEAN_CATS[cat], len(encoded)), encoded


	def build_clueset():
	    """
	    Return the clue set, building and caching it on first use.

	    If CLUESET_PATH exists it is unpickled and returned as is.  Otherwise
	    the clues are decoded from ROM, merged with freshly scraped clues, and
	    the result is pickled to CLUESET_PATH before being returned.

	    Returns a dict with two entries:
	      'old_clues': list of ``((category, length), encoded_clue)`` for the
	                   ROM's own clues, in ROM order.
	      'all_clues': dict mapping ``(category, length)`` to a list of the
	                   distinct encoded clues (ROM and scraped) with that key.
	    """
	    if os.path.exists(CLUESET_PATH):
	        # NOTE: unpickling can execute arbitrary code; only load a cache
	        # file that this script wrote itself.
	        with open(CLUESET_PATH, 'rb') as infile:
	            return pickle.load(infile)

	    clues = list(parse_clues(ROM[CLUE_START:CLUE_STOP]))
	    encoded = list(map(encode, clues))
	    # sanity check: decoding then re-encoding must reproduce the ROM bytes
	    assert b''.join(encoded) == ROM[CLUE_START: CLUE_STOP]

	    # Index ranges of each category within the decoded clue list.  Index 0
	    # is left out; the resamplers emit 'WHEEL / OF / FORTUNE' in its place
	    # (presumably the title text -- confirm).
	    old_cats = [
	        (PLACE, (1, 167)),
	        (PERSON, (167, 358)),
	        (THING, (358, 516)),
	        (LANDMARK, (516, 537)),
	        (PHRASE, (537, 752)),
	        (TITLE, (752, 899)),
	        (EVENT, (899, 1002)),
	    ]
	    old_clues = [
	        ((cat, len(clue)), clue)
	        for cat, (start, stop) in old_cats
	        for clue in encoded[start:stop]
	    ]

	    new_clues_dirty = scrape()
	    new_clues = list(clean_new_clues(new_clues_dirty))

	    all_clues = rollup(
	        (key, clue)
	        for clueset in [old_clues, new_clues]
	        for key, clue in clueset
	    )

	    result = {
	        'old_clues': old_clues,
	        'all_clues': all_clues,
	    }

	    with open(CLUESET_PATH, 'wb') as outfile:
	        pickle.dump(result, outfile)

	    return result


	def resample_clues():
	    """
	    Build a replacement clue block with every clue drawn at random.

	    The block opens with the fixed 'WHEEL / OF / FORTUNE' entry.  After
	    that, each original clue is replaced by a random pick from the clues
	    filed under the same ``(category, length)`` key.
	    """
	    clueset = build_clueset()
	    old_clues = clueset['old_clues']
	    all_clues = clueset['all_clues']
	    result = [encode('WHEEL\nOF\nFORTUNE')]

	    for key, _ in old_clues:
	        result.append(choice(all_clues[key]))
	    return b''.join(result)


	def single_char_replace(char):
	    """
	    Build a clue block in which every capital letter A-Z becomes ``char``.

	    char: the single replacement character.

	    All other characters (spaces, punctuation, line breaks) are left as
	    they are.  The block opens with the fixed 'WHEEL / OF / FORTUNE' entry,
	    followed by the transformed ROM clues in their original order.
	    """
	    clueset = build_clueset()
	    old_clues = clueset['old_clues']
	    result = [encode('WHEEL\nOF\nFORTUNE')]

	    # range() excludes its end, hence the + 1 so that 'Z' is covered too.
	    # Codes missing from the table are left untouched by str.translate.
	    replacement = ord(char)
	    trans = {code: replacement for code in range(ord('A'), ord('Z') + 1)}

	    for _, clue in old_clues:
	        # each stored entry decodes to exactly one clue
	        [clue] = parse_clues(clue)
	        new_clue = encode(clue.translate(trans))
	        result.append(new_clue)

	    return b''.join(result)


	def strip_newlines():
	    """
	    Build a clue block with each ROM clue's line breaks turned into spaces.

	    The block opens with the fixed 'WHEEL / OF / FORTUNE' entry.
	    """
	    clueset = build_clueset()
	    old_clues = clueset['old_clues']
	    result = [encode('WHEEL\nOF\nFORTUNE')]
	    # NOTE(review): a space takes a byte of its own while a line break does
	    # not (see encode), so the result can be longer than the original
	    # block -- presumably acceptable for this experiment; confirm.
	    for _, clue in old_clues:
	        [clue] = parse_clues(clue)
	        result.append(encode(clue.replace('\n', ' ')))

	    return b''.join(result)


	def new_rom(rom, out_path, resampler):
	    """
	    Write a copy of ``rom`` with its clue table replaced.

	    rom: the original ROM contents.
	    out_path: file the patched ROM is written to.
	    resampler: zero-argument callable returning the new clue-table bytes.
	    """
	    data = list(rom)
	    # slice assignment swaps the clue table for the resampled bytes
	    data[CLUE_START:CLUE_STOP] = list(resampler())
	    with open(out_path, 'wb') as outfile:
	        outfile.write(bytes(data))


	if __name__ == "__main__":
	    # one patched ROM per experiment
	    new_rom(ROM, 'wheel2.nes', resample_clues)
	    new_rom(ROM, 'wheelx.nes', lambda: single_char_replace('X'))
	    new_rom(ROM, 'wheelquote.nes', lambda: single_char_replace("'"))
	    new_rom(ROM, 'allspace.nes', lambda: single_char_replace(" "))
	    new_rom(ROM, 'no_newlines.nes', strip_newlines)
No results found