Skip to content

Instantly share code, notes, and snippets.

@thomasboyt
Created December 5, 2012 23:33
Show Gist options
  • Save thomasboyt/4220509 to your computer and use it in GitHub Desktop.
Save thomasboyt/4220509 to your computer and use it in GitHub Desktop.

A Markov chain generator that uses the dialog from the wonderful Jerkcity webcomic as its base.

To run, download the script and place Jerkcity's decompressed dialog file, named dialog.xml, in the same directory. Then run it with:

python markov_city.py <max chain length>

After the initial run, it'll cache the generated table in table.json.

from xml.etree import ElementTree
import re, random, os, json, argparse
### Initial parsing & table creation
# Parse through the dialog XML file and take all of the dialogue out for parsing, removing character names
def parse_xml(path="dialog.xml"):
    """Collect every spoken line from the dialog XML file, minus speaker names.

    Each ``<issue>`` element's ``<dialog>`` text is split on newlines, and
    every line of the form ``NAME: text`` contributes ``text`` to the result.
    Lines that do not match that shape (including any whose text part
    contains a ``/``) are ignored.

    Args:
        path: Location of the XML file. Defaults to ``dialog.xml`` in the
            current working directory.

    Returns:
        A list of dialogue strings in document order.
    """
    # Compiled once instead of being re-looked-up for every line.
    speaker_line = re.compile(r"(\w+): ([^/]*)$")
    tree = ElementTree.parse(path)
    all_lines = []
    for issue in tree.getroot().iter("issue"):
        dialog = issue.find("dialog")
        # find() returns None when the child is missing and .text is None for
        # an empty element; either case would otherwise raise AttributeError.
        if dialog is None or not dialog.text:
            continue
        for line in dialog.text.split("\n"):
            result = speaker_line.match(line)
            if result:
                all_lines.append(result.group(2))
    return all_lines
# Table structure:
# word {
# possible_following_word: <number of instances counted>
# }
def create_table(all_lines):
    """Build the word-transition table used to generate chains.

    Every line is split on single spaces, and each adjacent pair of tokens
    ``(current, following)`` bumps ``table[current][following]`` by one.
    The last token of a line has no follower, so it only appears as a key
    if it also occurs earlier in some line.

    Args:
        all_lines: Iterable of dialogue strings.

    Returns:
        A dict mapping each word to a dict of ``{following_word: count}``.
    """
    table = {}
    for sentence in all_lines:
        tokens = sentence.split(" ")
        # Pair each token with its successor; zip stops before the last one.
        for current, following in zip(tokens, tokens[1:]):
            followers = table.setdefault(current, {})
            followers[following] = followers.get(following, 0) + 1
    return table
### (de)serialization. Faster than parsing & recreating the table each time it's run.
def save_table(table, path="table.json"):
    """Serialize the transition table to a JSON file.

    Args:
        table: The word-transition dict to cache.
        path: Destination file. Defaults to ``table.json`` in the current
            working directory.
    """
    serialized = json.dumps(table)
    # The context manager closes the file even if the write fails.
    with open(path, "w") as f:
        f.write(serialized)
def parse_table(serialized_table):
    """Decode a JSON string back into the transition table.

    Args:
        serialized_table: JSON text of the table.

    Returns:
        The decoded table object.
    """
    decoder = json.JSONDecoder()
    return decoder.decode(serialized_table)
### Chain Generation
def weighted_probability(entries, rnd):
    """Pick the word whose cumulative weight range contains ``rnd``.

    The entries are walked in dict iteration order; each word's weight is
    subtracted from ``rnd`` until ``rnd`` falls below the current weight.

    Args:
        entries: Dict of ``{word: weight}``.
        rnd: A number, expected to lie in ``[0, sum(weights))``.

    Returns:
        The selected word, or ``None`` if ``rnd`` is not smaller than the
        total of all weights.
    """
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for word, weight in entries.items():
        if rnd < weight:
            return word
        rnd -= weight
def generate_text(table, length):
    """Generate a Markov chain of at most ``length`` words.

    The chain starts from a uniformly random key of ``table``. Each further
    word is drawn from the followers of the previous word, weighted by their
    counts. Generation stops early when the previous word has no recorded
    followers.

    Args:
        table: Dict of ``{word: {following_word: count}}``.
        length: Maximum number of words to produce.

    Returns:
        The generated words joined by single spaces. An empty string is
        returned when ``length`` is not positive or ``table`` is empty.
    """
    # Nothing to generate: honour the maximum and avoid choosing from an
    # empty sequence.
    if length <= 0 or not table:
        return ""
    # list() is needed because Python 3 dict views cannot be indexed.
    words = [random.choice(list(table))]
    while len(words) < length:
        entries = table.get(words[-1])
        # A word that never had a follower ends the chain.
        if not entries:
            break
        total_count = sum(entries.values())
        choice = random.randint(0, total_count - 1)
        words.append(weighted_probability(entries, choice))
    return " ".join(words)
### Command line usage
if __name__ == "__main__":
    # Usage: python markov_city.py <max chain length>
    parser = argparse.ArgumentParser()
    parser.add_argument('length', type=int, help='Maximum number of words to generate.')
    args = parser.parse_args()
    if os.path.isfile("table.json"):
        # Reuse the cached table rather than re-parsing the XML.
        with open("table.json") as f:
            table = parse_table(f.read())
    else:
        # First run: build the table from the dialog file and cache it.
        table = create_table(parse_xml())
        save_table(table)
    # The parenthesised form prints identically on Python 2 and Python 3.
    print(generate_text(table, args.length))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment