|
from xml.etree import ElementTree |
|
import re, random, os, json, argparse |
|
|
|
|
|
### Initial parsing & table creation |
|
|
|
# Parse through the dialog XML file and take all of the dialogue out for parsing, removing character names |
|
def parse_xml(path="dialog.xml"):
    """Collect the spoken text of every ``Speaker: text`` line in the XML.

    Each ``<issue>`` element's ``<dialog>`` child is split on newlines, and
    every line that matches ``<word>: <text>`` contributes ``<text>`` (the
    speaker name is dropped). Lines that do not start with a speaker
    prefix, or whose text contains a ``/``, are skipped.

    Args:
        path: XML file to read. Defaults to ``"dialog.xml"`` in the current
            working directory.

    Returns:
        A list of dialogue strings in document order.
    """
    # Compiled once instead of being re-looked-up for every line.
    speaker_line = re.compile(r"(\w+): ([^/]*)$")

    root = ElementTree.parse(path).getroot()

    all_lines = []
    for issue in root.iter("issue"):
        # findtext() yields None when <dialog> is missing and "" when it is
        # empty, so neither case blows up on .split().
        text = issue.findtext("dialog")
        if not text:
            continue

        for line in text.split("\n"):
            result = speaker_line.match(line)
            if result:
                all_lines.append(result.group(2))

    return all_lines
|
|
|
# Table structure: |
|
# word { |
|
# possible_following_word: <number of instances counted> |
|
# } |
|
|
|
def create_table(all_lines):
    """Build the word-transition table used for chain generation.

    Every line is split on single spaces; for each adjacent pair of tokens
    the count stored under ``table[first][second]`` is incremented.

    Args:
        all_lines: Iterable of dialogue strings.

    Returns:
        A plain dict mapping each word to a dict of
        ``{following_word: number_of_occurrences}``. The final token of a
        line only gets an entry if it is followed by something elsewhere.
    """
    table = {}

    for sentence in all_lines:
        tokens = sentence.split(" ")
        # zip() stops at the shorter sequence, so the last token is never
        # paired with a non-existent successor.
        for current, following in zip(tokens, tokens[1:]):
            followers = table.setdefault(current, {})
            followers[following] = followers.get(following, 0) + 1

    return table
|
|
|
|
|
### (de)serialization. Faster than parsing & recreating the table each time it's run. |
|
|
|
def save_table(table, path="table.json"):
    """Serialize the transition table to a JSON file.

    Args:
        table: JSON-serializable transition table (word -> {word: count}).
        path: Destination file. Defaults to ``"table.json"`` in the current
            working directory.
    """
    # Encode before opening so a serialization error cannot truncate an
    # existing cache file.
    serialized = json.dumps(table)

    # The context manager closes the handle even if write() raises.
    with open(path, 'w') as f:
        f.write(serialized)
|
|
|
|
|
def parse_table(serialized_table):
    """Decode a JSON string back into a transition table.

    Args:
        serialized_table: JSON text, as written by ``save_table``.

    Returns:
        The decoded table (word -> {following_word: count}).
    """
    return json.loads(serialized_table)
|
|
|
|
|
### Chain Generation |
|
|
|
def weighted_probability(entries, rnd):
    """Pick the word whose cumulative weight range contains ``rnd``.

    The entries are walked in dict iteration order; each word owns a
    half-open slice of ``[0, sum(weights))`` as wide as its weight.

    Args:
        entries: Mapping of word -> integer weight (occurrence count).
        rnd: Number in ``[0, sum(entries.values()))``.

    Returns:
        The selected word, or ``None`` when ``rnd`` is not below the total
        weight (including when ``entries`` is empty).
    """
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for word, weight in entries.items():
        if rnd < weight:
            return word
        # Shift rnd into the next word's slice.
        rnd -= weight

    return None
|
|
|
|
|
def generate_text(table, length):
    """Generate a space-joined chain of words from the transition table.

    A random key of ``table`` starts the chain. Each following word is
    drawn from the previous word's successors, weighted by their counts.
    The chain stops once it holds ``length`` words, or earlier when the
    last word has no recorded successors.

    Args:
        table: Mapping of word -> {following_word: count}.
        length: Maximum number of words to generate. The starting word is
            always emitted, so values below 1 still yield one word.

    Returns:
        The generated words joined by single spaces, or ``""`` when
        ``table`` is empty.
    """
    if not table:
        # Nothing to seed the chain with.
        return ""

    # list(table) rather than table.keys(): dict views cannot be indexed
    # by random.choice on Python 3.
    words = [random.choice(list(table))]

    while len(words) < length:
        successors = table.get(words[-1])
        total_count = sum(successors.values()) if successors else 0
        if total_count <= 0:
            # Dead end: the last word was never followed by anything.
            break

        # randint is inclusive on both ends, hence total_count - 1.
        choice = random.randint(0, total_count - 1)
        words.append(weighted_probability(successors, choice))

    return " ".join(words)
|
|
|
|
|
### Command line usage |
|
|
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'length',
        type=int,
        help='Maximum number of words to generate.',
    )
    args = parser.parse_args()

    if os.path.isfile("table.json"):
        # Reuse the cached table instead of re-parsing the XML.
        with open("table.json") as f:
            table = parse_table(f.read())
    else:
        # First run: build the table from the XML and cache it.
        lines = parse_xml()
        table = create_table(lines)
        save_table(table)

    text = generate_text(table, args.length)
    # Parenthesized form prints the same on Python 2 and is valid on Python 3.
    print(text)