Last active
August 29, 2015 14:01
-
-
Save inky/1d79c76804f8a0bc7784 to your computer and use it in GitHub Desktop.
creepy shakespeare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Requires shakespeare_sonnets.json from https://github.com/dariusk/corpora (place it in the working directory), then run:
python transform_data.py > lines.txt
python creep.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| import sys | |
# Output templates; '{}' is filled with a phrase via str.format().
# random.choice() picks uniformly from this tuple, so the plain winking
# form is repeated seven times to make it the most likely outcome.
PATTERNS = ('{} ;)',) * 7 + (
    '{} ;) x',
    '{} :/',
    '{} lol',
    '{} lol ;)',
    '{} ;) lol',
    'haha {} ;)',
    '{} haha ;)',
    '{} ;) haha',
)
def source_lines(path='lines.txt'):
    """Return the phrases stored one per line in a text file.

    Args:
        path: File to read. Defaults to ``lines.txt`` in the current
            working directory.

    Returns:
        A list of the file's lines with surrounding whitespace removed.
        Lines that are empty after stripping are skipped, so a stray
        blank line can never be picked as a phrase.
    """
    with open(path) as fp:
        stripped = (raw.strip() for raw in fp)
        return [text for text in stripped if text]
def creep(lines=None, patterns=None):
    """Return one random phrase wrapped in one random pattern.

    Args:
        lines: Sequence of phrases to choose from. When None, the phrases
            are loaded with source_lines() on every call, so callers that
            generate many messages should load them once and pass them in.
        patterns: Sequence of format strings containing a '{}' placeholder.
            When None, the module-level PATTERNS is used.

    Returns:
        The chosen pattern with the chosen phrase substituted in.

    Raises:
        IndexError: If the phrase or pattern sequence is empty (raised by
            random.choice).
    """
    if lines is None:
        lines = source_lines()
    if patterns is None:
        patterns = PATTERNS
    # Keep the draw order (phrase first, then pattern) so seeded runs
    # produce the same sequence as before.
    line = random.choice(lines)
    pattern = random.choice(patterns)
    return pattern.format(line)


if __name__ == '__main__':
    # Read the phrase file once instead of once per generated message.
    phrases = source_lines()
    for _ in range(10):
        # print(...) with a single argument works on Python 2 and Python 3.
        print(creep(phrases))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
# Punctuation that ends a phrase; every occurrence becomes a line break.
SEP_CHARS = '.,;?!:'
# Quote characters that are deleted outright.
REMOVE_CHARS = '\'"'

# Text-speak substitutions, applied to whole words only.
_WORD_REPLACEMENTS = {
    'you': 'u',
    'thou': 'u',
    'your': 'ur',
    'thine': 'ur',
    'why': 'y',
}


def parse_lines(s, min_words=2, max_words=4):
    """Split a line of verse into short, text-speak phrases.

    The text is lower-cased, '--' is turned into a space, every character
    in SEP_CHARS is treated as a phrase boundary and every character in
    REMOVE_CHARS is dropped. Each resulting fragment is split on
    whitespace; fragments whose word count is within the given bounds are
    kept, with words listed in _WORD_REPLACEMENTS swapped for their
    abbreviation and the words re-joined by single spaces.

    Args:
        s: The text to process.
        min_words: Smallest number of words a kept phrase may have.
        max_words: Largest number of words a kept phrase may have.

    Returns:
        A list of phrases in the order they appear in ``s``; empty when
        no fragment falls within the word-count bounds.
    """
    text = s.lower().replace('--', ' ')
    for sep in SEP_CHARS:
        text = text.replace(sep, '\n')
    for rem in REMOVE_CHARS:
        text = text.replace(rem, '')

    phrases = []
    for fragment in text.split('\n'):
        words = fragment.split()
        if min_words <= len(words) <= max_words:
            phrases.append(
                ' '.join(_WORD_REPLACEMENTS.get(word, word) for word in words)
            )
    return phrases
if __name__ == '__main__':
    # Collect the short phrases from every line of every sonnet and write
    # them to stdout, one per line.
    lines = []
    with open('shakespeare_sonnets.json') as fp:
        # The JSON document is read as an object with a 'sonnets' list
        # whose entries each carry a 'lines' list of strings.
        for sonnet in json.load(fp)['sonnets']:
            for line in sonnet['lines']:
                lines.extend(parse_lines(line))
    # print(...) with a single argument works on Python 2 and Python 3.
    print('\n'.join(lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment