Created
April 25, 2013 21:52
-
-
Save nvanderw/5463517 to your computer and use it in GitHub Desktop.
Zalgo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import sys | |
from random import SystemRandom | |
# A list of unicode combining characters (code points U+0300 through U+036F).
# `unichr` only exists on Python 2; on Python 3 `chr` already returns text.
try:
    _code_point_to_text = unichr
except NameError:
    _code_point_to_text = chr
COMBINING = [_code_point_to_text(c) for c in range(0x300, 0x370)]

# Module-level alias for KeyError, the error a dictionary lookup raises for a
# missing key.
ZalgoError = KeyError
def zalgoify(source, rand, expected_chars=5, combining=None):
    """
    Given a string and a random number generator, give a zalgo string

    Lazily yields every character of ``source`` in order, each one followed
    by a random selection of distinct combining marks.

    source         -- iterable of characters (a string or a generator)
    rand           -- object providing ``random()`` and ``sample()``, e.g.
                      ``random.Random`` or ``random.SystemRandom``
    expected_chars -- tuning knob for the number of marks: after every
                      character the loop keeps adding one more mark with
                      probability ``1 - 1/expected_chars``, so roughly
                      ``expected_chars - 1`` marks are attached on average
    combining      -- sequence of marks to draw from; defaults to the
                      module-level COMBINING list

    Raises ValueError (when iteration starts) if ``expected_chars`` is not
    positive.
    """
    if expected_chars <= 0:
        raise ValueError("expected_chars must be positive")

    marks = COMBINING if combining is None else list(combining)
    # Explicit float division: the threshold must not collapse to 0 under
    # Python 2 integer division when `from __future__ import division` is
    # not in effect.
    stop_probability = 1.0 / expected_chars
    # sample() draws without replacement, so it can never return more marks
    # than the pool holds.
    max_chars = len(marks)

    for char in source:
        yield char  # Emit the character
        # Use geometric R.V. to determine number of additional chars
        numchars = 0
        while rand.random() > stop_probability and numchars < max_chars:
            numchars += 1
        for ch in rand.sample(marks, numchars):
            yield ch
def gen_markov(source, rand):
    """
    Given an input source of tokens, generate an infinite sequence of words
    using a Markov model

    source -- iterable of tokens (words); it is consumed once, up front
    rand   -- object providing ``random()`` and ``choice()``, e.g.
              ``random.Random`` or ``random.SystemRandom``

    The first word is picked uniformly among the words that have at least
    one recorded successor. Whenever the current word has no recorded
    successor (for example the final token of the source), the chain
    restarts from another uniformly chosen word.

    If the source yields no transitions at all (fewer than two tokens) the
    call to ``rand.choice`` receives an empty list; the standard library
    generators raise IndexError in that case.
    """
    def get_transition_map(source):
        """
        Given a source, which is an iterable sequence of words, generate a
        dictionary of transitions.

        The transitions dictionary maps a word to another dictionary of
        the possible words we could emit next.

        This second-level dictionary maps "next words" to the number of
        times this transition has occurred in the text.
        """
        transitions = {}
        last_token = None
        for token in source:
            if last_token is not None:
                followers = transitions.setdefault(last_token, {})
                followers[token] = followers.get(token, 0) + 1
            last_token = token
        return transitions

    def get_frequency_map(transitions):
        """
        Given a map of absolute transition occurrences like the
        output of get_transition_map, scales all of the transitions
        from each word to sum to 1 so that each word has a
        probability mass function of possible transitions.
        """
        freqs = {}
        for token, followers in transitions.items():
            # float() keeps this a true division even without
            # `from __future__ import division` on Python 2.
            total = float(sum(followers.values()))
            freqs[token] = {
                next_token: count / total
                for next_token, count in followers.items()
            }
        return freqs

    def select_from_pmf(pmf):
        """
        Given a probability mass function, which is a dictionary mapping
        items to their probabilities, randomly choose one
        """
        selector = rand.random()
        cumulative = 0
        item = None
        for item, prob in pmf.items():
            cumulative += prob
            if cumulative > selector:
                return item
        # Floating-point rounding can leave the running total a hair below
        # the selector; fall back to the last item instead of returning
        # None, which would otherwise be yielded as a "word".
        return item

    freqs = get_frequency_map(get_transition_map(source))
    # Materialise the candidate start words once: choice() needs an
    # indexable sequence, and the set of words never changes afterwards.
    start_words = list(freqs)

    # Choose a first word randomly and begin transitioning
    word = rand.choice(start_words)
    while True:
        yield word
        pmf = freqs.get(word)
        if pmf is None:
            # Dead end: no successor was ever observed for this word.
            word = rand.choice(start_words)
        else:
            word = select_from_pmf(pmf)
# Seed corpus: main() splits it on whitespace and feeds the words to
# gen_markov.
TEXT = """oh god the horror it comes it lurks in the shadows oh god oh god
oh no why why oh why no no the fear not the oh it can't be why me parsing
XML with regex it is torture the madness it cannot be fear as it is"""
def main():
    """
    Write an endless stream of zalgo text, generated from TEXT, to
    standard output.
    """
    def spaced_letters(words):
        # Flatten a stream of words into single characters, putting one
        # space after every word.
        for token in words:
            for letter in token:
                yield letter
            yield " "

    rand = SystemRandom()
    word_stream = gen_markov(TEXT.split(), rand)
    # The same generator drives both the word choice and the mark choice.
    for glyph in zalgoify(spaced_letters(word_stream), rand):
        sys.stdout.write(glyph)
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment