nvanderw · April 25, 2013 21:52
diff --git a/zalgo.py b/zalgo.py
 from __future__ import division

 import sys
 from random import SystemRandom

 # A list of unicode combining characters: every code point from U+0300
 # through U+036F (xrange stops before 0x370). zalgoify samples from it.
 COMBINING = [unichr(c) for c in xrange(0x300, 0x370)]

 # Alias for KeyError; gen_markov catches it when the current word has no
 # entry in its transition table.
 ZalgoError = KeyError

 def zalgoify(source, rand, expected_chars=5):
    """
    Given a string and a random number generator, give a zalgo string.

    Yields every character of ``source`` unchanged, each followed by a
    random number of combining characters taken from COMBINING.

    source -- iterable of characters to decorate
    rand -- object providing ``random()`` and ``sample(population, k)``
    expected_chars -- controls how many marks get stacked on a character:
        each draw adds another mark with probability
        ``1 - 1/expected_chars``, so the count is geometric with a mean
        of roughly ``expected_chars - 1``, capped at ``len(COMBINING)``.
        Must be positive; the default is the previously hard-coded 5.

    Raises ValueError (when iteration starts, since this is a generator)
    if expected_chars is not positive.
    """
    if expected_chars <= 0:
        raise ValueError("expected_chars must be positive")

    # Loop invariants: computed once rather than once per character.
    # 1.0 keeps this a true division even without the __future__ import.
    stop_threshold = 1.0 / expected_chars
    max_chars = len(COMBINING)

    for char in source:
        yield char # Emit the character

        # Use geometric R.V. to determine number of additional chars
        numchars = 0
        while rand.random() > stop_threshold and numchars < max_chars:
            numchars += 1

        # random.Random.sample draws without replacement, so a mark is
        # never repeated on the same character.
        for ch in rand.sample(COMBINING, numchars):
            yield ch

 def gen_markov(source, rand):
    """
    Given an input source of tokens, generate an infinite sequence of words
    using a Markov model

    source -- iterable of hashable tokens; it is consumed once, up front,
        to count which token follows which.
    rand -- object providing ``random()`` and ``choice(seq)``.

    The first word, and the word emitted after one that has no recorded
    successor, is picked with ``rand.choice`` from the tokens that have at
    least one successor. A source with fewer than two tokens yields no
    transitions, so ``rand.choice`` is then handed an empty list
    (``random.Random.choice`` raises IndexError for that).
    """

    def get_transition_map(source):
        """
        Given a source, which is an iterable sequence of words, generate a
        dictionary of transitions.

        The transitions dictionary maps a word to another dictionary of
        the possible words we could emit next.

        This second-level dictionary maps "next words" to the number of
        times this transition has occurred in the text.
        """
        transitions = {}
        last_token = None
        for token in source:
            if last_token is not None:
                followers = transitions.setdefault(last_token, {})
                followers[token] = followers.get(token, 0) + 1
            last_token = token
        return transitions

    def get_frequency_map(transitions):
        """
        Given a map of absolute transition occurrences like the
        output of get_transition_map, scales all of the transitions
        from each word to sum to 1 so that each word has a
        probability mass function of possible transitions.
        """
        freqs = {}
        for token, followers in transitions.items():
            # float() guarantees true division whatever the division mode
            total = float(sum(followers.values()))
            freqs[token] = {
                next_token: count / total
                for next_token, count in followers.items()
            }
        return freqs

    def select_from_pmf(pmf):
        """
        Given a probability mass function, which is a dictionary mapping
        items to their probabilities, randomly choose one.

        The probabilities are floats, so their running total can end up a
        hair below 1.0 (ten 0.1s add up to 0.9999999999999999). When the
        draw is at or above the final total, the last item iterated is
        returned instead of falling off the loop and returning None.
        """
        selector = rand.random()
        cumulative = 0
        item = None
        for item, prob in pmf.items():
            cumulative += prob
            if cumulative > selector:
                break
        return item

    freqs = get_frequency_map(get_transition_map(source))

    # Built once: rand.choice needs an indexable sequence, and the set of
    # words that have successors never changes after this point.
    starts = list(freqs)

    # Choose a first word randomly and begin transitioning
    word = rand.choice(starts)
    while True:
        yield word
        try:
            pmf = freqs[word]
        except ZalgoError:
            # Dead end (the word was never followed by anything): restart
            word = rand.choice(starts)
        else:
            word = select_from_pmf(pmf)

 # Training text for the Markov word generator; main() splits it on
 # whitespace and feeds the resulting tokens to gen_markov.
 TEXT = """oh god the horror it comes it lurks in the shadows oh god oh god
 oh no why why oh why no no the fear not the oh it can't be why me parsing
 XML with regex it is torture the madness it cannot be fear as it is"""

 def main():
    """
    Write an endless stream of zalgo text to stdout.

    Words come from a Markov chain built over TEXT, are flattened into
    single characters with a space after every word, and are then
    decorated by zalgoify. The word generator never stops, so neither
    does this function.
    """
    def spell_out(words):
        # Flatten the word stream: each word's characters, then one space
        for entry in words:
            for letter in entry:
                yield letter
            yield " "

    rng = SystemRandom()
    word_stream = gen_markov(TEXT.split(), rng)

    for glyph in zalgoify(spell_out(word_stream), rng):
        sys.stdout.write(glyph)

 # Start generating only when run as a script, not when imported.
 if __name__ == '__main__':
    main()
	from __future__ import division

	import sys
	from random import SystemRandom

	# A list of unicode combining characters: every code point from U+0300
	# through U+036F (xrange stops before 0x370). zalgoify samples from it.
	COMBINING = [unichr(c) for c in xrange(0x300, 0x370)]

	# Alias for KeyError; gen_markov catches it when the current word has no
	# entry in its transition table.
	ZalgoError = KeyError

	def zalgoify(source, rand, expected_chars=5):
	    """
	    Given a string and a random number generator, give a zalgo string.

	    Yields every character of ``source`` unchanged, each followed by a
	    random number of combining characters taken from COMBINING.

	    source -- iterable of characters to decorate
	    rand -- object providing ``random()`` and ``sample(population, k)``
	    expected_chars -- controls how many marks get stacked on a character:
	        each draw adds another mark with probability
	        ``1 - 1/expected_chars``, so the count is geometric with a mean
	        of roughly ``expected_chars - 1``, capped at ``len(COMBINING)``.
	        Must be positive; the default is the previously hard-coded 5.

	    Raises ValueError (when iteration starts, since this is a generator)
	    if expected_chars is not positive.
	    """
	    if expected_chars <= 0:
	        raise ValueError("expected_chars must be positive")

	    # Loop invariants: computed once rather than once per character.
	    # 1.0 keeps this a true division even without the __future__ import.
	    stop_threshold = 1.0 / expected_chars
	    max_chars = len(COMBINING)

	    for char in source:
	        yield char # Emit the character

	        # Use geometric R.V. to determine number of additional chars
	        numchars = 0
	        while rand.random() > stop_threshold and numchars < max_chars:
	            numchars += 1

	        # random.Random.sample draws without replacement, so a mark is
	        # never repeated on the same character.
	        for ch in rand.sample(COMBINING, numchars):
	            yield ch

	def gen_markov(source, rand):
	    """
	    Given an input source of tokens, generate an infinite sequence of words
	    using a Markov model

	    source -- iterable of hashable tokens; it is consumed once, up front,
	        to count which token follows which.
	    rand -- object providing ``random()`` and ``choice(seq)``.

	    The first word, and the word emitted after one that has no recorded
	    successor, is picked with ``rand.choice`` from the tokens that have at
	    least one successor. A source with fewer than two tokens yields no
	    transitions, so ``rand.choice`` is then handed an empty list
	    (``random.Random.choice`` raises IndexError for that).
	    """

	    def get_transition_map(source):
	        """
	        Given a source, which is an iterable sequence of words, generate a
	        dictionary of transitions.

	        The transitions dictionary maps a word to another dictionary of
	        the possible words we could emit next.

	        This second-level dictionary maps "next words" to the number of
	        times this transition has occurred in the text.
	        """
	        transitions = {}
	        last_token = None
	        for token in source:
	            if last_token is not None:
	                followers = transitions.setdefault(last_token, {})
	                followers[token] = followers.get(token, 0) + 1
	            last_token = token
	        return transitions

	    def get_frequency_map(transitions):
	        """
	        Given a map of absolute transition occurrences like the
	        output of get_transition_map, scales all of the transitions
	        from each word to sum to 1 so that each word has a
	        probability mass function of possible transitions.
	        """
	        freqs = {}
	        for token, followers in transitions.items():
	            # float() guarantees true division whatever the division mode
	            total = float(sum(followers.values()))
	            freqs[token] = {
	                next_token: count / total
	                for next_token, count in followers.items()
	            }
	        return freqs

	    def select_from_pmf(pmf):
	        """
	        Given a probability mass function, which is a dictionary mapping
	        items to their probabilities, randomly choose one.

	        The probabilities are floats, so their running total can end up a
	        hair below 1.0 (ten 0.1s add up to 0.9999999999999999). When the
	        draw is at or above the final total, the last item iterated is
	        returned instead of falling off the loop and returning None.
	        """
	        selector = rand.random()
	        cumulative = 0
	        item = None
	        for item, prob in pmf.items():
	            cumulative += prob
	            if cumulative > selector:
	                break
	        return item

	    freqs = get_frequency_map(get_transition_map(source))

	    # Built once: rand.choice needs an indexable sequence, and the set of
	    # words that have successors never changes after this point.
	    starts = list(freqs)

	    # Choose a first word randomly and begin transitioning
	    word = rand.choice(starts)
	    while True:
	        yield word
	        try:
	            pmf = freqs[word]
	        except ZalgoError:
	            # Dead end (the word was never followed by anything): restart
	            word = rand.choice(starts)
	        else:
	            word = select_from_pmf(pmf)

	# Training text for the Markov word generator; main() splits it on
	# whitespace and feeds the resulting tokens to gen_markov.
	TEXT = """oh god the horror it comes it lurks in the shadows oh god oh god
	oh no why why oh why no no the fear not the oh it can't be why me parsing
	XML with regex it is torture the madness it cannot be fear as it is"""

	def main():
	    """
	    Write an endless stream of zalgo text to stdout.

	    Words come from a Markov chain built over TEXT, are flattened into
	    single characters with a space after every word, and are then
	    decorated by zalgoify. The word generator never stops, so neither
	    does this function.
	    """
	    def words_to_characters(source):
	        # Flatten the word stream: each word's characters, then one space
	        for word in source:
	            for character in word:
	                yield character
	            yield " "

	    rand = SystemRandom()

	    chars = words_to_characters(gen_markov(TEXT.split(), rand))
	    zalgoed = zalgoify(chars, rand)

	    for char in zalgoed:
	        sys.stdout.write(char)

	# Start generating only when run as a script, not when imported.
	if __name__ == '__main__':
	    main()