frnsys · August 29, 2015 13:56 · dattasaurabh82 · Mar 23, 2015
diff --git a/markov.py b/markov.py
 # -*- coding: utf-8 -*-

 text = """
 The Eagle has landed. The regret on our side is, they used to say years ago, we are reading about you in science class. Now they say, we are reading about you in history class.

 Never in all their history have men been able truly to conceive of the world as one: a single sphere, a globe, having the qualities of a globe, a round earth in which all the directions eventually meet, in which there is no center because every point, or none, is center, an equal earth which all men occupy as equals. The airman's earth, if free men make it, will be truly round: a globe in practice, not in theory.

 For those who have seen the Earth from space, and for the hundreds and perhaps thousands more who will, the experience most certainly changes your perspective. The things that we share in our world are far more valuable than those which divide us.
 """

 import random

 # Maps each word to a dict of {following word: times it followed}.
 # The markers '<start>' and '<stop>' stand for the sentence boundaries.
 knowledge = {}

 # Split the lowercased text into sentences on periods.
 # Fragments without any word (e.g. the whitespace left after the final
 # period) are dropped so they cannot record a bogus
 # '<start>' -> '<stop>' transition.
 sentences = [s for s in text.lower().split('.') if s.strip()]

 # Generate the "knowledge".
 for sentence in sentences:
    # str.split() without an argument splits on any run of whitespace
    # (spaces, tabs, line breaks) and never yields empty strings.
    # A real list is built on purpose: on Python 3 filter() returns a
    # lazy iterator that has no insert()/append().
    words = ['<start>'] + sentence.split() + ['<stop>']

    # Count every adjacent (word, follower) pair; the last entry
    # ('<stop>') has no follower and therefore gets no table entry.
    for word, follower in zip(words, words[1:]):
        entry = knowledge.setdefault(word, {})
        entry[follower] = entry.get(follower, 0) + 1

 def generate():
    """
    Build one random sentence from the module-level `knowledge` table.

    Starting at '<start>', repeatedly looks up the last entry of the
    chain in `knowledge` and appends whatever `weighted_choice` picks
    from its followers. The walk ends when '<stop>' is appended or when
    a lookup raises KeyError.

    Returns the chain without its first and last entries, joined by
    single spaces and terminated with a period.
    """
    chain = ['<start>']

    while chain[-1] != '<stop>':
        current = chain[-1]
        try:
            followers = knowledge[current]
            chain.append(weighted_choice(followers))
        except KeyError:
            # Nothing is recorded for the current entry: stop walking.
            break

    # Drop the leading '<start>' marker and the final entry.
    inner = chain[1:-1]
    return ' '.join(inner) + '.'


 def weighted_choice(choices):
    """
    Pick a random key from `choices`, a dict mapping each key to its
    weight; keys with larger weights are picked more often.

    Returns False when no key is selected, which happens when the
    dict is empty or the weights sum to zero.
    """
    # Draw a threshold between 0 and the total weight.
    total = sum(choices.values())
    threshold = random.uniform(0, total)

    # Accumulate weights in iteration order; the first key whose
    # running total exceeds the threshold is the pick.
    running = 0.0
    for candidate in choices:
        running += choices[candidate]
        if threshold < running:
            return candidate

    # Nothing crossed the threshold.
    return False
diff --git a/markov_generate.py b/markov_generate.py
 def generate():
    """
    Build one random sentence from the `knowledge` table.

    Starting at '<start>', repeatedly looks up the last entry of the
    chain in `knowledge` and appends whatever `weighted_choice` picks
    from its followers. The walk ends when '<stop>' is appended or when
    a lookup raises KeyError.

    Returns the chain without its first and last entries, joined by
    single spaces and terminated with a period.
    """
    chain = ['<start>']

    while chain[-1] != '<stop>':
        current = chain[-1]
        try:
            followers = knowledge[current]
            chain.append(weighted_choice(followers))
        except KeyError:
            # Nothing is recorded for the current entry: stop walking.
            break

    # Drop the leading '<start>' marker and the final entry.
    inner = chain[1:-1]
    return ' '.join(inner) + '.'


 def weighted_choice(choices):
    """
    Pick a random key from `choices`, a dict mapping each key to its
    weight; keys with larger weights are picked more often.

    Returns False when no key is selected, which happens when the
    dict is empty or the weights sum to zero.
    """
    # Draw a threshold between 0 and the total weight.
    total = sum(choices.values())
    threshold = random.uniform(0, total)

    # Accumulate weights in iteration order; the first key whose
    # running total exceeds the threshold is the pick.
    running = 0.0
    for candidate in choices:
        running += choices[candidate]
        if threshold < running:
            return candidate

    # Nothing crossed the threshold.
    return False
diff --git a/markov_training.py b/markov_training.py
 # Maps each word to a dict of {following word: times it followed}.
 # The markers '<start>' and '<stop>' stand for the sentence boundaries.
 knowledge = {}

 # Split the lowercased text into sentences on periods.
 # Fragments without any word (e.g. the whitespace left after the final
 # period) are dropped so they cannot record a bogus
 # '<start>' -> '<stop>' transition.
 sentences = [s for s in text.lower().split('.') if s.strip()]

 # Generate the "knowledge".
 for sentence in sentences:
    # str.split() without an argument splits on any run of whitespace
    # (spaces, tabs, line breaks) and never yields empty strings.
    # A real list is built on purpose: on Python 3 filter() returns a
    # lazy iterator that has no insert()/append().
    words = ['<start>'] + sentence.split() + ['<stop>']

    # Count every adjacent (word, follower) pair; the last entry
    # ('<stop>') has no follower and therefore gets no table entry.
    for word, follower in zip(words, words[1:]):
        entry = knowledge.setdefault(word, {})
        entry[follower] = entry.get(follower, 0) + 1
	# -*- coding: utf-8 -*-

	text = """
	The Eagle has landed. The regret on our side is, they used to say years ago, we are reading about you in science class. Now they say, we are reading about you in history class.

	Never in all their history have men been able truly to conceive of the world as one: a single sphere, a globe, having the qualities of a globe, a round earth in which all the directions eventually meet, in which there is no center because every point, or none, is center, an equal earth which all men occupy as equals. The airman's earth, if free men make it, will be truly round: a globe in practice, not in theory.

	For those who have seen the Earth from space, and for the hundreds and perhaps thousands more who will, the experience most certainly changes your perspective. The things that we share in our world are far more valuable than those which divide us.
	"""

	import random

	# Maps each word to a dict of {following word: times it followed}.
	# The markers '<start>' and '<stop>' stand for the sentence boundaries.
	knowledge = {}

	# Split the lowercased text into sentences on periods.
	# Fragments without any word (e.g. the whitespace left after the final
	# period) are dropped so they cannot record a bogus
	# '<start>' -> '<stop>' transition.
	sentences = [s for s in text.lower().split('.') if s.strip()]

	# Generate the "knowledge".
	for sentence in sentences:
	    # str.split() without an argument splits on any run of whitespace
	    # (spaces, tabs, line breaks) and never yields empty strings.
	    # A real list is built on purpose: on Python 3 filter() returns a
	    # lazy iterator that has no insert()/append().
	    words = ['<start>'] + sentence.split() + ['<stop>']

	    # Count every adjacent (word, follower) pair; the last entry
	    # ('<stop>') has no follower and therefore gets no table entry.
	    for word, follower in zip(words, words[1:]):
	        entry = knowledge.setdefault(word, {})
	        entry[follower] = entry.get(follower, 0) + 1

	def generate():
	    """
	    Build one random sentence from the module-level `knowledge` table.

	    Starting at '<start>', repeatedly looks up the last entry of the
	    chain in `knowledge` and appends whatever `weighted_choice` picks
	    from its followers. The walk ends when '<stop>' is appended or
	    when a lookup raises KeyError.

	    Returns the chain without its first and last entries, joined by
	    single spaces and terminated with a period.
	    """
	    # Start with the start word.
	    sentence = ['<start>']

	    # Start picking words for the sentence!
	    while sentence[-1] != '<stop>':
	        try:
	            word = weighted_choice(knowledge[sentence[-1]])
	            sentence.append(word)
	        except KeyError:
	            # Nothing is recorded for the current entry: stop walking.
	            break

	    # Join the sentence, with a period for good measure.
	    return ' '.join(sentence[1:-1]) + '.'


	def weighted_choice(choices):
	    """
	    Randomly select a key from a dictionary,
	    where each key's value is its probability weight.

	    Returns False when no key is selected, which happens when the
	    dictionary is empty or the weights sum to zero.
	    """
	    # Randomly select a value between 0 and
	    # the sum of all the weights.
	    rand = random.uniform(0, sum(choices.values()))

	    # Seek through the dict until a key is found
	    # whose cumulative weight exceeds the random value.
	    summ = 0.0
	    for key, value in choices.items():
	        summ += value
	        if rand < summ:
	            return key

	    # If this returns False,
	    # it's likely because the knowledge is empty.
	    return False