CasperCL · May 10, 2018 16:07
diff --git a/markov.py b/markov.py
 """ 
 A simple implementation of an n-gram (2-gram)
 Markov model, written for Python 3.6+
 """ 

 import re
 import random
 from typing import List
 from collections import defaultdict


 def remove_symbols(str) -> str:
    """Return ``str`` with every non-word character replaced by a space.

    Characters matched by the regex class ``\\w`` are kept; each other
    character (punctuation, whitespace, ...) becomes exactly one space, so
    the result always has the same length as the input.
    """
    non_word = re.compile(r'[^\w]')
    return non_word.sub(' ', str)


 def normalise(word: str) -> str:
    """Return the canonical form of ``word`` used as a model key.

    Symbols are first replaced by spaces via ``remove_symbols`` and the
    result is then lower-cased.
    """
    cleaned = remove_symbols(word)
    return cleaned.lower()


 def train(corpus: str) -> dict:
    """Build a bigram (first-order Markov) model from ``corpus``.

    The corpus is split on whitespace and every word is mapped to the list
    of words observed immediately after it. Repeated successors are kept,
    so a uniform random pick from a list reflects observed frequencies.

    Args:
        corpus: Text to learn from. Words are used exactly as they appear,
            so normalise the text beforehand if lookups will be normalised.

    Returns:
        A ``defaultdict(list)`` mapping each word to its list of
        successors. The final word of the corpus gets no entry unless it
        also occurs earlier in the text.
    """
    # Tokenise the argument itself. The previous version ignored ``corpus``
    # and iterated a module-level ``words`` list, which raised NameError
    # whenever that global did not exist.
    tokens = corpus.split()
    model = defaultdict(list)
    # Pair every word with its right-hand neighbour; zip stops one short of
    # the end, so the last word never needs a bounds check.
    for word, next_ in zip(tokens, tokens[1:]):
        model[word].append(next_)
    return model


 def predict(model: dict, word: str) -> str:
    """Pick a random successor of ``word`` from ``model``.

    ``word`` is normalised before the lookup. Returns ``None`` (despite the
    ``str`` annotation) when the normalised word is not a key of ``model``.
    """
    key = normalise(word)
    if key in model:
        successors = model[key]
        return random.choice(successors)
    return None


 def generate_sentence(model: dict, start_word: str, length: int = 10) -> str:
    """Generate a sentence by repeatedly predicting the next word.

    Args:
        model: Mapping of word -> list of successor words.
        start_word: First word of the sentence; emitted unchanged.
        length: Maximum number of words to predict after ``start_word``
            (defaults to 10, the previously hard-coded value).

    Returns:
        The words joined by single spaces. The sentence is shorter than
        ``length + 1`` words when the chain reaches a word that has no
        known successor.
    """
    words = [start_word]
    current = start_word
    for _ in range(length):
        current = predict(model, current)
        # Dead end: ``predict`` returns None for unknown words. Stop here
        # instead of appending None, which previously crashed on the next
        # prediction or in the final join.
        if current is None:
            break
        words.append(current)
    return " ".join(words)

 if __name__ == '__main__':
    # Demo run: train on a sample paragraph and generate a sentence that
    # starts from 'Mathematics'. The result is bound to ``sentence``;
    # nothing is printed by this block.
    corpus: str = "We are in the process of writing a book on Mathematics for Machine Learning that motivates people to learn mathematical concepts. The book is not intended to cover advanced machine learning techniques because there are already plenty of books doing this. Instead, we aim to provide the necessary mathematical skills to read those other books."
    # Normalise each space-separated token; ``words`` stays a module-level
    # name on purpose.
    words: List[str] = list(map(normalise, corpus.split(' ')))
    normalised_corpus = " ".join(words)
    model: dict = train(normalised_corpus)
    sentence: str = generate_sentence(model, 'Mathematics')
	"""
	A simple implementation of an n-gram (2-gram)
	Markov model, written for Python 3.6+
	"""

	import re
	import random
	from typing import List
	from collections import defaultdict


	def remove_symbols(str) -> str:
	    """Return ``str`` with every non-word character replaced by a space.

	    Characters matched by the regex class ``\\w`` are kept; each other
	    character (punctuation, whitespace, ...) becomes exactly one space, so
	    the result always has the same length as the input.
	    """
	    # Body indentation restored: the flattened original did not parse.
	    return re.sub(r'[^\w]', ' ', str)


	def normalise(word: str) -> str:
	    """Return the canonical form of ``word`` used as a model key.

	    Symbols are first replaced by spaces via ``remove_symbols`` and the
	    result is then lower-cased.
	    """
	    # Body indentation restored: the flattened original did not parse.
	    return remove_symbols(word).lower()


	def train(corpus: str) -> dict:
	    """Build a bigram (first-order Markov) model from ``corpus``.

	    The corpus is split on whitespace and every word is mapped to the list
	    of words observed immediately after it. Repeated successors are kept,
	    so a uniform random pick from a list reflects observed frequencies.

	    Args:
	        corpus: Text to learn from. Words are used exactly as they appear,
	            so normalise the text beforehand if lookups will be normalised.

	    Returns:
	        A ``defaultdict(list)`` mapping each word to its list of
	        successors. The final word of the corpus gets no entry unless it
	        also occurs earlier in the text.
	    """
	    # Tokenise the argument itself. The previous version ignored ``corpus``
	    # and iterated a module-level ``words`` list, which raised NameError
	    # whenever that global did not exist. Body indentation is restored too.
	    tokens = corpus.split()
	    model = defaultdict(list)
	    # Pair every word with its right-hand neighbour; zip stops one short of
	    # the end, so the last word never needs a bounds check.
	    for word, next_ in zip(tokens, tokens[1:]):
	        model[word].append(next_)
	    return model


	def predict(model: dict, word: str) -> str:
	    """Pick a random successor of ``word`` from ``model``.

	    ``word`` is normalised before the lookup. Returns ``None`` (despite the
	    ``str`` annotation) when the normalised word is not a key of ``model``.
	    """
	    # Body indentation restored: the flattened original did not parse.
	    word = normalise(word)
	    if word not in model:
	        return None
	    return random.choice(model[word])


	def generate_sentence(model: dict, start_word: str, length: int = 10) -> str:
	    """Generate a sentence by repeatedly predicting the next word.

	    Args:
	        model: Mapping of word -> list of successor words.
	        start_word: First word of the sentence; emitted unchanged.
	        length: Maximum number of words to predict after ``start_word``
	            (defaults to 10, the previously hard-coded value).

	    Returns:
	        The words joined by single spaces. The sentence is shorter than
	        ``length + 1`` words when the chain reaches a word that has no
	        known successor.
	    """
	    words = [start_word]
	    current = start_word
	    for _ in range(length):
	        current = predict(model, current)
	        # Dead end: ``predict`` returns None for unknown words. Stop here
	        # instead of appending None, which previously crashed on the next
	        # prediction or in the final join.
	        if current is None:
	            break
	        words.append(current)
	    return " ".join(words)

	if __name__ == '__main__':
	    # Demo run: train on a sample paragraph and generate a sentence that
	    # starts from 'Mathematics'. The result is bound to ``sentence``;
	    # nothing is printed by this block.
	    # Body indentation restored: the flattened original did not parse.
	    corpus: str = "We are in the process of writing a book on Mathematics for Machine Learning that motivates people to learn mathematical concepts. The book is not intended to cover advanced machine learning techniques because there are already plenty of books doing this. Instead, we aim to provide the necessary mathematical skills to read those other books."
	    # Normalise each space-separated token before training.
	    words: List[str] = [normalise(word) for word in corpus.split(' ')]
	    model: dict = train(" ".join(words))
	    sentence: str = generate_sentence(model, 'Mathematics')