Skip to content

Instantly share code, notes, and snippets.

@CasperCL
Last active May 10, 2018 16:07
Show Gist options
  • Save CasperCL/bfc99f77d1b57dd5434aea5abf629d98 to your computer and use it in GitHub Desktop.
Save CasperCL/bfc99f77d1b57dd5434aea5abf629d98 to your computer and use it in GitHub Desktop.
Simple implementation of a Markov model
"""
A simple implementation of an n-gram (2-gram)
Markov model written for Python 3.6+
"""
import random
import re
from collections import defaultdict
from typing import List, Optional
def remove_symbols(str: str) -> str:
    """
    Replace every non-word character in the given text with a space.

    A "word character" is whatever the regex class ``\\w`` matches
    (letters, digits and underscore); each other character is replaced
    one-for-one by a single space, so the length of the text is preserved.

    NOTE: the parameter shadows the ``str`` builtin inside this function;
    the name is kept so existing keyword callers keep working.
    """
    return re.sub(r'\W', ' ', str)
def normalise(word: str) -> str:
    """
    Return a canonical, lower-case form of the given text.

    Symbols are first replaced by spaces via ``remove_symbols``; the result
    is lower-cased and its whitespace is trimmed and collapsed to single
    spaces. Without that last step "concepts." would normalise to
    "concepts " (with a trailing space) and fail to match "concepts".
    """
    cleaned = remove_symbols(word).lower()
    # split()/join drops leading/trailing padding and squeezes inner runs.
    return " ".join(cleaned.split())
def train(corpus: str) -> dict:
    """
    Build a 2-gram (bigram) Markov model from a corpus.

    The corpus is split on whitespace and each word is mapped to the list
    of words that directly follow it. A follower appears once per
    occurrence, so picking uniformly from the list reflects how often each
    transition was seen.

    The words are used exactly as they appear in ``corpus``. ``predict``
    normalises the word it looks up, so pass an already normalised corpus
    if the model is going to be queried through ``predict``.

    :param corpus: whitespace separated training text
    :return: mapping of word -> list of following words; the last word of
             the corpus only gets an entry if it also occurs earlier
    """
    # Tokenise the argument itself instead of relying on a module-level
    # ``words`` list that only exists when the file is run as a script.
    words = corpus.split()
    model = defaultdict(list)
    # Pair every word with its successor; zip stops before the last word,
    # which has no successor.
    for word, next_ in zip(words, words[1:]):
        model[word].append(next_)
    return model
def predict(model: dict, word: str) -> Optional[str]:
    """
    Pick a random successor for the given word.

    The word is normalised before the lookup. The successor is chosen
    uniformly from the list of followers stored for that word.

    :param model: mapping of word -> list of following words
    :param word: the word to continue from
    :return: a following word, or None when the model has no followers
             recorded for the word
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty list for every unknown word that is looked up.
    followers = model.get(normalise(word))
    # Also covers an empty list, on which random.choice raises IndexError.
    if not followers:
        return None
    return random.choice(followers)
def generate_sentence(model: dict, start_word: str, length: int = 10) -> str:
    """
    Generate a sentence by repeatedly predicting the next word.

    Starting from ``start_word``, up to ``length`` further words are
    obtained from ``predict`` and appended. Generation stops early when
    ``predict`` returns None, i.e. the chain reached a word without a
    known successor.

    :param model: mapping of word -> list of following words
    :param start_word: first word of the sentence, emitted unchanged
    :param length: maximum number of words to add after ``start_word``
    :return: the words joined by single spaces
    """
    words = [start_word]
    current = start_word
    for _ in range(length):
        current = predict(model, current)
        # Dead end: stop here instead of appending None, which would break
        # both the next prediction and the final join.
        if current is None:
            break
        words.append(current)
    return " ".join(words)
if __name__ == '__main__':
    # Demo: train on a short text and print one generated sentence.
    corpus: str = "We are in the process of writing a book on Mathematics for Machine Learning that motivates people to learn mathematical concepts. The book is not intended to cover advanced machine learning techniques because there are already plenty of books doing this. Instead, we aim to provide the necessary mathematical skills to read those other books."
    # Normalise the whole text first and split afterwards, so punctuation
    # that was turned into spaces does not stay attached to a token
    # (e.g. "concepts " instead of "concepts").
    words: List[str] = normalise(corpus).split()
    model: dict = train(" ".join(words))
    sentence: str = generate_sentence(model, 'Mathematics')
    # Show the result; otherwise running the script has no visible effect.
    print(sentence)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment