Skip to content

Instantly share code, notes, and snippets.

@1328
Created October 16, 2014 22:51
Show Gist options
  • Select an option

  • Save 1328/0a0123cc93f8a2cc8f24 to your computer and use it in GitHub Desktop.

Select an option

Save 1328/0a0123cc93f8a2cc8f24 to your computer and use it in GitHub Desktop.
round 2
from string import ascii_lowercase
from collections import defaultdict
from pprint import pprint
def add_word(matrix, word):
    '''
    Add every adjacent letter pair of word to a letter count matrix.

    The matrix is modified in place and stores things as follows:
    matrix[first][second] = count of times first+second have been seen

    matrix may be a plain dict or a defaultdict; a row for a first letter
    that has not been seen yet is created on demand.
    word is lower-cased before its pairs are counted.

    Note: pairs carry no position information, so letters that are more
    common at the end of words are not treated specially.
    Also note a word with fewer than two letters contains no pairs, so it
    leaves the matrix unchanged.
    '''
    word = word.lower()
    for first, second in zip(word, word[1:]):
        try:
            # a defaultdict builds the missing row with its own factory here
            row = matrix[first]
        except KeyError:
            # plain dict: create the row ourselves instead of failing
            row = matrix[first] = {}
        row[second] = row.get(second, 0) + 1
def letter_probs(m):
    '''
    Normalise one row of the letter count matrix.

    m maps each second letter to the number of times it followed a
    particular first letter, e.g. letter_probs(matrix['a']).
    Returns a new dictionary of k:v where:
    k = second letter
    v = that letter's count divided by the sum of all counts in m
    m itself is not modified; an empty m gives an empty result.
    '''
    count_sum = sum(m.values())
    probabilities = {}
    for letter, count in m.items():
        probabilities[letter] = count / count_sum
    return probabilities
def build_stochastic(matrix):
    '''
    Turn a whole letter count matrix into per-letter probabilities.

    matrix maps each first letter to a dictionary of following-letter counts.
    Returns a new dictionary with the same first-letter keys; each value is
    the result of letter_probs for that letter's counts.
    '''
    stochastic = {}
    for first in matrix:
        stochastic[first] = letter_probs(matrix[first])
    return stochastic
# Demo: count the letter pairs of a few sample words, then print the
# resulting table of following-letter probabilities.
matrix = defaultdict(dict)
for sample_word in ('hello', 'jello', 'ham', 'dam', 'dan', 'dap'):
    add_word(matrix, sample_word)
# To inspect a single row instead, use: print(letter_probs(matrix['a']))
print(build_stochastic(matrix))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment