Created
October 16, 2014 22:51
-
-
Save 1328/0a0123cc93f8a2cc8f24 to your computer and use it in GitHub Desktop.
round 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from string import ascii_lowercase | |
| from collections import defaultdict | |
| from pprint import pprint | |
def add_word(matrix, word):
    """Add the adjacent-letter pairs of *word* to a letter count matrix.

    The matrix is a dict of dicts and is modified in place:
    ``matrix[first][second]`` is the number of times ``first`` was seen
    immediately followed by ``second``.

    The word is lower-cased before counting. A word shorter than two
    characters contains no pairs, so it leaves the matrix unchanged.

    Args:
        matrix: Mapping of first letter -> {second letter: count}. A plain
            ``dict`` works as well as a ``defaultdict(dict)``.
        word: The word whose letter pairs are counted.

    Note: nothing here records that some letters are more common at the
    end of a word; consider an end-of-word marker if that matters.
    """
    word = word.lower()
    for first, second in zip(word, word[1:]):
        try:
            row = matrix[first]
        except KeyError:
            # A plain dict has no default factory, so create the row here.
            row = matrix[first] = {}
        row[second] = row.get(second, 0) + 1
def letter_probs(m):
    """Convert one row of the letter count matrix into probabilities.

    For example, ``letter_probs(matrix['a'])`` returns a dict whose keys are
    the letters seen after ``'a'`` and whose values are the fraction of
    times (between 0 and 1, not a percentage) each letter followed it.

    Args:
        m: Mapping of second letter -> count for a single first letter.

    Returns:
        A new dict mapping each second letter to ``count / total``, where
        ``total`` is the sum of all counts in the row. The values sum to 1
        (up to floating-point rounding). An empty row gives an empty dict.

    Raises:
        ZeroDivisionError: If the row is non-empty but its counts sum to 0.
    """
    total = sum(m.values())
    return {letter: count / total for letter, count in m.items()}
def build_stochastic(matrix):
    """Build the stochastic matrix for a whole letter count matrix.

    Every first letter in *matrix* becomes a key of the returned dict. Its
    value is that letter's row of counts converted by ``letter_probs``,
    i.e. a dict of following letter -> probability.
    """
    stochastic = {}
    for first, row in matrix.items():
        stochastic[first] = letter_probs(row)
    return stochastic
# Demo: count the letter pairs of a few sample words, then print the
# resulting table of transition probabilities.
matrix = defaultdict(dict)
for sample in ('hello', 'jello', 'ham', 'dam', 'dan', 'dap'):
    add_word(matrix, sample)
# To inspect a single row instead: print(letter_probs(matrix['a']))
print(build_stochastic(matrix))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment