Skip to content

Instantly share code, notes, and snippets.

@jordanorelli
Created April 2, 2012 05:11
Show Gist options
  • Save jordanorelli/2280931 to your computer and use it in GitHub Desktop.
Save jordanorelli/2280931 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import collections
import string
from string import ascii_lowercase
from sys import stderr
pair_counts = {}
# pair_counts has a key for each character that we've seen as the first
# character of a pair. The values associated with these keys are themselves
# dictionaries, whose keys are also characters, such that
# pair_counts['a']['k'] is the number of occurrences of the pair "ak".
pair_totals = {}
# pair_totals also contains a key for each character that we've seen as the
# first character of a pair. The value for each one is the total number of
# pairs we've seen that have that character as their first character.
pair_probabilities = {}
# pair_probabilities is meant to be like pair_counts, but holding the
# probability of seeing each pair instead of its raw number of occurrences.
# Nothing visible here fills it in yet: get_probabilities() is still a stub.
def get_probabilities():
    """Placeholder for the probability computation; currently does nothing.

    Presumably intended to derive pair_probabilities from pair_counts and
    pair_totals, but that is not implemented here -- confirm the intended
    formula before filling it in.

    Returns None.
    """
    # NOTE(review): this appears to be invoked once per word while the
    # dictionary is loaded, so a full recomputation on every call would be
    # expensive; consider calling it once after loading instead.
    return None
def add_pair(left, right):
    """Record one occurrence of the character pair ``left + right``.

    Updates two module-level tables:

    * ``pair_counts[left][right]`` -- how often ``right`` followed ``left``;
    * ``pair_totals[left]`` -- how many pairs started with ``left``.

    Returns the number of times this exact pair has been seen so far,
    including the occurrence just recorded.
    """
    # setdefault creates the inner table the first time `left` shows up.
    followers = pair_counts.setdefault(left, {})
    pair_totals[left] = pair_totals.get(left, 0) + 1
    followers[right] = followers.get(right, 0) + 1
    return followers[right]
def tabulate_pairs(word):
    """Record every pair of adjacent characters in ``word`` via add_pair().

    For "cat" this records "ca" and then "at". Words shorter than two
    characters contain no pairs and leave the tables untouched.

    Returns None.
    """
    # Zipping the word against itself shifted by one yields each adjacent
    # pair in order, and yields nothing at all for words of length 0 or 1,
    # so no explicit length guard is needed.
    for left, right in zip(word, word[1:]):
        add_pair(left, right)
def add_word(word):
    """Feed one word into the pair statistics.

    Tabulates the adjacent-character pairs of ``word`` and then calls
    get_probabilities().

    Returns None.
    """
    tabulate_pairs(word)
    # NOTE(review): the original indentation was lost; this call is assumed
    # to belong to add_word rather than to module level -- confirm.
    get_probabilities()
# Feed every entry of the system word list into the pair tables. Each line is
# stripped of surrounding whitespace and lower-cased before being added.
# The `with` block closes the file even if add_word() raises.
with open('/usr/share/dict/words', 'r') as f:
    for line in f:
        word = line.strip().lower()
        add_word(word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment