jan-g · April 26, 2017 14:12
diff --git a/digraph b/digraph
 #!/usr/bin/env python3

 import collections
 import argparse
 import itertools


 def load_ngraphs(fn, n):
    """Load relative n-graph frequencies from a word-list file.

    Each non-blank line of *fn* is stripped of surrounding whitespace,
    bracketed with ``$`` markers (``apple`` -> ``$apple$``) and cut into
    every overlapping substring of length *n*
    (``$a ap pp pl le e$`` for ``n == 2``).

    Blank lines are skipped; otherwise each one would contribute n-graphs
    made only of markers (``$$`` for ``n == 2``) and skew the frequencies.

    Returns a dict mapping each n-graph to its count divided by the total
    number of n-graphs counted; a file with no n-graphs yields ``{}``.
    """
    counts = collections.Counter()

    with open(fn) as f:
        for line in f:
            word = line.strip()
            if not word:
                # Nothing between the markers: not a sample word.
                continue

            padded = '$' + word + '$'
            counts.update(padded[i:i + n]
                          for i in range(len(padded) - n + 1))

    total = sum(counts.values())
    return {ngraph: count / total for ngraph, count in counts.items()}


 def score(freqs, word, n):
    """Work out the probabilistic score for this particular word.

    The word is bracketed with ``$`` markers and split into overlapping
    n-graphs of length *n*; the score is the product of each n-graph's
    value in *freqs*. An n-graph missing from *freqs* counts as 0, which
    makes the whole score 0. A word too short to contain any n-graph
    yields the empty product, 1.
    """
    padded = '$' + word + '$'
    product = 1

    for i in range(len(padded) - n + 1):
        freq = freqs.get(padded[i:i + n], 0)
        if not freq:
            # A zero factor already fixes the result, so stop early;
            # hunt() calls this once per permutation.
            return 0
        product *= freq

    return product


 def hunt(freqs, word, n):
    """Score every distinct rearrangement of the letters of *word*.

    Returns a dict mapping each distinct permutation string to the value
    score() gives it for *freqs* and *n*.
    """
    # Repeated letters produce duplicate permutations; the set removes them.
    candidates = set(map(''.join, itertools.permutations(word)))

    scored = {}
    for candidate in candidates:
        scored[candidate] = score(freqs, candidate, n)
    return scored
        

 def main():
    """Command-line entry point: rank anagrams of a word by n-graph score.

    Parses the arguments, loads n-graph frequencies from the ``--dict``
    word list, scores every distinct permutation of the given word and
    prints up to 100 of the candidates with a non-zero score, best first,
    one per line as ``index relative-score word``. The relative score is
    the candidate's score divided by the best score found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dict', default='source.txt', help='word list source')
    parser.add_argument('-n', default=2, type=int, help='length of n-gram to use')
    parser.add_argument('anag', help='word to look for an anagram for')

    args = parser.parse_args()
    if args.n < 1:
        # A non-positive width slices out empty or meaningless n-graphs,
        # so reject it before doing any work.
        parser.error('-n must be a positive integer')

    freqs = load_ngraphs(args.dict, args.n)
    results = hunt(freqs, args.anag, args.n)

    best = max(results.values())
    # The > 0 filter also means the division is never evaluated when
    # best is 0 (no candidate scored above zero).
    by_score = sorted(
        ((value / best, anag) for anag, value in results.items() if value > 0),
        reverse=True)

    # Loop names avoid `score` so the module-level function is not shadowed.
    for rank, (relative, anag) in enumerate(by_score[:100]):
        print(rank, relative, anag)


 # Run the command-line interface only when this file is executed as a
 # script, not when it is imported.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3

	import collections
	import argparse
	import itertools


	def load_ngraphs(fn, n):
	    """Load relative n-graph frequencies from a word-list file.

	    Each non-blank line of *fn* is stripped of surrounding whitespace,
	    bracketed with ``$`` markers (``apple`` -> ``$apple$``) and cut into
	    every overlapping substring of length *n*
	    (``$a ap pp pl le e$`` for ``n == 2``).

	    Blank lines are skipped; otherwise each one would contribute n-graphs
	    made only of markers (``$$`` for ``n == 2``) and skew the frequencies.

	    Returns a dict mapping each n-graph to its count divided by the total
	    number of n-graphs counted; a file with no n-graphs yields ``{}``.
	    """
	    counts = collections.Counter()

	    with open(fn) as f:
	        for line in f:
	            word = line.strip()
	            if not word:
	                # Nothing between the markers: not a sample word.
	                continue

	            padded = '$' + word + '$'
	            counts.update(padded[i:i + n]
	                          for i in range(len(padded) - n + 1))

	    total = sum(counts.values())
	    return {ngraph: count / total for ngraph, count in counts.items()}


	def score(freqs, word, n):
	    """Work out the probabilistic score for this particular word.

	    The word is bracketed with ``$`` markers and split into overlapping
	    n-graphs of length *n*; the score is the product of each n-graph's
	    value in *freqs*. An n-graph missing from *freqs* counts as 0, which
	    makes the whole score 0. A word too short to contain any n-graph
	    yields the empty product, 1.
	    """
	    padded = '$' + word + '$'
	    product = 1

	    for i in range(len(padded) - n + 1):
	        freq = freqs.get(padded[i:i + n], 0)
	        if not freq:
	            # A zero factor already fixes the result, so stop early;
	            # hunt() calls this once per permutation.
	            return 0
	        product *= freq

	    return product


	def hunt(freqs, word, n):
	    """Score every distinct rearrangement of the letters of *word*.

	    Returns a dict mapping each distinct permutation string to the value
	    score() gives it for *freqs* and *n*.
	    """
	    # Repeated letters produce duplicate permutations; the set removes them.
	    return {anag: score(freqs, anag, n)
	            for anag in set(map(''.join, itertools.permutations(word)))}


	def main():
	    """Command-line entry point: rank anagrams of a word by n-graph score.

	    Parses the arguments, loads n-graph frequencies from the ``--dict``
	    word list, scores every distinct permutation of the given word and
	    prints up to 100 of the candidates with a non-zero score, best first,
	    one per line as ``index relative-score word``. The relative score is
	    the candidate's score divided by the best score found.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--dict', default='source.txt', help='word list source')
	    parser.add_argument('-n', default=2, type=int, help='length of n-gram to use')
	    parser.add_argument('anag', help='word to look for an anagram for')

	    args = parser.parse_args()
	    if args.n < 1:
	        # A non-positive width slices out empty or meaningless n-graphs,
	        # so reject it before doing any work.
	        parser.error('-n must be a positive integer')

	    freqs = load_ngraphs(args.dict, args.n)
	    results = hunt(freqs, args.anag, args.n)

	    best = max(results.values())
	    # The > 0 filter also means the division is never evaluated when
	    # best is 0 (no candidate scored above zero).
	    by_score = sorted(
	        ((value / best, anag) for anag, value in results.items() if value > 0),
	        reverse=True)

	    # Loop names avoid `score` so the module-level function is not shadowed.
	    for rank, (relative, anag) in enumerate(by_score[:100]):
	        print(rank, relative, anag)


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported.
	if __name__ == '__main__':
	    main()