kmwenja · December 4, 2018 06:15
diff --git a/N-Gram Similarity b/N-Gram Similarity
 An experiment in measuring string similarity with n-grams.

 Usage:

 `$ python trigram.py '<left string>' '<right string>' <maximum number of letters in a gram>`

 Example:

 ```
 $ python trigram.py 'hello' 'hallo' 3
 Left to Right Similarity: 0.5
 Right to Left Similarity: 0.5
 ```
diff --git a/trigram.py b/trigram.py
 import sys


 def grammify(s, maxn=3):
    """Return every n-gram of ``s`` for gram lengths 1 through ``maxn``.

    Space characters (``' '`` only) are removed before the grams are cut.
    Grams are listed shortest length first and, within one length, in
    left-to-right order; repeated grams are kept.

    Args:
        s: The string to split into grams.
        maxn: The longest gram length to produce. Lengths greater than the
            number of remaining characters produce nothing.

    Returns:
        A list of substrings; empty when ``s`` has no non-space characters
        or ``maxn`` is less than 1.
    """
    text = s.replace(' ', '')
    grams = []
    # The upper bound includes len(text) itself so that the whole string
    # counts as a gram (e.g. 'abc' yields the trigram 'abc').
    for n in range(1, min(maxn, len(text)) + 1):
        # Only start positions that leave room for a full n-character gram.
        grams.extend(text[i:i + n] for i in range(len(text) - n + 1))

    return grams


 def main():
    """Print the n-gram similarity of two command-line strings, both ways.

    Reads the left string, the right string and the maximum gram length
    from ``sys.argv[1]``, ``sys.argv[2]`` and ``sys.argv[3]``. Each score is
    the number of distinct grams shared by both strings divided by the
    number of grams (duplicates included) of one side. A side that yields
    no grams scores 0.0 instead of raising ``ZeroDivisionError``.

    Raises:
        SystemExit: If fewer than three arguments are given.
        ValueError: If the third argument is not an integer.
    """
    if len(sys.argv) < 4:
        sys.exit(
            "usage: python {} '<left string>' '<right string>' "
            "<maximum number of letters in a gram>".format(sys.argv[0])
        )

    s1 = sys.argv[1]
    s2 = sys.argv[2]
    maxn = int(sys.argv[3])

    left = grammify(s1, maxn=maxn)
    right = grammify(s2, maxn=maxn)
    common = set(left).intersection(right)

    # NOTE: the numerator counts distinct grams while the denominator counts
    # every gram, so strings with repeated grams score below 1.0 even when
    # they are identical.

    # how similar is s1 to s2
    right_score = len(common) / len(right) if right else 0.0
    print("Left to Right Similarity:", right_score)

    # how similar is s2 to s1
    left_score = len(common) / len(left) if left else 0.0
    print("Right to Left Similarity:", left_score)


 # Run the comparison only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	An experiment in measuring string similarity with n-grams.

	Usage:

	`$ python trigram.py '<left string>' '<right string>' <maximum number of letters in a gram>`

	Example:

	```
	$ python trigram.py 'hello' 'hallo' 3
	Left to Right Similarity: 0.5
	Right to Left Similarity: 0.5
	```
	import sys


	def grammify(s, maxn=3):
	    """Return every n-gram of ``s`` for gram lengths 1 through ``maxn``.

	    Space characters (``' '`` only) are removed before the grams are cut.
	    Grams are listed shortest length first and, within one length, in
	    left-to-right order; repeated grams are kept.

	    Args:
	        s: The string to split into grams.
	        maxn: The longest gram length to produce. Lengths greater than
	            the number of remaining characters produce nothing.

	    Returns:
	        A list of substrings; empty when ``s`` has no non-space
	        characters or ``maxn`` is less than 1.
	    """
	    text = s.replace(' ', '')
	    grams = []
	    # The upper bound includes len(text) itself so that the whole string
	    # counts as a gram (e.g. 'abc' yields the trigram 'abc').
	    for n in range(1, min(maxn, len(text)) + 1):
	        # Only start positions that leave room for a full n-character gram.
	        grams.extend(text[i:i + n] for i in range(len(text) - n + 1))

	    return grams


	def main():
	    """Print the n-gram similarity of two command-line strings, both ways.

	    Reads the left string, the right string and the maximum gram length
	    from ``sys.argv[1]``, ``sys.argv[2]`` and ``sys.argv[3]``. Each score
	    is the number of distinct grams shared by both strings divided by the
	    number of grams (duplicates included) of one side. A side that yields
	    no grams scores 0.0 instead of raising ``ZeroDivisionError``.

	    Raises:
	        SystemExit: If fewer than three arguments are given.
	        ValueError: If the third argument is not an integer.
	    """
	    if len(sys.argv) < 4:
	        sys.exit(
	            "usage: python {} '<left string>' '<right string>' "
	            "<maximum number of letters in a gram>".format(sys.argv[0])
	        )

	    s1 = sys.argv[1]
	    s2 = sys.argv[2]
	    maxn = int(sys.argv[3])

	    left = grammify(s1, maxn=maxn)
	    right = grammify(s2, maxn=maxn)
	    common = set(left).intersection(right)

	    # NOTE: the numerator counts distinct grams while the denominator
	    # counts every gram, so strings with repeated grams score below 1.0
	    # even when they are identical.

	    # how similar is s1 to s2
	    right_score = len(common) / len(right) if right else 0.0
	    print("Left to Right Similarity:", right_score)

	    # how similar is s2 to s1
	    left_score = len(common) / len(left) if left else 0.0
	    print("Right to Left Similarity:", left_score)


	# Run the comparison only when this file is executed as a script.
	if __name__ == "__main__":
	    main()