Last active
April 10, 2018 14:16
-
-
Save ylt6/a7d937bb92f0edd27648b63bfe477e0d to your computer and use it in GitHub Desktop.
courses.csail.mit.edu/6.006/fall11/lectures/lecture2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import math | |
def make_words(document):
    """Split ``document`` into its whitespace-separated words.

    Args:
        document: The text to tokenize.

    Returns:
        A list of the words in order of appearance; empty when the text
        contains nothing but whitespace.
    """
    tokens = document.split()
    return tokens
def make_frequency(document):
    """Count how many times each word of ``document`` occurs.

    The words are those produced by ``make_words`` for the document.

    Args:
        document: The text whose words are counted.

    Returns:
        A dict mapping every distinct word to its number of occurrences.
    """
    counts = {}
    for word in make_words(document):
        # A word seen for the first time starts from an implicit count of 0.
        counts[word] = counts.get(word, 0) + 1
    return counts
def dot_product(f1, f2):
    """Return the dot product of two sparse vectors stored as mappings.

    Only keys present in both mappings contribute; a key missing from
    either side is treated as a zero component.

    Args:
        f1: Mapping from key to numeric value.
        f2: Mapping from key to numeric value.

    Returns:
        The sum of ``f1[k] * f2[k]`` over the keys the mappings share,
        or ``0`` when they share none.
    """
    # Use the builtin sum() rather than a local accumulator that shadows it.
    return sum(f1[k] * f2[k] for k in f1 if k in f2)
def d(d1, d2):
    """Return the angle, in radians, between the word-frequency vectors of two texts.

    Each text is converted to a word-count mapping with ``make_frequency``;
    the result is ``acos`` of the cosine similarity of those two mappings.
    Identical word distributions give ``0.0``; texts sharing no word give
    ``pi / 2``.

    Args:
        d1: First document text.
        d2: Second document text.

    Returns:
        The angle between the two frequency vectors as a float.

    Raises:
        ZeroDivisionError: If either document contains no words, because
            the angle to a zero-length vector is undefined.
    """
    d1f = make_frequency(d1)
    d2f = make_frequency(d2)
    norm_product = dot_product(d1f, d1f) * dot_product(d2f, d2f)
    if norm_product == 0:
        # Same exception type the bare division would raise, but with a
        # message that names the actual cause.
        raise ZeroDivisionError(
            "document distance is undefined for a document with no words"
        )
    cosine = dot_product(d1f, d2f) / math.sqrt(norm_product)
    # Floating-point rounding may push the ratio marginally outside [-1, 1],
    # which would make math.acos raise a domain error.
    cosine = max(-1.0, min(1.0, cosine))
    return math.acos(cosine)
if __name__ == '__main__':
    # Command-line entry point: print the distance between the two text
    # files whose paths are given as the first two arguments.
    if len(sys.argv) < 3:
        # Exit with a usage hint (written to stderr, status 1) instead of
        # an IndexError traceback when a path is missing.
        sys.exit("usage: {} FILE1 FILE2".format(sys.argv[0]))
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    with open(file1) as f1:
        d1 = f1.read()
    with open(file2) as f2:
        d2 = f2.read()
    print(d(d1, d2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment