Last active
May 24, 2017 02:37
-
-
Save khrlimam/c523f6ad147e1d809431b8937cc05cd9 to your computer and use it in GitHub Desktop.
Similarity test ("uji similaritas"). This script only works when run from the project directory with the project's working environment activated.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Similarity test script: scores three sample documents against a query
# using the project's `calc_similarity` helper and prints every document
# whose score is positive, highest score first.
#
# Must be run from the project directory with the environment that provides
# the `lsirina` package activated. Written to run on both Python 2 and 3.
from lsirina.lsi.similarity import calc_similarity

d1 = "Shipment of gold damaged in a fire"
d2 = "Delivery of silver arrived in a silver truck"
d3 = "Shipment of gold arrived in a truck"
query = "gold silver truck"

docs = [d1, d2, d3]
# Whitespace tokenization only; no lowercasing or punctuation stripping.
tokenized_doc = [d.split() for d in docs]

# NOTE(review): the query is passed as a raw string while the documents are
# passed as token lists -- confirm this matches calc_similarity's contract.
sim = calc_similarity(query, tokenized_doc)

# Pair each score with its document index, keep only positive scores, and
# order from most to least similar. `reverse=True` keeps the sort stable, so
# documents with equal scores stay in their original order.
sort_by_most_valid = sorted(
    ((index, score) for index, score in enumerate(sim) if score > 0),
    key=lambda item: item[1],
    reverse=True,
)

# Use a distinct loop name so the `sim` score sequence is not rebound.
for index, score in sort_by_most_valid:
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("dokumen %s: %s, nilai similaritas: %s" % (index + 1, docs[index], score))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment