Created
March 31, 2015 05:41
-
-
Save spundun/36381fdd830f1da282a7 to your computer and use it in GitHub Desktop.
Similarity based on Levenshtein Distance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// main.cpp | |
// StringSimilarity | |
// | |
// Created by Spundun Bhatt on 3/30/15. | |
// Copyright (c) 2015 Spundun Bhatt. All rights reserved. | |
// | |
#include <iostream> | |
#include <memory> | |
#include <vector> | |
#include <cmath> | |
using namespace std; | |
size_t LevDistance(string s1, string s2) { | |
size_t l1 = s1.length(); | |
size_t l2 = s2.length(); | |
if(l1==0) return l2; | |
if(l2==0) return l1; | |
shared_ptr<vector<size_t>> prev(new vector<size_t>(l1)), cur(new vector<size_t>(l1)); | |
////////////////////////////////// | |
// This code populates the first row. | |
if (s1[0]==s2[0]) { | |
(*prev)[0] = 0; | |
} else { | |
(*prev)[0] = 1; | |
} | |
for (size_t i = 1; i< l1; i++) { | |
if (s1[i] == s2[0]) { | |
(*prev)[i] = i; | |
} else { | |
(*prev)[i] = min(i+1, (*prev)[i-1]+1); | |
} | |
} | |
////////////////////////////////// | |
for (size_t i=1; i<l2; i++) { | |
if (s1[0] == s2[i]) { | |
(*cur)[0] = i; | |
}else { | |
(*cur)[0] = min(i+1, (*prev)[0]+1); | |
} | |
for (size_t j=1; j<l1; j++) { | |
if(s1[j] == s2[i]) { | |
(*cur)[j] = (*prev)[j-1]; | |
} else { | |
(*cur)[j] = min((*prev)[j]+1, (*cur)[j-1] + 1); | |
} | |
} | |
cur.swap(prev); | |
} | |
return (*prev)[l1-1]; | |
} | |
double SimExp(string s1, string s2) { | |
if (s1.length()==0 && s2.length()==0) { | |
return 1; | |
} | |
return exp(-((double)LevDistance(s1, s2))/(double)max(s1.length(),s2.length())); | |
} | |
double SimLinear(string s1, string s2) { | |
if (s1.length()==0 && s2.length()==0) { | |
return 1; | |
} | |
return 1-((double)LevDistance(s1, s2))/(double)(max(s1.length(),s2.length())); | |
} | |
int main(int argc, const char * argv[]) { | |
while(true){ | |
string s1, s2; | |
//cin>> s1; | |
//cin>> s2; | |
getline(cin, s1); | |
getline(cin, s2); | |
cout << "Two Strings \"" << s1 << "\", \"" << s2 << "\"" << endl << | |
"... Similarity(exp): " << SimExp(s1, s2) << " ... Similarity(linear): " << SimLinear(s1, s2) << endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment