Created
June 30, 2013 07:51
-
-
Save riyadparvez/5894283 to your computer and use it in GitHub Desktop.
Obtained from http://stackoverflow.com/a/1663745/512251.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Implements a string comparison algorithm based on character pair
/// similarity: the score is twice the number of adjacent-letter pairs the
/// two strings share, divided by the total number of pairs in both strings.
/// Source: http://www.catalysoft.com/articles/StrikeAMatch.html
/// </summary>
public class SimilarityTool
{
    /// <summary>
    /// Splits on every single whitespace character; the empty tokens this
    /// produces for runs of whitespace are skipped by the caller.
    /// </summary>
    private static readonly Regex WhitespaceSplitter = new Regex(@"\s");

    /// <summary>
    /// Compares the two strings based on letter pair matches. The comparison
    /// is case-insensitive (both inputs are upper-cased with the invariant
    /// culture, so the result does not depend on the current locale) and is
    /// computed per whitespace-separated word, so pairs never span a space.
    /// </summary>
    /// <param name="str1">The first string to compare. Must not be null.</param>
    /// <param name="str2">The second string to compare. Must not be null.</param>
    /// <returns>
    /// The percentage match from 0.0 to 1.0 where 1.0 is 100%. When neither
    /// string contains any letter pair (for example empty strings or
    /// single-character words), returns 1.0 if the upper-cased strings are
    /// identical and 0.0 otherwise, instead of the NaN a 0/0 division would give.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="str1"/> or <paramref name="str2"/> is null.
    /// </exception>
    public double CompareStrings(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException("str1");
        }

        if (str2 == null)
        {
            throw new ArgumentNullException("str2");
        }

        string upper1 = str1.ToUpperInvariant();
        string upper2 = str2.ToUpperInvariant();

        List<string> pairs1 = WordLetterPairs(upper1);
        List<string> pairs2 = WordLetterPairs(upper2);

        int union = pairs1.Count + pairs2.Count;
        if (union == 0)
        {
            // No pairs on either side: avoid 0/0 (NaN) and fall back to exact equality.
            return string.Equals(upper1, upper2, StringComparison.Ordinal) ? 1.0 : 0.0;
        }

        // Count how many times each pair occurs in the second string. Each
        // occurrence may be matched only once, which prevents "GGGG" from
        // appearing to match "GG" with 100% success.
        Dictionary<string, int> available = new Dictionary<string, int>(StringComparer.Ordinal);
        foreach (string pair in pairs2)
        {
            int count;
            available.TryGetValue(pair, out count);
            available[pair] = count + 1;
        }

        int intersection = 0;
        foreach (string pair in pairs1)
        {
            int count;
            if (available.TryGetValue(pair, out count) && count > 0)
            {
                intersection++;
                available[pair] = count - 1; // consume the matched occurrence
            }
        }

        return (2.0 * intersection) / union;
    }

    /// <summary>
    /// Gets all letter pairs for each individual
    /// whitespace-separated word in the string.
    /// </summary>
    /// <param name="str">The string to tokenize into words.</param>
    /// <returns>
    /// The letter pairs of every non-empty word, in order of appearance.
    /// Words shorter than two characters contribute no pairs.
    /// </returns>
    private static List<string> WordLetterPairs(string str)
    {
        List<string> allPairs = new List<string>();

        // Tokenize the string and put the tokens/words into an array
        string[] words = WhitespaceSplitter.Split(str);

        foreach (string word in words)
        {
            if (!string.IsNullOrEmpty(word))
            {
                allPairs.AddRange(LetterPairs(word));
            }
        }

        return allPairs;
    }

    /// <summary>
    /// Generates an array containing every two
    /// consecutive letters in the input string.
    /// </summary>
    /// <param name="str">The word to split into overlapping two-character pairs.</param>
    /// <returns>
    /// An array of <c>str.Length - 1</c> pairs, or an empty array when
    /// <paramref name="str"/> has fewer than two characters.
    /// </returns>
    private static string[] LetterPairs(string str)
    {
        // Guard: a negative array length would throw for an empty string.
        if (str.Length < 2)
        {
            return new string[0];
        }

        int numPairs = str.Length - 1;
        string[] pairs = new string[numPairs];
        for (int i = 0; i < numPairs; i++)
        {
            pairs[i] = str.Substring(i, 2);
        }

        return pairs;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment