Created
June 29, 2013 12:53
-
-
Save jbowles/5890992 to your computer and use it in GitHub Desktop.
Part three of Levenshtein distance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "fmt" | |
// PART THREE: if characters are not the same, we step through a comparison against each character to | |
//// determine DELETION, INSERTION, SUBSTITUTION and get the minimum of the three values. | |
func MinInt(a ...int) int { | |
min := int(^uint(0) >> 1) | |
for _, i := range a { | |
if i < min { | |
min = i | |
} | |
} | |
return min | |
} | |
func LevenshteinThree(s1, s2 string) { | |
m := len(s1) | |
n := len(s2) | |
width := n - 1 | |
fmt.Println("width =",width) | |
vcell := make([]int, m * n) | |
fmt.Println("making vcell",len(vcell), "elements long") | |
// cells for i of m(s1) | |
for i := 1; i < m; i++ { | |
vcell[i * width + 0] = i | |
fmt.Println("i=",i, "for len(str1)==",m, "at vector cell", vcell[i * width + 0]*width) | |
} | |
// cell for j of n(s2) | |
for j := 1; j < n; j++ { | |
vcell[0 * width + j] = j | |
fmt.Println("j=",j, "for len(str2)==",n, "at vector cell", vcell[0 * width + j]) | |
} | |
fmt.Println("Vector Cell:\n", vcell) | |
fmt.Println("never compare 0-th index:", string(s1[0]),string(s2[0])) | |
count := 0 | |
for j := 1; j < n; j++ { | |
for i := 1; i < m; i++ { | |
if s1[i] == s2[j] { | |
fmt.Println("dropping into true for s1[i] == s2[j]",string(s1[i]),string(s2[j])) | |
vcell[i * width + j] = vcell[(i - 1) * width + (j - 1)] | |
count += 1 | |
fmt.Println(count) | |
} else { | |
fmt.Println(" &&&&& not true for s1[i] == s2[j]",string(s1[i]),string(s2[j])) | |
deletion := vcell[(i - 1) * width + j] + 1 | |
fmt.Println(i,"- 1", "*",width,"+",j, "+ 1") | |
fmt.Println("deletion:",deletion) | |
insertion := vcell[(i * width + (j - 1))] + 1 | |
fmt.Println(i,"*",width,"+",j,"- 1 + 1") | |
fmt.Println("insertion:",insertion) | |
sub := vcell[((i - 1) * width + (j - 1))] + 1 | |
fmt.Println(i," - 1","*",width,"+",j,"- 1 + 1") | |
fmt.Println("substitution:",sub) | |
count += 1 | |
fmt.Println(count) | |
fmt.Println("MinInt ************* :",MinInt(deletion,insertion,sub)) | |
} | |
} | |
} | |
fmt.Println("operated on",count, "cells") | |
} | |
func main() { | |
LevenshteinThree("str","str") | |
} | |
/* | |
*** OUTPUT | |
width = 2 | |
making vcell 9 elements long | |
i= 1 for len(str1)== 3 at vector cell 2 | |
i= 2 for len(str1)== 3 at vector cell 4 | |
j= 1 for len(str2)== 3 at vector cell 1 | |
j= 2 for len(str2)== 3 at vector cell 2 | |
Vector Cell: | |
[0 1 2 0 2 0 0 0 0] | |
never compare 0-th index: s s | |
dropping into true for s1[i] == s2[j] t t | |
1 | |
&&&&& not true for s1[i] == s2[j] r t | |
2 - 1 * 2 + 1 + 1 | |
deletion: 1 | |
2 * 2 + 1 - 1 + 1 | |
insertion: 3 | |
2 - 1 * 2 + 1 - 1 + 1 | |
substitution: 3 | |
2 | |
MinInt ************* : 1 | |
&&&&& not true for s1[i] == s2[j] t r | |
1 - 1 * 2 + 2 + 1 | |
deletion: 3 | |
1 * 2 + 2 - 1 + 1 | |
insertion: 1 | |
1 - 1 * 2 + 2 - 1 + 1 | |
substitution: 2 | |
3 | |
MinInt ************* : 1 | |
dropping into true for s1[i] == s2[j] r r | |
4 | |
operated on 4 cells | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment