Created
June 27, 2014 05:54
-
-
Save bgreenlee/52d93a1d8fa1b8c1f38b to your computer and use it in GitHub Desktop.
Levenshtein Distance in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Levenshtein edit distance calculator | |
* Usage: levenstein <string> <string> | |
* | |
* To compile: | |
* sudo xcode-select -switch /Applications/Xcode6-Beta.app/Contents/Developer | |
* xcrun swift -sdk $(xcrun --show-sdk-path --sdk macosx) levenshtein.swift | |
*/ | |
import Foundation | |
// return minimum value in a list of Ints | |
func min(numbers: Int...) -> Int { | |
return numbers.reduce(numbers[0], {$0 < $1 ? $0 : $1}) | |
} | |
func levenshtein(aStr: String, bStr: String) -> Int { | |
// create character arrays | |
let a = Array(aStr) | |
let b = Array(bStr) | |
// initialize matrix of size |a|+1 * |b|+1 to zero | |
var dist = Int[][]() | |
for row in 0...a.count { | |
dist.append(Int[](count: b.count + 1, repeatedValue: 0)) | |
} | |
// 'a' prefixes can be transformed into empty string by deleting every char | |
for i in 1...a.count { | |
dist[i][0] = i | |
} | |
// 'b' prefixes can be created from empty string by inserting every char | |
for j in 1...b.count { | |
dist[0][j] = j | |
} | |
for i in 1...a.count { | |
for j in 1...b.count { | |
if a[i-1] == b[j-1] { | |
dist[i][j] = dist[i-1][j-1] // noop | |
} else { | |
dist[i][j] = min( | |
dist[i-1][j] + 1, // deletion | |
dist[i][j-1] + 1, // insertion | |
dist[i-1][j-1] + 1 // substitution | |
) | |
} | |
} | |
} | |
return dist[a.count][b.count] | |
} | |
if Process.arguments.count != 3 { | |
println("Usage: levenstein <string> <string>") | |
exit(0) | |
} | |
println(levenshtein(Process.arguments[1], Process.arguments[2])) |
I went and improved on the prior versions to incorporate all of the optimizations available in Swift 2.2. You can find the gist here: https://gist.github.com/TheDarkCode/341ec5b84c078a0be1887c2c58f6d929
However, I've also turned it into a cocoapod: https://www.github.com/TheDarkCode/SwiftyLevenshtein
Statements like this for i in 1...a.count
will lead to crash if a.count
is 0.
Always use something like 1..<count
to avoid any possibility of crashing.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is my (more generalized) version: https://gist.github.com/shergin/a6dba6ee5ae3b9ecb16c