Created
November 24, 2015 13:53
-
-
Save mikezs/c1fa48eae2a5c10b22f7 to your computer and use it in GitHub Desktop.
Proper Boyer-Moore string searching in Swift 2.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Implemention from here: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140 | |
*/ | |
extension String { | |
private func preBmBc(forString x: String) -> [Character: Int] { | |
let m = x.characters.count | |
var bmBc = [Character: Int](/*count: alphabetSize, repeatedValue: m*/) | |
for i in 0...m - 2 { | |
let c = x[x.startIndex.advancedBy(i)] | |
bmBc[c] = m - i - 1 | |
} | |
return bmBc | |
} | |
private func suffixes(forString x: String) -> [Int] { | |
let m = x.characters.count | |
var suff = [Int](count: m, repeatedValue: 0) | |
suff[m - 1] = m | |
var g = m - 1 | |
var f = 0 | |
for (var i = m - 2; i >= 0; --i) { | |
if (i > g && suff[i + m - 1 - f] < i - g) { | |
suff[i] = suff[i + m - 1 - f] | |
} else { | |
if (i < g) { | |
g = i | |
} | |
f = i | |
while (g >= 0 && x[x.startIndex.advancedBy(g)] == x[x.startIndex.advancedBy(g + m - 1 - f)]) { | |
--g | |
} | |
suff[i] = f - g | |
} | |
} | |
return suff | |
} | |
private func preBmGs(forString x: String) -> [Int] { | |
let m = x.characters.count | |
let suff = suffixes(forString: find) | |
var bmGs = [Int](count: m, repeatedValue: m) | |
var j = 0 | |
for (var i = m - 1; i >= 0; --i) { | |
if (suff[i] == i + 1) { | |
for (; j < m - 1 - i; ++j) { | |
if (bmGs[j] == m) { | |
bmGs[j] = m - 1 - i | |
} | |
} | |
} | |
} | |
for (var i = 0; i <= m - 2; ++i) { | |
bmGs[m - 1 - suff[i]] = m - 1 - i | |
} | |
return bmGs | |
} | |
private func BM(findString x: String, inString y: String) -> Int? { | |
let m = x.characters.count | |
let n = y.characters.count | |
/* Preprocessing */ | |
let bmGs = preBmGs(forString: x) | |
let bmBc = preBmBc(forString: x) | |
/* Searching */ | |
var j = 0 | |
var i: Int | |
while (j <= n - m) { | |
for (i = m - 1; i >= 0 && x[x.startIndex.advancedBy(i)] == y[y.startIndex.advancedBy(i + j)]; --i) {} | |
if (i < 0) { | |
// We could just print here (and uncomment subsiquent line) to carry on matching | |
return j | |
//j += bmGs[0] | |
} else { | |
// We cheat slightly here and | |
let bmBcJump = bmBc[y[y.startIndex.advancedBy(i + j)]] ?? m | |
j += max(bmGs[i], bmBcJump - m + 1 + i) | |
} | |
} | |
return nil | |
} | |
func indexOf(pattern: String) -> String.Index? { | |
let patternLength = pattern.characters.count | |
assert(patternLength > 0) | |
assert(patternLength <= self.characters.count) | |
if let index = BM(findString: pattern, inString: self) { | |
return pattern.startIndex.advancedBy(index) | |
} | |
return nil | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is not very "Swift" or particularly efficient. This is because of Swift's inability to do random access for characterView's