Skip to content

Instantly share code, notes, and snippets.

@dfrobison
Created November 5, 2020 23:09
Show Gist options
  • Save dfrobison/f098d92a7b83b51e289789af7bc0208b to your computer and use it in GitHub Desktop.
Save dfrobison/f098d92a7b83b51e289789af7bc0208b to your computer and use it in GitHub Desktop.
[A weighted base text search] This is a swift version of Forrest Smith's fuzzy text search. I'm absolutely sure there are performance optimizations to be made to make it faster, but it's suitable for my needs. You can find the article here: https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
struct FuzzyTextSearch {
private static let maxMatches = 256
private static let defaultMatches: [Int] = Array(repeating: -1, count: maxMatches)
private static let recursionLimit = 10
private static let sequentialBonus = 15 // bonus for adjacent matches
private static let separatorBonus = 30 // bonus if match occurs after a separator
private static let camelBonus = 30 // bonus if match is uppercase and prev is lower
private static let firstLetterBonus = 15 // bonus if the first letter is matched
private static let leadingLetterPenalty = -5 // penalty applied for every letter in str before the first match
private static let maxLeadingLetterPenalty = -15 // maximum penalty for leading letters
private static let unmatchedLetterPenalty = -1 // penalty for every letter that doesn't matter
private let neighborSeparator = "_ "
private let pattern: String
private let patternLength: Int
private var str: String = ""
private var strLength = 0
init(pattern: String) {
self.pattern = pattern
patternLength = pattern.count
}
mutating func fuzzyMatchSimple(stringToMatch str: String) -> Bool {
guard !pattern.isEmpty else {return true}
guard !str.isEmpty else {return false}
var strIndexOffset = 0
var patternIndexOffset = 0
strLength = str.count
while (patternIndexOffset < patternLength) && (strIndexOffset < strLength) {
if pattern[pattern.index(pattern.startIndex, offsetBy: patternIndexOffset)].lowercased() ==
str[str.index(str.startIndex, offsetBy: strIndexOffset)].lowercased() {
patternIndexOffset += 1
}
strIndexOffset += 1
}
return patternIndexOffset == patternLength
}
mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int) -> Bool {
var matches: [Int] = []
return fuzzyMatch(stringToMatch: str, outScore: &outScore, matches: &matches)
}
mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int, matches: inout [Int]) -> Bool {
var recursionCount = 0
var internalMatches = FuzzyTextSearch.defaultMatches
self.str = str
strLength = str.count
let matched = fuzzyMatchRecursive(patternOffsetIndex: 0,
strOffsetIndex: 0,
outScore: &outScore,
srcMatches: nil,
matches: &internalMatches,
nextMatch: 0,
recursionCount: &recursionCount)
matches = internalMatches.filter{$0 != -1}
return matched
}
// Private implementation
private mutating func fuzzyMatchRecursive(patternOffsetIndex: Int,
strOffsetIndex: Int,
outScore: inout Int,
srcMatches: [Int]?,
matches: inout [Int],
nextMatch: Int,
recursionCount: inout Int) -> Bool {
// Count recursions
recursionCount += 1
guard recursionCount < FuzzyTextSearch.recursionLimit else {return false}
// Detect end of strings
guard patternOffsetIndex < patternLength && strOffsetIndex < strLength else {return false}
var patternOffsetIndex = patternOffsetIndex
var strOffsetIndex = strOffsetIndex
var nextMatch = nextMatch
// Recursion params
var recursiveMatch = false
var bestRecursiveMatches = FuzzyTextSearch.defaultMatches
var bestRecursiveScore = 0
// Loop through pattern and str looking for a match
var first_match = true
while patternOffsetIndex != patternLength && strOffsetIndex != strLength {
// Found match
if pattern[pattern.index(pattern.startIndex, offsetBy: patternOffsetIndex)].lowercased() ==
str[str.index(str.startIndex, offsetBy: strOffsetIndex)].lowercased() {
// Supplied matches buffer was too short
if nextMatch >= FuzzyTextSearch.maxMatches {
return false
}
// Remember matches
if first_match && srcMatches != nil {
for index in 0 ..< nextMatch {
matches[index] = srcMatches![index]
}
first_match = false
}
// Recursive call that "skips" this match
var recursiveMatches = FuzzyTextSearch.defaultMatches
var recursiveScore = 0
if fuzzyMatchRecursive(patternOffsetIndex: patternOffsetIndex,
strOffsetIndex: strOffsetIndex + 1,
outScore: &recursiveScore,
srcMatches: matches,
matches: &recursiveMatches,
nextMatch: nextMatch,
recursionCount: &recursionCount) {
// Pick best recursive score
if !recursiveMatch || recursiveScore > bestRecursiveScore {
bestRecursiveMatches = recursiveMatches
bestRecursiveScore = recursiveScore
}
recursiveMatch = true
}
// Advance
matches[nextMatch] = strOffsetIndex
nextMatch += 1
patternOffsetIndex += 1
}
strOffsetIndex += 1
}
// Determine if full pattern was matched
let matched = patternOffsetIndex == patternLength
// Calculate score
if matched {
// Nothing else needs to be looked since a match occurred
strOffsetIndex = strLength
// Initialize score
outScore = 100
// Apply leading letter penalty
var penalty = FuzzyTextSearch.leadingLetterPenalty * matches[0]
if penalty < FuzzyTextSearch.maxLeadingLetterPenalty {
penalty = FuzzyTextSearch.maxLeadingLetterPenalty
}
outScore += penalty
// Apply unmatched penalty
let unmatched = strLength - nextMatch
outScore += FuzzyTextSearch.unmatchedLetterPenalty * unmatched
// Apply ordering bonuses
for nextMatchIndex in 0 ..< nextMatch {
let currIdx = matches[nextMatchIndex]
if nextMatchIndex > 0 {
let prevIdx = matches[nextMatchIndex - 1]
// Sequential
if currIdx == (prevIdx + 1) {
outScore += FuzzyTextSearch.sequentialBonus
}
}
// Check for bonuses based on neighbor character value
if currIdx > 0 {
// Camel case
let neighbor = str[str.index(str.startIndex, offsetBy: currIdx - 1)]
let curr = str[str.index(str.startIndex, offsetBy: currIdx)]
if neighbor.isLowercase && curr.isUppercase {
outScore += FuzzyTextSearch.camelBonus
}
// Separator
if neighborSeparator.contains(neighbor) {
outScore += FuzzyTextSearch.separatorBonus
}
} else {
// First letter
outScore += FuzzyTextSearch.firstLetterBonus
}
}
}
// Return best result
if recursiveMatch && (!matched || bestRecursiveScore > outScore) {
// Recursive score is better than "this"
matches = bestRecursiveMatches
outScore = bestRecursiveScore
return true
} else if matched {
// "this" score is better than recursive
return true
}
// no match
return false
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment