Created
November 5, 2020 23:09
-
-
Save dfrobison/f098d92a7b83b51e289789af7bc0208b to your computer and use it in GitHub Desktop.
[A weighted base text search] This is a swift version of Forrest Smith's fuzzy text search. I'm absolutely sure there are performance optimizations to be made to make it faster, but it's suitable for my needs. You can find the article here: https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct FuzzyTextSearch { | |
private static let maxMatches = 256 | |
private static let defaultMatches: [Int] = Array(repeating: -1, count: maxMatches) | |
private static let recursionLimit = 10 | |
private static let sequentialBonus = 15 // bonus for adjacent matches | |
private static let separatorBonus = 30 // bonus if match occurs after a separator | |
private static let camelBonus = 30 // bonus if match is uppercase and prev is lower | |
private static let firstLetterBonus = 15 // bonus if the first letter is matched | |
private static let leadingLetterPenalty = -5 // penalty applied for every letter in str before the first match | |
private static let maxLeadingLetterPenalty = -15 // maximum penalty for leading letters | |
private static let unmatchedLetterPenalty = -1 // penalty for every letter that doesn't matter | |
private let neighborSeparator = "_ " | |
private let pattern: String | |
private let patternLength: Int | |
private var str: String = "" | |
private var strLength = 0 | |
init(pattern: String) { | |
self.pattern = pattern | |
patternLength = pattern.count | |
} | |
mutating func fuzzyMatchSimple(stringToMatch str: String) -> Bool { | |
guard !pattern.isEmpty else {return true} | |
guard !str.isEmpty else {return false} | |
var strIndexOffset = 0 | |
var patternIndexOffset = 0 | |
strLength = str.count | |
while (patternIndexOffset < patternLength) && (strIndexOffset < strLength) { | |
if pattern[pattern.index(pattern.startIndex, offsetBy: patternIndexOffset)].lowercased() == | |
str[str.index(str.startIndex, offsetBy: strIndexOffset)].lowercased() { | |
patternIndexOffset += 1 | |
} | |
strIndexOffset += 1 | |
} | |
return patternIndexOffset == patternLength | |
} | |
mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int) -> Bool { | |
var matches: [Int] = [] | |
return fuzzyMatch(stringToMatch: str, outScore: &outScore, matches: &matches) | |
} | |
mutating func fuzzyMatch(stringToMatch str: String, outScore: inout Int, matches: inout [Int]) -> Bool { | |
var recursionCount = 0 | |
var internalMatches = FuzzyTextSearch.defaultMatches | |
self.str = str | |
strLength = str.count | |
let matched = fuzzyMatchRecursive(patternOffsetIndex: 0, | |
strOffsetIndex: 0, | |
outScore: &outScore, | |
srcMatches: nil, | |
matches: &internalMatches, | |
nextMatch: 0, | |
recursionCount: &recursionCount) | |
matches = internalMatches.filter{$0 != -1} | |
return matched | |
} | |
// Private implementation | |
private mutating func fuzzyMatchRecursive(patternOffsetIndex: Int, | |
strOffsetIndex: Int, | |
outScore: inout Int, | |
srcMatches: [Int]?, | |
matches: inout [Int], | |
nextMatch: Int, | |
recursionCount: inout Int) -> Bool { | |
// Count recursions | |
recursionCount += 1 | |
guard recursionCount < FuzzyTextSearch.recursionLimit else {return false} | |
// Detect end of strings | |
guard patternOffsetIndex < patternLength && strOffsetIndex < strLength else {return false} | |
var patternOffsetIndex = patternOffsetIndex | |
var strOffsetIndex = strOffsetIndex | |
var nextMatch = nextMatch | |
// Recursion params | |
var recursiveMatch = false | |
var bestRecursiveMatches = FuzzyTextSearch.defaultMatches | |
var bestRecursiveScore = 0 | |
// Loop through pattern and str looking for a match | |
var first_match = true | |
while patternOffsetIndex != patternLength && strOffsetIndex != strLength { | |
// Found match | |
if pattern[pattern.index(pattern.startIndex, offsetBy: patternOffsetIndex)].lowercased() == | |
str[str.index(str.startIndex, offsetBy: strOffsetIndex)].lowercased() { | |
// Supplied matches buffer was too short | |
if nextMatch >= FuzzyTextSearch.maxMatches { | |
return false | |
} | |
// Remember matches | |
if first_match && srcMatches != nil { | |
for index in 0 ..< nextMatch { | |
matches[index] = srcMatches![index] | |
} | |
first_match = false | |
} | |
// Recursive call that "skips" this match | |
var recursiveMatches = FuzzyTextSearch.defaultMatches | |
var recursiveScore = 0 | |
if fuzzyMatchRecursive(patternOffsetIndex: patternOffsetIndex, | |
strOffsetIndex: strOffsetIndex + 1, | |
outScore: &recursiveScore, | |
srcMatches: matches, | |
matches: &recursiveMatches, | |
nextMatch: nextMatch, | |
recursionCount: &recursionCount) { | |
// Pick best recursive score | |
if !recursiveMatch || recursiveScore > bestRecursiveScore { | |
bestRecursiveMatches = recursiveMatches | |
bestRecursiveScore = recursiveScore | |
} | |
recursiveMatch = true | |
} | |
// Advance | |
matches[nextMatch] = strOffsetIndex | |
nextMatch += 1 | |
patternOffsetIndex += 1 | |
} | |
strOffsetIndex += 1 | |
} | |
// Determine if full pattern was matched | |
let matched = patternOffsetIndex == patternLength | |
// Calculate score | |
if matched { | |
// Nothing else needs to be looked since a match occurred | |
strOffsetIndex = strLength | |
// Initialize score | |
outScore = 100 | |
// Apply leading letter penalty | |
var penalty = FuzzyTextSearch.leadingLetterPenalty * matches[0] | |
if penalty < FuzzyTextSearch.maxLeadingLetterPenalty { | |
penalty = FuzzyTextSearch.maxLeadingLetterPenalty | |
} | |
outScore += penalty | |
// Apply unmatched penalty | |
let unmatched = strLength - nextMatch | |
outScore += FuzzyTextSearch.unmatchedLetterPenalty * unmatched | |
// Apply ordering bonuses | |
for nextMatchIndex in 0 ..< nextMatch { | |
let currIdx = matches[nextMatchIndex] | |
if nextMatchIndex > 0 { | |
let prevIdx = matches[nextMatchIndex - 1] | |
// Sequential | |
if currIdx == (prevIdx + 1) { | |
outScore += FuzzyTextSearch.sequentialBonus | |
} | |
} | |
// Check for bonuses based on neighbor character value | |
if currIdx > 0 { | |
// Camel case | |
let neighbor = str[str.index(str.startIndex, offsetBy: currIdx - 1)] | |
let curr = str[str.index(str.startIndex, offsetBy: currIdx)] | |
if neighbor.isLowercase && curr.isUppercase { | |
outScore += FuzzyTextSearch.camelBonus | |
} | |
// Separator | |
if neighborSeparator.contains(neighbor) { | |
outScore += FuzzyTextSearch.separatorBonus | |
} | |
} else { | |
// First letter | |
outScore += FuzzyTextSearch.firstLetterBonus | |
} | |
} | |
} | |
// Return best result | |
if recursiveMatch && (!matched || bestRecursiveScore > outScore) { | |
// Recursive score is better than "this" | |
matches = bestRecursiveMatches | |
outScore = bestRecursiveScore | |
return true | |
} else if matched { | |
// "this" score is better than recursive | |
return true | |
} | |
// no match | |
return false | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment