Skip to content

Instantly share code, notes, and snippets.

@CDillinger
Last active July 25, 2023 09:17
Show Gist options
  • Save CDillinger/2aa02128f840bdca90340ce08ee71bc2 to your computer and use it in GitHub Desktop.
Save CDillinger/2aa02128f840bdca90340ce08ee71bc2 to your computer and use it in GitHub Desktop.
C# Implementation of Fuzzy Match
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
using System;
using System.Collections.Generic;
public static class FuzzyMatcher
{
/// <summary>
/// Does a fuzzy search for a pattern within a string.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern)
{
var patternIdx = 0;
var strIdx = 0;
var patternLength = pattern.Length;
var strLength = stringToSearch.Length;
while (patternIdx != patternLength && strIdx != strLength)
{
if (char.ToLower(pattern[patternIdx]) == char.ToLower(stringToSearch[strIdx]))
++patternIdx;
++strIdx;
}
return patternLength != 0 && strLength != 0 && patternIdx == patternLength;
}
/// <summary>
/// Does a fuzzy search for a pattern within a string, and gives the search a score on how well it matched.
/// </summary>
/// <param name="stringToSearch">The string to search for the pattern in.</param>
/// <param name="pattern">The pattern to search for in the string.</param>
/// <param name="outScore">The score which this search received, if a match was found.</param>
/// <returns>true if each character in pattern is found sequentially within stringToSearch; otherwise, false.</returns>
public static bool FuzzyMatch(string stringToSearch, string pattern, out int outScore)
{
// Score consts
const int adjacencyBonus = 5; // bonus for adjacent matches
const int separatorBonus = 10; // bonus if match occurs after a separator
const int camelBonus = 10; // bonus if match is uppercase and prev is lower
const int leadingLetterPenalty = -3; // penalty applied for every letter in stringToSearch before the first match
const int maxLeadingLetterPenalty = -9; // maximum penalty for leading letters
const int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
// Loop variables
var score = 0;
var patternIdx = 0;
var patternLength = pattern.Length;
var strIdx = 0;
var strLength = stringToSearch.Length;
var prevMatched = false;
var prevLower = false;
var prevSeparator = true; // true if first letter match gets separator bonus
// Use "best" matched letter if multiple string letters match the pattern
char? bestLetter = null;
char? bestLower = null;
int? bestLetterIdx = null;
var bestLetterScore = 0;
var matchedIndices = new List<int>();
// Loop over strings
while (strIdx != strLength)
{
var patternChar = patternIdx != patternLength ? pattern[patternIdx] as char? : null;
var strChar = stringToSearch[strIdx];
var patternLower = patternChar != null ? char.ToLower((char)patternChar) as char? : null;
var strLower = char.ToLower(strChar);
var strUpper = char.ToUpper(strChar);
var nextMatch = patternChar != null && patternLower == strLower;
var rematch = bestLetter != null && bestLower == strLower;
var advanced = nextMatch && bestLetter != null;
var patternRepeat = bestLetter != null && patternChar != null && bestLower == patternLower;
if (advanced || patternRepeat)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
bestLetter = null;
bestLower = null;
bestLetterIdx = null;
bestLetterScore = 0;
}
if (nextMatch || rematch)
{
var newScore = 0;
// Apply penalty for each letter before the first pattern match
// Note: Math.Max because penalties are negative values. So max is smallest penalty.
if (patternIdx == 0)
{
var penalty = Math.Max(strIdx * leadingLetterPenalty, maxLeadingLetterPenalty);
score += penalty;
}
// Apply bonus for consecutive bonuses
if (prevMatched)
newScore += adjacencyBonus;
// Apply bonus for matches after a separator
if (prevSeparator)
newScore += separatorBonus;
// Apply bonus across camel case boundaries. Includes "clever" isLetter check.
if (prevLower && strChar == strUpper && strLower != strUpper)
newScore += camelBonus;
// Update pattern index IF the next pattern letter was matched
if (nextMatch)
++patternIdx;
// Update best letter in stringToSearch which may be for a "next" letter or a "rematch"
if (newScore >= bestLetterScore)
{
// Apply penalty for now skipped letter
if (bestLetter != null)
score += unmatchedLetterPenalty;
bestLetter = strChar;
bestLower = char.ToLower((char)bestLetter);
bestLetterIdx = strIdx;
bestLetterScore = newScore;
}
prevMatched = true;
}
else
{
score += unmatchedLetterPenalty;
prevMatched = false;
}
// Includes "clever" isLetter check.
prevLower = strChar == strLower && strLower != strUpper;
prevSeparator = strChar == '_' || strChar == ' ';
++strIdx;
}
// Apply score for last match
if (bestLetter != null)
{
score += bestLetterScore;
matchedIndices.Add((int)bestLetterIdx);
}
outScore = score;
return patternIdx == patternLength;
}
}
@tajmone
Copy link

tajmone commented Jul 19, 2019

Hi @CDillinger,

just wanted to let you know that I've added your C# port to a new project built around Forrest Smith's FTS Fuzzy Match:

@Codename2200
Copy link

Hey CDillinger,
I found your code really helpful. Btw, there was an update on this article (https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/)
Update — 2/18/2017

It would be great also if you could apply the recursive match.

Thanks!

@tajmone
Copy link

tajmone commented May 14, 2020

It would be great also if you could apply the recursive match.

I agree, it would be great. And I'll be happy to include the updated algorithm in in the fuzzy-search repository.

@Codename2200
Copy link

Codename2200 commented May 15, 2020

I made the recursive version but it doesn't work.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Tests
{
    public static class Fuzzy
    {
        const int SEQUENTIAL_BONUS = 15;              // bonus for adjacent matches
        const int SEPARATOR_BONUS = 30;               // bonus if match occurs after a separator
        const int CAMEL_BONUS = 30;                   // bonus if match is uppercase and prev is lower
        const int FIRST_LETTER_BONUS = 15;            // bonus if the first letter is matched
        const int LEADING_LETTER_PENALTY = -5;        // penalty applied for every letter in str before the first match
        const int MAX_LEADING_LETTER_PENALTY = -15;   // maximum penalty for leading letters
        const int UNMATCHED_LETTER_PENALTY = -1;      // penalty for every letter that doesn't matter

        public static bool FuzzyMatchRecursive(string pattern, string target, out int outScore)
        {
            int recursionCount = 0;
            int recursionLimit = 10;
            List<int> matches = new List<int>();
            int maxMatches = 256;

            return FuzzyMatchRecursive(
                pattern,                    // string to Search
                target,                     // target string
                0,                          // patternCurIndex
                0,                          // strCurrIndex
                null,                       // srcMatces
                matches,                    // matches
                maxMatches,                 // maxMatches
                0,                          // nextMatch
                recursionCount,             // recursionCount
                recursionLimit,             // recursionLimit
                out outScore);
        }

        public static bool FuzzyMatchRecursive(
            string pattern,
            string str,
            int patternCurIndex,
            int strCurrIndex,
            List<int> srcMatces,
            List<int> matches,
            int maxMatches,
            int nextMatch,
            int recursionCount,
            int recursionLimit,
            out int outScore)
        {
            outScore = 0;

            // Return if recursion limit is reached.
            if (++recursionCount >= recursionLimit)
                return false;

            // Return if we reached ends of strings.
            if (patternCurIndex == pattern.Length || strCurrIndex == str.Length)
                return false;

            // Recursion params
            bool recursiveMatch = false;
            List<int> bestRecursiveMatches = new List<int>();
            int bestRecursiveScore = 0;

            // Loop through pattern and str looking for a match.
            bool firstMatch = true;

            while (patternCurIndex < pattern.Length && strCurrIndex < str.Length)
            {
                // Match found.
                var patternChar = char.ToLower(pattern[patternCurIndex]);
                var strChar = char.ToLower(str[strCurrIndex]);

                if (patternChar == strChar)
                {
                    if (nextMatch > maxMatches)
                        return false;

                    if (firstMatch && srcMatces != null)
                    {
                        matches = srcMatces;
                        firstMatch = false;
                    }

                    List<int> recursiveMatches = new List<int>();
                    var ismatched = FuzzyMatchRecursive(
                        pattern,
                        str,
                        patternCurIndex,
                        strCurrIndex + 1,
                        matches,
                        recursiveMatches,
                        maxMatches,
                        nextMatch,
                        recursionCount,
                        recursionLimit,
                        out int recursiveScore);

                    if (ismatched)
                    {
                        // Pick best recursive score.
                        if (!recursiveMatch || recursiveScore > bestRecursiveScore)
                        {
                            bestRecursiveMatches = recursiveMatches;
                            bestRecursiveScore = recursiveScore;
                        }
                        recursiveMatch = true;
                    }

                    nextMatch++;
                    matches.Add(strCurrIndex);

                    ++patternCurIndex;
                }

                ++strCurrIndex;
            }

            var matched = patternCurIndex == pattern.Length;

            if (matched)
            {
                outScore = 100;

                // Apply leading letter penalty
                int penalty = LEADING_LETTER_PENALTY * matches[0];
                penalty = penalty < MAX_LEADING_LETTER_PENALTY ? MAX_LEADING_LETTER_PENALTY : penalty;
                outScore += penalty;

                //Apply unmatched penalty
                int unmatched = str.Length - nextMatch;
                outScore += UNMATCHED_LETTER_PENALTY * unmatched;

                for (int i = 0; i < nextMatch; i++)
                {
                    int currIdx = matches[i];

                    if (i > 0)
                    {
                        int prevIdx = matches[i - 1];
                        if (currIdx == prevIdx + 1)
                        {
                            outScore += SEQUENTIAL_BONUS;
                        }
                    }

                    // Check for bonuses based on neighbor character value.
                    if (currIdx > 0)
                    {
                        // Camel case
                        char neighbor = str[currIdx - 1];
                        char curr = str[currIdx];
                        if (neighbor == char.ToLower(neighbor) && curr == char.ToUpper(curr))
                        {
                            outScore += CAMEL_BONUS;
                        }
                        bool isNeighbourSeparator = neighbor == '_' || neighbor == ' ';
                        if (isNeighbourSeparator)
                        {
                            outScore += SEPARATOR_BONUS;
                        }
                    }
                    else
                    {
                        // First letter
                        outScore += FIRST_LETTER_BONUS;
                    }
                }

                // Return best result
                if (recursiveMatch && (!matched || bestRecursiveScore > outScore))
                {
                    // Recursive score is better than "this"
                    matches = bestRecursiveMatches;
                    outScore = bestRecursiveScore;
                    return true;
                }
                else if (matched)
                {
                    // "this" score is better than recursive
                    return true;
                }
                else
                {
                    return false;
                }
            }

            return false;
        }

        public static string FuzzyMatchRecursive(string pattern, string[] targets)
        {
            if (!string.IsNullOrEmpty(pattern) && targets?.Length > 0)
            {
                Dictionary<string, int> distances = new Dictionary<string, int>();

                foreach (var item in targets)
                {
                    var result = FuzzyMatchRecursive(pattern, item, out int score);
                    distances.Add(item, score);
                }

                return distances.Aggregate((l, r) => l.Value > r.Value ? l : r).Key;
            }

            return pattern;
        }

        public static void Test()
        {
            string[] targets = new[] {
                "Internal Hire or Backfill",
                "OTE Neutral",
                "Funded by Business Leader Budget",
                "Funded by Merit Budget"
            };

            string pattern = "Intern Conversion";
            string pattern2 = "Internal Hire";

            var result = FuzzyMatchRecursive(pattern, targets);
            var result2 = FuzzyMatchRecursive(pattern2, targets);

            if (result != "Internal Hire or Backfill")
                throw new Exception("No match found!");

            if (result2 != "Internal Hire or Backfill")
                throw new Exception("No match found!");
        }
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment