Skip to content

Instantly share code, notes, and snippets.

@aheinze
Last active August 20, 2018 07:44
Show Gist options
  • Save aheinze/d49bd43083bfd472eb6bdacdfaefbd88 to your computer and use it in GitHub Desktop.
Save aheinze/d49bd43083bfd472eb6bdacdfaefbd88 to your computer and use it in GitHub Desktop.
Fuzzy Search function - returning score
<?php
/**
 * Scores how well a search phrase fuzzily matches a text.
 *
 * Both strings are lower-cased (UTF-8) and split on single spaces into words;
 * empty words produced by repeated, leading or trailing spaces are ignored.
 * Every search word is compared with every text word:
 *  - a text word that contains the search word adds 1 to the score;
 *  - otherwise, if their Levenshtein distance is at most $distance, the
 *    fraction of the text word's characters that needed no edit is added
 *    (never less than 0).
 * The sum is finally divided by the number of search words.
 *
 * @param string $search   Space-separated search words.
 * @param string $text     Text to search in.
 * @param int    $distance Largest Levenshtein distance still counted as a fuzzy match.
 *
 * @return int|float Score averaged over the search words; 0 when $search contains no words.
 */
function fuzzy_search($search, $text, $distance = 3){
    // Lower-case and split on spaces. Empty pieces are dropped because an empty
    // text word would make the length divisor below zero, and an empty search
    // word would be "contained" in every text word. A strict !== '' test is
    // used so that the word "0" is kept.
    $split_words = static function ($str) {
        $words = explode(' ', mb_strtolower($str, 'UTF-8'));

        return array_values(array_filter($words, static function ($word) {
            return $word !== '';
        }));
    };

    $needles = $split_words($search);
    $tokens = $split_words($text);

    // Nothing to search for: avoid dividing by zero at the end.
    if (count($needles) === 0) {
        return 0;
    }

    $score = 0;

    // levenshtein() works on bytes, so multibyte characters are first replaced
    // by single bytes >= 128 to make the distance count characters.
    $levenshtein_utf8 = static function ($s1, $s2) {
        $map = [];

        // $map is captured by reference so that both strings share ONE
        // character-to-byte mapping; with a by-value capture each string would
        // start from an empty map and different characters would collide on
        // the same replacement byte.
        // NOTE: chr() wraps modulo 256, so only 128 distinct multibyte
        // characters per pair of words can be mapped without collisions.
        $utf8_to_extended_ascii = static function ($str) use (&$map) {
            // find all multibyte characters (cf. utf-8 encoding specs)
            $matches = [];
            if (!preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) {
                return $str; // plain ascii string
            }

            // update the encoding map with the characters not already met
            foreach ($matches[0] as $mbc) {
                if (!isset($map[$mbc])) {
                    $map[$mbc] = chr(128 + count($map));
                }
            }

            // finally remap non-ascii characters
            return strtr($str, $map);
        };

        return levenshtein($utf8_to_extended_ascii($s1), $utf8_to_extended_ascii($s2));
    };

    foreach ($needles as $needle) {
        foreach ($tokens as $token) {
            if (strpos($token, $needle) !== false) {
                // Exact substring hit.
                $score += 1;
                continue;
            }

            $d = $levenshtein_utf8($needle, $token);
            if ($d <= $distance) {
                // $l >= 1 here because empty tokens were filtered out above.
                $l = mb_strlen($token, 'UTF-8');
                // The distance can exceed the token length when the needle is
                // longer than the token; clamp so a poor match contributes
                // nothing instead of lowering the score.
                $score += max(0, $l - $d) / $l;
            }
        }
    }

    return $score / count($needles);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment