Created
March 26, 2019 11:08
-
-
Save terwanerik/d09f2557ca3c28c322451e96a341d9d6 to your computer and use it in GitHub Desktop.
PHP port of https://github.com/joshaven/string_score
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Made by Erik Terwan
 * [email protected]
 * https://erikterwan.com
 *
 * All rights reserved
 *
 * Created on 2019-03-26
 */
/**
 * String utilities.
 *
 * Currently exposes a single fuzzy-matching scorer, a PHP port of
 * https://github.com/joshaven/string_score.
 */
class StringHelper {
    /**
     * Score how well $word matches $string.
     *
     * The word's characters are searched for in order, case-insensitively,
     * inside the string. Each hit earns a per-character score:
     *   - 0.7 when the hit is exactly where the search resumed (start of the
     *     string, or directly after the previous hit),
     *   - 0.1 otherwise, plus 0.8 when the preceding character is a space
     *     (acronym bonus),
     *   - plus 0.1 when the original casing matches as well.
     * The summed score is averaged against both lengths, divided by the
     * accumulated fuzziness penalty, and bumped by 0.15 when both inputs start
     * with the same letter and the score is still below 0.85.
     *
     * All work is done byte-wise (strlen / strtolower / strpos), so multibyte
     * characters are not treated as single letters.
     *
     * @param string     $string    The text being searched (haystack).
     * @param string     $word      The abbreviation / query to look for.
     * @param null|float $fuzziness When truthy, characters of $word missing from
     *                              $string are tolerated and each adds
     *                              (1 - $fuzziness) to the divisor instead of
     *                              aborting. The value is not validated; a value
     *                              between 0 and 1 is presumably intended.
     * @return float|int int 1 for identical inputs, int 0 when either input is
     *                   empty or (without fuzziness) a character is missing,
     *                   otherwise a float score.
     */
    public static function score($string, $word, $fuzziness = null) {
        // Normalise to strings so the strict comparisons below still accept the
        // loosely-typed arguments (null, numbers) the untyped signature allows.
        $string = (string) $string;
        $word = (string) $word;

        // Identical inputs are a perfect match. Strict comparison is required:
        // with ==, numeric-looking strings such as "1e3" and "1000" are equal.
        if ($string === $word) { return 1; }

        // Nothing to look for, or nothing to look in: no match. The empty
        // $string guard also keeps the final formula from dividing by zero.
        if ($word === '' || $string === '') { return 0; }

        $lString = strtolower($string);
        $lWord = strtolower($word);
        $strLength = strlen($string);
        $wordLength = strlen($word);

        $isFuzzy = (bool) $fuzziness;
        // Penalty added to the divisor for every character that is not found.
        $fuzzyFactor = $isFuzzy ? 1 - $fuzziness : null;

        $runningScore = 0;
        $fuzzies = 1;
        $startAt = 0;

        // Walk through the word and add up the per-character scores.
        for ($i = 0; $i < $wordLength; $i += 1) {
            $idxOf = strpos($lString, $lWord[$i], $startAt);

            if ($idxOf === false) {
                // A missing character is fatal unless fuzziness is enabled.
                if (!$isFuzzy) { return 0; }
                $fuzzies += $fuzzyFactor;
                continue;
            }

            if ($startAt === $idxOf) {
                // Consecutive letter & start-of-string bonus.
                $charScore = 0.7;
            } else {
                $charScore = 0.1;
                // Acronym bonus: typing the first character of an acronym is as
                // if you preceded it with two perfect character matches.
                // $idxOf > $startAt >= 0 here, so $idxOf - 1 is a valid offset.
                if ($string[$idxOf - 1] === ' ') { $charScore += 0.8; }
            }

            // Same case bonus.
            if ($string[$idxOf] === $word[$i]) { $charScore += 0.1; }

            // Update the score and resume searching right after this hit.
            $runningScore += $charScore;
            $startAt = $idxOf + 1;
        }

        // Reduce penalty for longer strings.
        $finalScore = 0.5 * ($runningScore / $strLength + $runningScore / $wordLength) / $fuzzies;

        // Small boost when both inputs begin with the same letter.
        if (($lWord[0] === $lString[0]) && ($finalScore < 0.85)) {
            $finalScore += 0.15;
        }

        return $finalScore;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment