Skip to content

Instantly share code, notes, and snippets.

@Kovah
Created February 22, 2017 21:29
Show Gist options
  • Save Kovah/df90d336478a47d869b9683766cff718 to your computer and use it in GitHub Desktop.
Save Kovah/df90d336478a47d869b9683766cff718 to your computer and use it in GitHub Desktop.
Levenshtein function for MySQL
-- Levenshtein function
-- Source: https://openquery.com.au/blog/levenshtein-mysql-stored-function
-- Levenshtein reference: http://en.wikipedia.org/wiki/Levenshtein_distance
-- Arjen note: because the levenshtein value is encoded in a byte array, distance cannot exceed 255;
-- thus the maximum string length this implementation can handle is also limited to 255 characters.
DELIMITER $$
DROP FUNCTION IF EXISTS LEVENSHTEIN $$
CREATE FUNCTION LEVENSHTEIN(s1 VARCHAR(255) CHARACTER SET utf8, s2 VARCHAR(255) CHARACTER SET utf8)
RETURNS INT
DETERMINISTIC
BEGIN
DECLARE s1_len, s2_len, i, j, c, c_temp, cost INT;
DECLARE s1_char CHAR CHARACTER SET utf8;
-- max strlen=255 for this function
DECLARE cv0, cv1 VARBINARY(256);
SET s1_len = CHAR_LENGTH(s1),
s2_len = CHAR_LENGTH(s2),
cv1 = 0x00,
j = 1,
i = 1,
c = 0;
IF (s1 = s2) THEN
RETURN (0);
ELSEIF (s1_len = 0) THEN
RETURN (s2_len);
ELSEIF (s2_len = 0) THEN
RETURN (s1_len);
END IF;
WHILE (j <= s2_len) DO
SET cv1 = CONCAT(cv1, CHAR(j)),
j = j + 1;
END WHILE;
WHILE (i <= s1_len) DO
SET s1_char = SUBSTRING(s1, i, 1),
c = i,
cv0 = CHAR(i),
j = 1;
WHILE (j <= s2_len) DO
SET c = c + 1,
cost = IF(s1_char = SUBSTRING(s2, j, 1), 0, 1);
SET c_temp = ORD(SUBSTRING(cv1, j, 1)) + cost;
IF (c > c_temp) THEN
SET c = c_temp;
END IF;
SET c_temp = ORD(SUBSTRING(cv1, j+1, 1)) + 1;
IF (c > c_temp) THEN
SET c = c_temp;
END IF;
SET cv0 = CONCAT(cv0, CHAR(c)),
j = j + 1;
END WHILE;
SET cv1 = cv0,
i = i + 1;
END WHILE;
RETURN (c);
END $$
DELIMITER ;
@trivia211
Copy link

trivia211 commented Dec 16, 2020

Probably not intended, the MySQL = operator can be tricky. For example "ä" = "a", so LEVENSHTEIN("Hofbräu", "Hofbrau") = 0.

@zwxeno
Copy link

zwxeno commented Jan 30, 2021

note: this script wouldn't run on database workbench Lite v5.6.8 but runs fine on MySQL workbench.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment