Created
July 18, 2020 19:33
-
-
Save demdxx/f06af44747361b66f0eb51728d5f2132 to your computer and use it in GitHub Desktop.
Cosine Similarity implementation in MySQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- for calculation of norm vector -- | |
DELIMITER $$ | |
CREATE FUNCTION vector_norm( vector JSON ) | |
RETURNS DOUBLE | |
READS SQL DATA | |
DETERMINISTIC | |
BEGIN | |
DECLARE array_length INTEGER(11); | |
DECLARE retval DOUBLE(19,2); | |
DECLARE cell_value DOUBLE(19,2); | |
DECLARE idx INT(11); | |
SELECT json_length( vector ) INTO array_length; | |
SET retval = 0.0; | |
SET idx = 0; | |
WHILE idx < array_length DO | |
SELECT json_extract( vector, concat( '$[', idx, ']' ) ) INTO cell_value; | |
SET retval = retval + POWER(cell_value, 2); | |
SET idx = idx + 1; | |
END WHILE; | |
RETURN SQRT(retval); | |
END$$ | |
DELIMITER ; | |
-- dot product implementation -- | |
DELIMITER $$ | |
CREATE FUNCTION dot_product( vector1 JSON, vector2 JSON ) | |
RETURNS DOUBLE | |
READS SQL DATA | |
DETERMINISTIC | |
BEGIN | |
DECLARE array_length INTEGER(11); | |
DECLARE retval DOUBLE(19,2); | |
DECLARE cell_value1 DOUBLE(19,2); | |
DECLARE cell_value2 DOUBLE(19,2); | |
DECLARE idx INT(11); | |
SELECT json_length( vector1 ) INTO array_length; | |
SET retval = 0.0; | |
SET idx = 0; | |
WHILE idx < array_length DO | |
SELECT json_extract( vector1, concat( '$[', idx, ']' ) ) INTO cell_value1; | |
SELECT json_extract( vector2, concat( '$[', idx, ']' ) ) INTO cell_value2; | |
SET retval = retval + cell_value1 * cell_value2; | |
SET idx = idx + 1; | |
END WHILE; | |
RETURN retval; | |
END$$ | |
DELIMITER ; | |
-- cosine similarity calculation -- | |
DELIMITER $$ | |
CREATE FUNCTION cosine_similarity( vector1 JSON, vector2 JSON ) | |
RETURNS DOUBLE | |
READS SQL DATA | |
DETERMINISTIC | |
BEGIN | |
DECLARE retval DOUBLE(19,2); | |
SELECT dot_product(vector1, vector2) / (vector_norm(vector1) * vector_norm(vector2)) INTO retval; | |
RETURN retval; | |
END$$ | |
DELIMITER ; |
Code has no license and completely free. I strongly do not suggest using it
in production as it's very slow.
…On Thu, Jul 15, 2021 at 7:29 AM Greg Priday ***@***.***> wrote:
***@***.**** commented on this gist.
------------------------------
This looks great! What's the license for this code? I'd like to include it
in an MIT licensed package I'm working on.
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<https://gist.github.com/f06af44747361b66f0eb51728d5f2132#gistcomment-3813451>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AACF5WIKAJQLK44D56RE36DTXZW4DANCNFSM5AMZO2QA>
.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This looks great! What's the license for this code? I'd like to include it in an MIT licensed package I'm working on.