Skip to content

Instantly share code, notes, and snippets.

@tobchen
Created May 1, 2024 18:32
Show Gist options
  • Save tobchen/4e2ad939ca9b16c48ab9d0ec771b8c95 to your computer and use it in GitHub Desktop.
Save tobchen/4e2ad939ca9b16c48ab9d0ec771b8c95 to your computer and use it in GitHub Desktop.
My go at Cologne Phonetics in PostgreSQL
CREATE FUNCTION cologne_phonetics_preprocess(txt varchar) RETURNS varchar AS $$
BEGIN
RETURN replace(replace(replace(replace(upper(txt), 'Ä', 'A'), 'Ö', 'O'), 'Ü', 'U'), 'ß', 'SS');
END;
$$ LANGUAGE plpgsql
IMMUTABLE
RETURNS NULL ON NULL INPUT;
CREATE FUNCTION cologne_phonetics_encode_except_cx(ch varchar, next_ch varchar) RETURNS varchar AS $$
DECLARE
result varchar;
BEGIN
IF ch = 'A' OR ch = 'E' OR ch = 'I' OR ch = 'J' OR ch = 'O' OR ch = 'U' OR ch = 'Y' THEN
result := '0';
/* ELSIF ch = 'H' THEN
result := '-'; */
ELSIF ch = 'B' THEN
result := '1';
ELSIF ch = 'F' OR ch = 'V' OR ch = 'W' THEN
result := '3';
ELSIF ch = 'G' OR ch = 'K' OR ch = 'Q' THEN
result := '4';
ELSIF ch = 'L' THEN
result := '5';
ELSIF ch = 'M' OR ch = 'N' THEN
result := '6';
ELSIF ch = 'R' THEN
result := '7';
ELSIF ch = 'S' OR ch = 'Z' THEN
result := '8';
ELSIF ch = 'P' THEN
IF next_ch IS NULL OR next_ch != 'H' THEN
result := '1';
ELSE
result := '3';
END IF;
ELSIF ch = 'D' OR ch = 'T' THEN
IF next_ch IS NULL OR (next_ch != 'C' AND next_ch != 'S' AND next_ch != 'Z') THEN
result := '2';
ELSE
result := '8';
END IF;
ELSE
result := '';
END IF;
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
CALLED ON NULL INPUT;
CREATE FUNCTION cologne_phonetics_encode_only_c(prev_ch varchar, next_ch varchar) RETURNS varchar AS $$
DECLARE
result varchar;
BEGIN
IF prev_ch IS NULL THEN
IF next_ch IS NOT NULL AND (next_ch = 'A' OR next_ch = 'H' OR next_ch = 'K' OR next_ch = 'L'
OR next_ch = 'O' OR next_ch = 'Q' OR next_ch = 'R' OR next_ch = 'U' OR next_ch = 'X') THEN
result := '4';
ELSE
result := '8';
END IF;
ELSIF prev_ch = 'S' OR prev_ch = 'Z' THEN
result := '8';
ELSIF next_ch IS NOT NULL AND (next_ch = 'A' OR next_ch = 'H' OR next_ch = 'K' OR next_ch = 'O'
OR next_ch = 'Q' OR next_ch = 'U' OR next_ch = 'X') THEN
result := '4';
ELSE
result := '8';
END IF;
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
CALLED ON NULL INPUT;
CREATE FUNCTION cologne_phonetics_encode_only_x(prev_ch varchar) RETURNS varchar AS $$
DECLARE
result varchar;
BEGIN
IF prev_ch IS NOT NULL AND (prev_ch = 'C' OR prev_ch = 'K' OR prev_ch = 'Q') THEN
result := '48';
ELSE
result := '8';
END IF;
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
CALLED ON NULL INPUT;
CREATE FUNCTION cologne_phonetics_encode(prepped varchar) RETURNS varchar AS $$
DECLARE
result varchar;
prepped_len int;
prepped_array varchar[];
i int;
prev_ch varchar;
ch varchar;
next_ch varchar;
BEGIN
prepped_array := regexp_split_to_array(prepped, '');
prepped_len := array_length(prepped_array, 1);
result := '';
i := 1;
ch := prepped_array[1];
WHILE i <= prepped_len LOOP
IF (i + 1) <= prepped_len THEN
next_ch := prepped_array[i + 1];
ELSE
next_ch := NULL;
END IF;
IF ch = 'C' THEN
result := result || cologne_phonetics_encode_only_c(prev_ch, next_ch);
ELSIF ch = 'X' THEN
result := result || cologne_phonetics_encode_only_x(prev_ch);
ELSE
result := result || cologne_phonetics_encode_except_cx(ch, next_ch);
END IF;
prev_ch := ch;
ch := next_ch;
i := i + 1;
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
RETURNS NULL ON NULL INPUT;
CREATE FUNCTION cologne_phonetics_finalize(encoded varchar) RETURNS varchar AS $$
DECLARE
encoded_array varchar[];
result varchar;
ch varchar;
prev_ch varchar;
BEGIN
encoded_array := regexp_split_to_array(encoded, '');
FOREACH ch IN ARRAY encoded_array
LOOP
IF prev_ch IS NULL THEN
result := ch;
prev_ch := ch;
ELSIF ch != prev_ch AND ch != '0' THEN
result := result || ch;
prev_ch := ch;
END IF;
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
RETURNS NULL ON NULL INPUT;
CREATE FUNCTION cologne_phonetics(txt varchar) RETURNS varchar AS $$
DECLARE
prepped varchar;
encoded varchar;
result varchar;
BEGIN
prepped := cologne_phonetics_preprocess(txt);
encoded := cologne_phonetics_encode(prepped);
result := cologne_phonetics_finalize(encoded);
RETURN result;
END;
$$ LANGUAGE plpgsql
IMMUTABLE
RETURNS NULL ON NULL INPUT;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment