Skip to content

Instantly share code, notes, and snippets.

@oelmekki
Last active July 26, 2016 15:28
Show Gist options
  • Save oelmekki/a513dfdb679ac59c4a5e38fa4a675449 to your computer and use it in GitHub Desktop.
Save oelmekki/a513dfdb679ac59c4a5e38fa4a675449 to your computer and use it in GitHub Desktop.
Order sentences by similarities in postgres
-- $ createdb test
-- $ psql test
CREATE TABLE sentences( words varchar(255)[] );
INSERT INTO sentences VALUES( ARRAY['hello', 'world', 'how', 'are', 'you', 'doing', 'today'] );
INSERT INTO sentences VALUES( ARRAY['how', 'are', 'you', 'doing', 'today', 'Jim'] );
INSERT INTO sentences VALUES( ARRAY['Maybe', 'this', 'sentence', 'will', 'not', 'be', 'as', 'close'] );
-- extract as function for less typing
CREATE FUNCTION array_varchar_intersect( varchar(255)[], varchar(255)[] )
RETURNS varchar(255)[]
language sql
as $FUNCTION$
SELECT ARRAY(
SELECT UNNEST( lower( $1::text )::varchar(255)[] )
INTERSECT
SELECT UNNEST( lower( $2::text )::varchar(255)[] )
);
$FUNCTION$;
-- Input is `ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ]`
SELECT words, array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ) as matching
FROM sentences
ORDER BY array_length( array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ), 1 ) DESC NULLS LAST;
-- words | matching
-- --------------------------------------------+---------------------
-- {how,are,you,doing,today,Jim} | {are,you,jim,doing}
-- {hello,world,how,are,you,doing,today} | {are,you,doing}
-- {Maybe,this,sentence,will,not,be,as,close} | {}
-- (3 rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment