Skip to content

Instantly share code, notes, and snippets.

@za-arthur
Last active February 21, 2017 15:54
Show Gist options
  • Save za-arthur/34dd444c3742ab736080a9d7aec9ecb6 to your computer and use it in GitHub Desktop.
Save za-arthur/34dd444c3742ab736080a9d7aec9ecb6 to your computer and use it in GitHub Desktop.
Meetup sai.msu.ru. FTS.

Load dump

psql apod < scripts/apod.dump

Install dictionaries

CREATE EXTENSION hunspell_en_us;
CREATE EXTENSION hunspell_ru_ru;

CREATE TEXT SEARCH CONFIGURATION apod_conf (copy=simple);
ALTER TEXT SEARCH CONFIGURATION apod_conf
	ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH english_hunspell, english_stem;
ALTER TEXT SEARCH CONFIGURATION apod_conf
	ALTER MAPPING FOR hword, hword_part, word WITH russian_hunspell, russian_stem;

Examples:

SELECT ts_lexize('english_hunspell', 'booking');
SELECT ts_lexize('russian_hunspell', 'туши');

Add tsvector column

ALTER TABLE apod ADD COLUMN fts tsvector;
UPDATE apod SET fts =
	setweight(to_tsvector('apod_conf', coalesce(title,'')), 'A') ||
	setweight(to_tsvector('apod_conf', coalesce(text, '')), 'B');

CREATE FUNCTION apod_trigger()
RETURNS trigger as $$
BEGIN
	new.tsv :=
		setweight(to_tsvector('apod_conf', coalesce(new.title, '')), 'A') ||
		setweight(to_tsvector('apod_conf', coalesce(new.text, '')), 'B');
	RETURN new;
END
$$ LANGUAGE plpgsql;

CREATE TRIGGER apod_fts_update BEFORE INSERT OR UPDATE
	ON apod FOR EACH ROW EXECUTE procedure apod_trigger();

Example:

SELECT msg_id, title, lang, date, ts_headline('apod_conf', text, to_tsquery('apod_conf', 'milky & way & !galaxy')) AS text 
FROM (SELECT msg_id, title, lang, date::date, text 
	FROM apod 
	WHERE fts @@ to_tsquery('apod_conf', 'milky & way & !galaxy') 
	ORDER BY ts_rank(fts, to_tsquery('apod_conf', 'milky & way & !galaxy')) DESC 
	LIMIT 10) AS entries;

Install RUM

CREATE EXTENSION rum;

CREATE INDEX apod_tsv_idx ON apod USING rum (fts rum_tsvector_ops);

Examples:

SELECT msg_id, title, lang, date
	FROM apod 
	WHERE fts @@ to_tsquery('apod_conf', 'milky & way & !galaxy') 
	ORDER BY fts <=> to_tsquery('apod_conf', 'milky & way & !galaxy') 
	LIMIT 10;

EXPLAIN ANALYZE SELECT msg_id, title, lang, date
	FROM apod 
	WHERE fts @@ to_tsquery('apod_conf', 'milky & way & !galaxy') 
	ORDER BY fts <=> to_tsquery('apod_conf', 'milky & way & !galaxy') 
	LIMIT 10;

Text search with json and jsonb types

psql airports < scripts/airports.dump
CREATE EXTENSION ts_json;

Example:

SELECT ap FROM airports
	WHERE to_tsvector('apod_conf', jsonb_values(ap, ' ')) @@
		to_tsquery('apod_conf', 'georgia')
	LIMIT 10;

Use custom parser

CREATE EXTENSION ts_parser;

CREATE TEXT SEARCH CONFIGURATION ts_conf (parser=ts_parser);
ALTER TEXT SEARCH CONFIGURATION ts_conf
	ADD MAPPING FOR email, file, float, host, hword_numpart, int,
	numhword, numword, sfloat, uint, url, url_path, version
	WITH simple;
ALTER TEXT SEARCH CONFIGURATION ts_conf
	ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH english_hunspell, english_stem;
ALTER TEXT SEARCH CONFIGURATION ts_conf
	ALTER MAPPING FOR hword, hword_part, word WITH russian_hunspell, russian_stem;

Example:

SELECT to_tsvector('english', 'pg_trgm') as def_parser,
       to_tsvector('ts_conf', 'pg_trgm')  as new_parser;
SELECT to_tsvector('english', '123-abc') as def_parser,
       to_tsvector('ts_conf', '123-abc')  as new_parser;

query table example

CREATE TYPE query_tag AS ENUM ('sn', 'color', 'bang', 'shape');
CREATE TABLE query (q tsquery, tag query_tag);

INSERT INTO query VALUES ('supernova & star', 'sn'),
	('black', 'color'),
	('big & bang & black & hole', 'bang'),
	('spiral & galaxy', 'shape'),
	('black & hole', 'color');

SELECT q, tag, title
	FROM query, apod
	WHERE apod.fts @@ q AND tag = 'color';

Hints to user

CREATE EXTENSION pg_trgm;

CREATE TABLE words AS SELECT word FROM
	ts_stat('SELECT to_tsvector(''simple'', title) || to_tsvector(''simple'', text) FROM apod');

CREATE INDEX words_idx ON words USING GIN (word gin_trgm_ops);

Example:

SELECT word FROM words WHERE word % 'mulky';

Prepare pglist

CREATE EXTENSION rum;

CREATE FUNCTION pglist_trigger()
RETURNS trigger as $$
BEGIN
	new.fts :=
		setweight(to_tsvector('english', coalesce(new.subject, '')), 'A') ||
		setweight(to_tsvector('english', coalesce(new.body_plain, '')), 'B') ||
		setweight(to_tsvector('english', coalesce(new.author, '')), 'C');
	RETURN new;
END
$$ LANGUAGE plpgsql;

CREATE TRIGGER pglist_fts_update BEFORE INSERT OR UPDATE
	ON pglist FOR EACH ROW EXECUTE procedure pglist_trigger();

CREATE INDEX pglist_idx ON pglist USING rum (fts rum_tsvector_ops);

CREATE INDEX pg_query_idx ON pg_query USING rum (q);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment