Created
October 9, 2020 22:50
-
-
Save robrich/3e36fe454e7e4b520e29d63b23822fb5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- FULL TEXT SEARCH | |
-- ================ | |
-- setup schema | |
CREATE DATABASE library; | |
USE library; | |
CREATE TABLE books ( | |
title VARCHAR(200) not null, | |
author VARCHAR(200) not null, | |
published DATE not null, | |
line INT not null, | |
content TEXT, | |
KEY (title, author, line) USING CLUSTERED COLUMNSTORE, | |
FULLTEXT (content) | |
); | |
-- load books | |
-- Thank you to http://www.gutenberg.org/ for the book text | |
CREATE PIPELINE books | |
AS LOAD DATA FS '/vagrant/books/*' | |
INTO TABLE books | |
FORMAT CSV | |
FIELDS TERMINATED BY '\t' | |
LINES TERMINATED BY '\n' | |
IGNORE 1 LINES; | |
-- start pipeline | |
TEST PIPELINE books LIMIT 10; | |
START PIPELINE books FOREGROUND LIMIT 1 BATCHES; | |
START PIPELINE books; | |
-- verify pipeline | |
SELECT * FROM books; | |
SELECT count(*) FROM books; | |
SELECT * FROM information_schema.PIPELINES_BATCHES_SUMMARY; | |
OPTIMIZE TABLE books FLUSH; | |
SELECT title, count(*) FROM books GROUP BY 1; | |
-- without the index | |
SELECT * FROM books WHERE content like '%Alice%'; | |
-- use full-text index | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice'); | |
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Alice') GROUP BY 1; | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Peter'); | |
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Peter') GROUP BY 1; | |
-- find multiple words | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice OR Peter'); | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice AND Peter'); | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Huck* OR Tom'); | |
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Huck* OR Tom') GROUP BY 1; | |
SELECT * FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom'); | |
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom') GROUP BY 1; | |
-- Highlight matching lines | |
SELECT HIGHLIGHT (content) AGAINST ('Alice') FROM books WHERE MATCH (content) AGAINST ('Alice'); | |
SELECT HIGHLIGHT (content) AGAINST ('Huck OR Tom') FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom'); | |
-- Cleanup | |
STOP PIPELINE books; | |
DROP PIPELINE books; | |
DROP TABLE books; | |
DROP DATABASE library; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment