Last active
February 6, 2023 18:49
-
-
Save panchicore/d20bed8de919ce772778b06bdb339477 to your computer and use it in GitHub Desktop.
https://www.postgresql.org/docs/current/textsearch-controls.html - PostgreSQL - 12.3. Controlling Text Search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create index if not exists idx_text_fts | |
on public.metadata_texts using gin (to_tsvector('english'::regconfig, text)); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT mt.id, | |
ts_headline('english', mt.text, query, 'MaxWords=10,MinWords=2,MaxFragments=2') as headlines, | |
ts_rank(to_tsvector(mt.text), query) AS rank | |
FROM metadata_texts as mt, websearch_to_tsquery('english', 'generative') query | |
WHERE to_tsvector(mt.text) @@ query | |
ORDER BY rank DESC | |
LIMIT 10; | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"id": 7, | |
"headlines": "recent development in <b>generative</b> AI e.g. chatGPT, the public ... opportunity with this new <b>generative</b> AI, some of the risks", | |
"rank": 0.08654518 | |
} | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* A set of functions called "actions" for `search` | |
*/ | |
function getKeywords(str){ | |
const regex = /\<b>(.+?)\<\/b>/gm; | |
let m; | |
let keywords = []; | |
while ((m = regex.exec(str)) !== null) { | |
if (m.index === regex.lastIndex) { | |
regex.lastIndex++; | |
} | |
m.forEach((match, groupIndex) => { | |
if(groupIndex > 0 && !keywords.includes(match)){ | |
keywords.push(match) | |
} | |
}); | |
} | |
return keywords; | |
} | |
export default { | |
search: async (ctx, next) => { | |
const {query} = ctx.request.query | |
try { | |
const knex = strapi.db.connection; | |
const matches = await knex.select( | |
[ | |
'id', | |
knex.raw(`ts_headline('english', text, query, 'MaxWords=10,MinWords=2,MaxFragments=2') as headlines`), | |
knex.raw(`ts_rank(to_tsvector(text), query) AS rank`) | |
] | |
).fromRaw( | |
`metadata_texts, websearch_to_tsquery('english', '${query}') query` | |
).whereRaw( | |
'to_tsvector(text) @@ query' | |
).orderBy( | |
'rank', 'desc' | |
).limit( | |
10 | |
) | |
let results = [] | |
for (let i = 0; i < matches.length; i++) { | |
results.push( | |
{data: matches[i], keywords: getKeywords(matches[i].headlines)} | |
) | |
} | |
ctx.body = results; | |
} catch (err) { | |
console.log(err) | |
ctx.body = err; | |
} | |
} | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"data": { | |
"id": 7, | |
"headlines": "recent development in <b>generative</b> AI e.g. chatGPT, the public ... opportunity with this new <b>generative</b> AI, some of the risks", | |
"rank": 0.08654518 | |
}, | |
"keywords": [ | |
"generative" | |
] | |
} | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict' | |
module.exports = { | |
async up(knex) { | |
// You have full access to the Knex.js API with an already initialized connection to the database | |
console.log("migrating....", __filename) | |
const res = await knex.schema.withSchema('public').raw( | |
`create index if not exists idx_text_fts on public.metadata_texts using gin (to_tsvector('english'::regconfig, text));` | |
) | |
console.log(res) | |
}, | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment