Created
October 22, 2024 01:30
-
-
Save slvnperron/a52916e71c82e3955c53aff18cc1741d to your computer and use it in GitHub Desktop.
Botpress Table Domain Extractor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Botpress Table Domain Extractor
//
// For each configured column of a table, this script collects the distinct
// values (cells may hold several comma-separated options), counts how many
// records carry each value, renders the result as a markdown document and
// uploads it as an indexed file so it can be used as knowledge-base content.

// The Table we want to analyze
const Table: FromagesTable = FromagesTable
const TableName = 'FromagesTable'

// List of columns we want to analyze the unique values and count of
const columns: Array<keyof Awaited<ReturnType<(typeof Table)['getRecord']>>> = [
  'alcools',
  'familles',
  'flaveurs',
  'fromageries',
  'laits',
  'pastilles',
  'pate',
  'region',
  'texture',
  'utilisations'
]

/**
 * Splits a cell into its individual options.
 * Cells can be a list of options (comma-separated); each option is trimmed
 * and empty fragments are discarded. Null/undefined cells yield an empty list.
 */
const splitOptions = (raw: unknown): string[] =>
  String(raw ?? '')
    .split(',')
    .map((part) => part.trim())
    .filter((part) => part.length > 0)

// Example filter object showing every analyzed column, rendered at the end of the document
const filterTemplate: Record<string, { $regex: string }> = {}
let content = ''

for (const col of columns) {
  const colName = String(col)
  const countKey = `${colName}Count`

  // 'group' to get the unique values and how many records there are for each of them
  const rows = await Table.findRecords({
    group: {
      [col]: ['key', 'count']
    }
  })

  // Because cells had to be split on commas, more than one group can contain a
  // given value, so the group counts are summed per individual value.
  // Matching is done on exact trimmed tokens: a substring test would wrongly
  // credit "Brie" with the records of "Brie de Meaux".
  // The Map keeps insertion order, so its keys are the unique values in
  // first-seen order.
  const counts = new Map<string, number>()
  for (const row of rows) {
    // Grouped rows expose the dynamic "<column>Count" key, hence the loose record view
    const record = row as Record<string, unknown>
    // Number() guards against a count that arrives as a numeric string
    const groupCount = Number(record[countKey] ?? 0)
    // De-duplicate within the row so a group listing a value twice is counted once
    for (const value of new Set(splitOptions(record[colName]))) {
      counts.set(value, (counts.get(value) ?? 0) + groupCount)
    }
  }
  const values = [...counts.keys()]

  filterTemplate[colName] = { $regex: '...' }

  content += [
    `### Analysis of column "${colName}"`,
    `The column "${colName}" can be hard-filtered with the following ${TableName} filter:`,
    `\`{ filter: { "${colName}": { "$regex": "value" } } }\` expression.`,
    'This filter should be used when possible. You can also combine both the "filter" and "search".',
    'For example: `{ search: "poivre", filter: ... }`',
    'Here are the possible values to filter by:',
    values.map((value) => `- "${value}" | Count = ${counts.get(value)}`).join('\n')
  ]
    .join('\n')
    .trim()
  content += '\n\n---\n\n'
}

content += [
  '',
  '## Filtering by multiple columns',
  'Multiple filters can be used at once:',
  JSON.stringify(filterTemplate, null, 2),
  'This is a MongoDB-like query language, so you can also use "$or" and "$and" etc.',
  ''
].join('\n')

// Publish the generated analysis as an indexed file tagged for the knowledge base
await client.uploadFile({
  key: 'domain.txt',
  index: true,
  publicContentImmediatelyAccessible: true,
  content,
  tags: {
    source: 'knowledge-base',
    kbId: 'kb-4363f5ae1a',
    title: 'Table Analysis'
  }
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment