Skip to content

Instantly share code, notes, and snippets.

@DeoLeung
Created January 22, 2024 10:01
Show Gist options
  • Save DeoLeung/ebd3f14c96cfc95ad0a025caaf114b3d to your computer and use it in GitHub Desktop.
Save DeoLeung/ebd3f14c96cfc95ad0a025caaf114b3d to your computer and use it in GitHub Desktop.
sqlalchemy recipe for paradedb
"""
sqlalchemy discussion: https://github.com/sqlalchemy/sqlalchemy/discussions/10841
"""
def dump_dict(v):
"""recursively dump dict into rust style, which no quotes for key"""
match v:
case str():
return f'"{v}"'
case int():
return f'{v}'
case float():
return f'{v}'
case dict():
items = []
for i, j in v.items():
items.append(f'{i}: {dump_dict(j)}')
return '{ ' + ','.join(items) + ' }'
case _:
raise NotImplementedError(v)
def kw_kv(k, v):
"""wrap into text as postgresql function kwargs 'k => v'"""
match v:
case dict():
return text(f"{k} => '{dump_dict(v)}'")
case _:
return text(f"{k} => '{v}'")
def create_bm25_index(
index_name: str,
schema_name: str,
table_name: str,
key_field: str,
text_fields: dict[str, dict] = None,
numeric_fields: dict[str, dict] = None,
boolean_fields: dict[str, dict] = None,
json_fields: dict[str, dict] = None,
):
"""helper function to generate bm25 index create ddl"""
args = [
kw_kv('index_name', index_name),
kw_kv('schema_name', schema_name),
kw_kv('table_name', table_name),
kw_kv('key_field', key_field),
]
for k, v in zip(
['text_fields', 'numeric_fields', 'boolean_fields', 'json_fields'],
[text_fields, numeric_fields, boolean_fields, json_fields]):
if v is not None:
args.append(kw_kv(k, v))
return func.paradedb.create_bm25(*args)
def drop_bm25_index(index_name: str):
"""helper function to generate bm25 index drop ddl"""
return func.paradedb.drop_bm25(text(f"'{index_name}'"))
# add event to table
table = Table(...)
create_index = create_bm25_index(
index_name=f'{table.name}_search_idx',
schema_name=f'{table.schema}',
table_name=f'{table.name}',
key_field='id',
text_fields={
'description': {
'tokenizer': {
'type': 'chinese_compatible'
}
},
'name': {
'tokenizer': {
'type': 'chinese_compatible'
}
},
'category': {
'tokenizer': {
'type': 'raw'
}
},
},
numeric_fields={
'account_id': {
'tokenizer': {
'type': 'int4'
}
},
},
)
drop_index = drop_bm25_index(f'{table.name}_search_idx')
event.listen(table, 'after_create', DDL(f'CALL {create_index};'))
event.listen(table, 'after_drop', DDL(f'CALL {drop_index};'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment