Skip to content

Instantly share code, notes, and snippets.

@ochafik
Last active November 17, 2024 02:50
Show Gist options
  • Select an option

  • Save ochafik/5e6d5f10423d881cd1f8ecf552648ebc to your computer and use it in GitHub Desktop.

Select an option

Save ochafik/5e6d5f10423d881cd1f8ecf552648ebc to your computer and use it in GitHub Desktop.
Crash repro for sqlite-lembed with large inputs
#include <assert.h>
#include <stdio.h>
#include <sqlite3.h>
// #include <sqlite-lembed.h>
int sqlite3_lembed_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
// #define EXT_PATH "./dist/lembed0"
#define EXT_PATH "./build/lembed0"
#define MODEL1_PATH "./dist/.models/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf"
/*
 * Run a single-parameter SQL statement to completion.
 *
 * Prepares `sql` on `db`, binds `arg` as the first (1-based) text parameter,
 * steps the statement until it reports SQLITE_DONE and finalizes it.
 *
 * Every SQLite call is made OUTSIDE of assert() so it still executes when the
 * file is built with NDEBUG; only the returned status code is asserted.
 */
static void execute1s(sqlite3 *db, const char *sql, const char *arg) {
    sqlite3_stmt *stmt = NULL;
    int rc;

    rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
    assert(rc == SQLITE_OK);

    /* SQLITE_STATIC: `arg` is not copied, so it must stay valid until the
     * statement is finalized below. */
    rc = sqlite3_bind_text(stmt, 1, arg, -1, SQLITE_STATIC);
    assert(rc == SQLITE_OK);

    /* A SELECT yields SQLITE_ROW for each result row before SQLITE_DONE, so
     * drain the rows instead of expecting SQLITE_DONE on the first step. */
    do {
        rc = sqlite3_step(stmt);
    } while (rc == SQLITE_ROW);
    assert(rc == SQLITE_DONE);

    rc = sqlite3_finalize(stmt);
    assert(rc == SQLITE_OK);

    (void)rc; /* silence "set but not used" when assert() is compiled out */
}
/*
 * Crash repro driver.
 *
 * Registers the statically linked lembed extension for every new connection,
 * opens test.db, sanity-checks that the extension answers, lists the names in
 * sqlite_master, registers MODEL1_PATH as the 'default' model and finally
 * embeds one long markdown string through execute1s().
 *
 * SQLite calls are kept outside of assert() so they still run under NDEBUG.
 * Returns 0 when every step succeeded.
 */
int main(void) {
    sqlite3 *db = NULL;
    sqlite3_stmt *stmt = NULL;
    int rc;

    /* Cast to the exact entry-point type sqlite3_auto_extension() declares. */
    rc = sqlite3_auto_extension((void (*)(void))sqlite3_lembed_init);
    assert(rc == SQLITE_OK);

    rc = sqlite3_open("test.db", &db);
    assert(rc == SQLITE_OK);
    // Alternative ways of opening the database / loading the extension:
    // assert(sqlite3_open(":memory:", &db) == SQLITE_OK);
    // assert(sqlite3_enable_load_extension(db, 1) == SQLITE_OK);
    // assert(sqlite3_load_extension(db, EXT_PATH, 0, 0) == SQLITE_OK);
    // assert(sqlite3_load_extension(db, EXT_PATH, "sqlite3_lembed_init", 0) == SQLITE_OK);

    rc = sqlite3_exec(db, "SELECT lembed_version();", 0, 0, 0);
    assert(rc == SQLITE_OK);
    rc = sqlite3_exec(db, "SELECT sqlite_version();", 0, 0, 0);
    assert(rc == SQLITE_OK);

    //sqlite3_prepare_v2(db, "SELECT name FROM sqlite_master WHERE type='function';", -1, &stmt, 0);
    rc = sqlite3_prepare_v2(db, "SELECT name FROM sqlite_master;", -1, &stmt, 0);
    assert(rc == SQLITE_OK);
    while (sqlite3_step(stmt) == SQLITE_ROW) {
        /* sqlite3_column_text() may return NULL; never hand NULL to %s. */
        const unsigned char *func_name = sqlite3_column_text(stmt, 0);
        printf("Available function: %s\n",
               func_name ? (const char *)func_name : "(null)");
    }
    rc = sqlite3_finalize(stmt);
    assert(rc == SQLITE_OK);

    /* The original statement lacked the ')' closing values(...), which made
     * it a syntax error. */
    rc = sqlite3_exec(db, "insert into lembed_models(name, model) values ('default', lembed_model_from_file('" MODEL1_PATH "'));", 0, 0, 0);
    assert(rc == SQLITE_OK);
    // execute1s(
    // db,
    // "insert into lembed_models(name, model) values ('default', lembed_model_from_file(?));",
    // MODEL1_PATH);

    /* The large input under test. */
    execute1s(
        db,
        "select lembed('default', ?);",
        "a <( TypeScript](https://github.com/Microsoft), [Alberto Schiabel](https://github.com/jkomyno), [Alvis HT Tang](https://github.com/alvis), [Andrew Makarov](https://github.com/r3nya), [Benjamin Toueg](https://github.com/btoueg), [Chigozirim C.](), [Deividas Bakanas](https://github.com/DeividasBakanas), [Eugene Y. Q. Shen](https://github.com/eyqs), [Hannes Magnusson](https://github.com/Hannes-Magnusson-CK), [Huw](https://github.com/hoo29), [Kelvin Jin](https://github.com/kjin), [Klaus Meinhardt](https://github.com/ajafff), [Lishude](https://github.com/islishude), [Mariusz Wiktorczyk](https://github.com/mwiktorczyk), [Mohsen Azimi](https://github.com/mohsen1), [Nicolas Even](https://github.com/n-e),[Samuel Ainsworth](https://github.com/samuela), [Kyle Uehlein](https://github.com/kuehlein), [Thanik Bhongbhibhat](https://github.com/bhongy), [Marcin Kopacz](https://github.com/LinusU), [wafuwafu13](https://github.com/wafuwafu13), [Matteo Collina](https://github.com/mcollina), and [Dmitry Semigradsky](https://github.com/Semigradsky).\" )");

    rc = sqlite3_close(db);
    assert(rc == SQLITE_OK);

    (void)rc; /* silence "set but not used" when assert() is compiled out */
    return 0;
}
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "aiosqlite",
# "gguf",
# "huggingface-hub",
# "more-itertools",
# "openai",
# "pydantic",
# "sentencepiece",
# "sqlite-lembed",
# "sqlite-rembed",
# "sqlite-vec",
# ]
# ///
import time
import traceback
import aiosqlite
import asyncio
from contextlib import asynccontextmanager
import logging
from more_itertools import unzip
import os
from pydantic import BaseModel
import sqlite_lembed
import sqlite_rembed
import sqlite_vec
import sys
from typing import Dict, Optional, Union, Literal
from huggingface_hub import hf_hub_download
async def main() -> None:
    """Run the sqlite-lembed large-input repro against an in-memory database.

    Loads the extension from ``./dist/lembed0``, registers the GGUF model
    fetched with ``hf_hub_download`` under the name ``'default'``, embeds a
    2000-character string and prints the Python type of the returned value.

    Any ``Exception`` raised along the way is caught and its traceback is
    printed, so the coroutine itself never raises one.
    """
    # Same text the original triple-quoted literal produced; it had no
    # placeholders, so the f-prefix was dropped.
    insert_model_sql = (
        "\nINSERT INTO lembed_models(name, model)\n"
        "VALUES ('default', lembed_model_from_file(?));\n"
    )
    try:
        print('main')
        async with aiosqlite.connect(':memory:') as db:
            # print(sqlite_lembed.loadable_path())
            print('connected')
            await db.enable_load_extension(True)
            await db.load_extension('./dist/lembed0')
            # Alternative extension locations:
            # await db.load_extension('./build/lembed0')
            # await db.load_extension(sqlite_lembed.loadable_path())
            print('loaded')

            model_path = hf_hub_download(
                repo_id='nomic-ai/nomic-embed-text-v1.5-GGUF',
                filename='nomic-embed-text-v1.5.Q8_0.gguf',
            )
            cursor = await db.execute(insert_model_sql, (model_path,))
            await cursor.close()

            # The large input under test; the cursor is closed on exit.
            async with db.execute(
                'SELECT lembed(?, ?)', ('default', 'ab' * 1000)
            ) as cursor:
                row = await cursor.fetchone()
            print(type(row[0]))
    except Exception:
        # Top-level boundary of a repro script: report the failure and return.
        print(traceback.format_exc())
# Script entry point: run the repro coroutine only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment