-
-
Save ochafik/5e6d5f10423d881cd1f8ecf552648ebc to your computer and use it in GitHub Desktop.
Crash repro for sqlite-lembed with large inputs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <assert.h> | |
| #include <stdio.h> | |
| #include <sqlite3.h> | |
| // #include <sqlite-lembed.h> | |
| int sqlite3_lembed_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); | |
| // #define EXT_PATH "./dist/lembed0" | |
| #define EXT_PATH "./build/lembed0" | |
| #define MODEL1_PATH "./dist/.models/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf" | |
| static void execute1s(sqlite3 *db, const char *sql, const char *arg) { | |
| sqlite3_stmt *stmt; | |
| int rc; | |
| assert(sqlite3_prepare_v2(db, sql, -1, &stmt, 0) == SQLITE_OK); | |
| assert(sqlite3_bind_text(stmt, 1, arg, -1, SQLITE_STATIC) == SQLITE_OK); // (1-based index); | |
| // rc = sqlite3_step(stmt); | |
| // assert(rc == SQLITE_ROW || rc == SQLITE_DONE); | |
| assert(sqlite3_step(stmt) == SQLITE_DONE); | |
| assert(sqlite3_finalize(stmt) == SQLITE_OK); | |
| } | |
| int main() { | |
| sqlite3 *db; | |
| sqlite3_stmt *stmt; | |
| assert(sqlite3_auto_extension((void (*)())sqlite3_lembed_init) == SQLITE_OK); | |
| assert(sqlite3_open("test.db", &db) == SQLITE_OK); | |
| // assert(sqlite3_open(":memory:", &db) == SQLITE_OK); | |
| // assert(sqlite3_enable_load_extension(db, 1) == SQLITE_OK); | |
| // assert(sqlite3_load_extension(db, EXT_PATH, 0, 0) == SQLITE_OK); | |
| // assert(sqlite3_load_extension(db, EXT_PATH, "sqlite3_lembed_init", 0) == SQLITE_OK); | |
| assert(sqlite3_exec(db, "SELECT lembed_version();", 0, 0, 0) == SQLITE_OK); | |
| assert(sqlite3_exec(db, "SELECT sqlite_version();", 0, 0, 0) == SQLITE_OK); | |
| //sqlite3_prepare_v2(db, "SELECT name FROM sqlite_master WHERE type='function';", -1, &stmt, 0); | |
| sqlite3_prepare_v2(db, "SELECT name FROM sqlite_master;", -1, &stmt, 0); | |
| while (sqlite3_step(stmt) == SQLITE_ROW) { | |
| const char *func_name = (const char *)sqlite3_column_text(stmt, 0); | |
| printf("Available function: %s\n", func_name); | |
| } | |
| assert(sqlite3_finalize(stmt) == SQLITE_OK); | |
| assert(sqlite3_exec(db, "insert into lembed_models(name, model) values ('default', lembed_model_from_file('" MODEL1_PATH "');", 0, 0, 0) == SQLITE_OK); | |
| // execute1s( | |
| // db, | |
| // "insert into lembed_models(name, model) values ('default', lembed_model_from_file(?));", | |
| // MODEL1_PATH); | |
| execute1s( | |
| db, | |
| "select lembed('default', ?);", | |
| "a <( TypeScript](https://github.com/Microsoft), [Alberto Schiabel](https://github.com/jkomyno), [Alvis HT Tang](https://github.com/alvis), [Andrew Makarov](https://github.com/r3nya), [Benjamin Toueg](https://github.com/btoueg), [Chigozirim C.](), [Deividas Bakanas](https://github.com/DeividasBakanas), [Eugene Y. Q. Shen](https://github.com/eyqs), [Hannes Magnusson](https://github.com/Hannes-Magnusson-CK), [Huw](https://github.com/hoo29), [Kelvin Jin](https://github.com/kjin), [Klaus Meinhardt](https://github.com/ajafff), [Lishude](https://github.com/islishude), [Mariusz Wiktorczyk](https://github.com/mwiktorczyk), [Mohsen Azimi](https://github.com/mohsen1), [Nicolas Even](https://github.com/n-e),[Samuel Ainsworth](https://github.com/samuela), [Kyle Uehlein](https://github.com/kuehlein), [Thanik Bhongbhibhat](https://github.com/bhongy), [Marcin Kopacz](https://github.com/LinusU), [wafuwafu13](https://github.com/wafuwafu13), [Matteo Collina](https://github.com/mcollina), and [Dmitry Semigradsky](https://github.com/Semigradsky).\" )"); | |
| assert(sqlite3_close(db) == SQLITE_OK); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = ">=3.11" | |
| # dependencies = [ | |
| # "aiosqlite", | |
| # "gguf", | |
| # "huggingface-hub", | |
| # "more-itertools", | |
| # "openai", | |
| # "pydantic", | |
| # "sentencepiece", | |
| # "sqlite-lembed", | |
| # "sqlite-rembed", | |
| # "sqlite-vec", | |
| # ] | |
| # /// | |
| import time | |
| import traceback | |
| import aiosqlite | |
| import asyncio | |
| from contextlib import asynccontextmanager | |
| import logging | |
| from more_itertools import unzip | |
| import os | |
| from pydantic import BaseModel | |
| import sqlite_lembed | |
| import sqlite_rembed | |
| import sqlite_vec | |
| import sys | |
| from typing import Dict, Optional, Union, Literal | |
| from huggingface_hub import hf_hub_download | |
async def main():
    """Load the lembed extension, register a GGUF model and embed a large input.

    Progress markers are printed after each stage. Any exception raised along
    the way is caught here and its traceback is printed instead of propagating.
    """
    try:
        print('main')
        async with aiosqlite.connect(':memory:') as db:
            print('connected')

            # Extension loading is off by default and must be enabled first.
            await db.enable_load_extension(True)
            # Alternative locations tried while debugging:
            #   './build/lembed0', sqlite_lembed.loadable_path()
            await db.load_extension('./dist/lembed0')
            print('loaded')

            # Resolve the model file via the Hugging Face hub, then register
            # it under the name 'default'.
            model_path = hf_hub_download(
                repo_id='nomic-ai/nomic-embed-text-v1.5-GGUF',
                filename='nomic-embed-text-v1.5.Q8_0.gguf',
            )
            register_sql = '''
                INSERT INTO lembed_models(name, model)
                VALUES ('default', lembed_model_from_file(?));
            '''
            await db.execute(register_sql, (model_path,))

            # Embed a 2000-character input and report the result's Python type.
            large_input = 'ab' * 1000
            cursor = await db.execute(
                'SELECT lembed(?, ?)', ('default', large_input)
            )
            row = await cursor.fetchone()
            print(type(row[0]))
    except Exception:
        print(traceback.format_exc())
# Script entry point: run the async repro to completion on a new event loop.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment