Skip to content

Instantly share code, notes, and snippets.

@leiless
Last active February 26, 2019 06:41
Show Gist options
  • Save leiless/b5699f6941dcc12d8754bca2fadae820 to your computer and use it in GitHub Desktop.
Save leiless/b5699f6941dcc12d8754bca2fadae820 to your computer and use it in GitHub Desktop.
sqlite3_bind_text16() will discard heading BOM(UTF-16) character, i.e. BOM-aware
/*
* Created 190102
*
* gcc -Wall -Wextra -std=c11 -O0 -g -DDEBUG -lsqlite3 sqlite_bom.c -o sqlite_bom
*
* see:
* https://www.sqlite.org/cintro.html
* https://www.sqlite.org/quickstart.html
* https://www.sqlite.org/howtocompile.html
* https://en.wikipedia.org/wiki/Byte_order_mark#UTF-16
*/
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <unistd.h>
#include <stdint.h>
#include <locale.h>
#include <string.h>
#include <sqlite3.h>
typedef uint16_t char16_t;
static const char *db_file = "bom.db";
#define LOG(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
#define LOG_ERR(fmt, ...) fprintf(stderr, "ERR: " fmt "\n", ##__VA_ARGS__)
#ifdef DEBUG
#define LOG_DBG(fmt, ...) LOG("DBG: " fmt, ##__VA_ARGS__)
#else
#define LOG_DBG(fmt, ...) (void) (0, ##__VA_ARGS__)
#endif
/**
* @return always zero
* NOTE: if it returns a non-zero value
* sqlite3_exec() will return SQLITE_ABORT
* without invoking the callback again and
* without running any subsequent SQL statements
* see: https://www.sqlite.org/c3ref/exec.html
*/
static int callback(void *cookie __unused, int argc, char **argv, char **colv)
{
int i;
for (i = 0; i < argc; i++) {
LOG_DBG("%s = %s", colv[i], argv[i]);
}
return 0;
}
static int create_bom_table(sqlite3 *conn)
{
int rc;
const char *sql = "CREATE TABLE bom_tab("
"id INTEGER PRIMARY KEY,"
"dir_path TEXT NOT NULL,"
"file_name TEXT NOT NULL"
");";
char *errmsg = NULL;
rc = sqlite3_exec(conn, sql, callback, NULL, &errmsg);
if (rc != SQLITE_OK) {
LOG_ERR("sqlite3_exec() fail err: [%d] %s", rc, errmsg);
sqlite3_free(errmsg);
}
return rc;
}
/*
* see: https://www.sqlite.org/c3ref/prepare.html
*/
//static
int insert_into_tab(
sqlite3 *conn,
int id,
const char *dir_path,
const char *file_name)
{
int rc;
const char *sql = "INSERT INTO bom_tab VALUES(?, ?, ?);";
sqlite3_stmt *st;
int i = 0;
rc = sqlite3_prepare_v2(conn, sql, -1, &st, NULL);
if (rc != SQLITE_OK) {
LOG_ERR("sqlite3_prepare_v2() fail err: [%d] %s",
rc, sqlite3_errmsg(conn));
goto out_exit;
}
rc = sqlite3_bind_int(st, ++i, id);
if (rc) {
LOG_ERR("sqlite3_bind_int() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
rc = sqlite3_bind_text(st, ++i, dir_path, -1, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
rc = sqlite3_bind_text(st, ++i, file_name, -1, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
rc = sqlite3_step(st);
if (rc != SQLITE_DONE) {
LOG_ERR("sqlite3_step() fail err: [%d] %s", rc, sqlite3_errmsg(conn));
goto out_exit2;
} else {
rc = SQLITE_OK; /* SQLITE_DONE = 101 */
}
out_exit2:
sqlite3_finalize(st);
out_exit:
return rc;
}
/**
* Naive UTF-16 strlen variant
* @return return number of characters in UTF-16 form
*
* see:
* https://stackoverflow.com/a/14528742
* https://stackoverflow.com/a/22128415
*/
static inline size_t strlen16(register const char16_t *str)
{
register size_t len = 0;
assert(str != NULL);
while (str[len++])
continue;
return len;
}
/*
* UTF-16 variant
*/
static int insert_into_tab_16(
sqlite3 *conn,
int id,
const char16_t *dir_path,
const char16_t *file_name)
{
int rc;
const char *sql = "INSERT INTO bom_tab VALUES(?, ?, ?);";
sqlite3_stmt *st;
int i = 0;
int len;
rc = sqlite3_prepare_v2(conn, sql, -1, &st, NULL);
if (rc != SQLITE_OK) {
LOG_ERR("sqlite3_prepare_v2() fail err: [%d] %s",
rc, sqlite3_errmsg(conn));
goto out_exit;
}
rc = sqlite3_bind_int(st, ++i, id);
if (rc) {
LOG_ERR("sqlite3_bind_int() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
len = strlen16(dir_path) * sizeof(char16_t);
rc = sqlite3_bind_text16(st, ++i, dir_path, len, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
len = strlen16(file_name) * sizeof(char16_t);
rc = sqlite3_bind_text16(st, ++i, file_name, len, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit2;
}
rc = sqlite3_step(st);
if (rc != SQLITE_DONE) {
LOG_ERR("sqlite3_step() fail err: [%d] %s", rc, sqlite3_errmsg(conn));
goto out_exit2;
} else {
rc = SQLITE_OK;
}
out_exit2:
sqlite3_finalize(st);
out_exit:
return rc;
}
static void select_from_tab(
sqlite3 *conn,
const char *dir_path,
const char *file_name)
{
int rc;
const char *sql = "SELECT id, dir_path, file_name FROM bom_tab "
"WHERE dir_path = ? AND file_name = ?;";
sqlite3_stmt *st;
int i = 0;
int id;
const unsigned char *dir_path_out;
const unsigned char *file_name_out;
rc = sqlite3_prepare_v2(conn, sql, -1, &st, NULL);
if (rc != SQLITE_OK) {
LOG_ERR("sqlite3_prepare_v2() fail err: [%d] %s",
rc, sqlite3_errmsg(conn));
return;
}
rc = sqlite3_bind_text(st, ++i, dir_path, -1, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit;
}
rc = sqlite3_bind_text(st, ++i, file_name, -1, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit;
}
while (1) {
rc = sqlite3_step(st);
if (rc == SQLITE_ROW) {
id = sqlite3_column_int(st, 0);
dir_path_out = sqlite3_column_text(st, 1);
file_name_out = sqlite3_column_text(st, 2);
LOG("Selected column id: %d", id);
} else if (rc == SQLITE_DONE) {
rc = SQLITE_OK;
break;
} else {
LOG_ERR("sqlite3_step() fail err: [%d] %s", rc, sqlite3_errmsg(conn));
break;
}
}
out_exit:
sqlite3_finalize(st);
}
static void select_from_tab_16(
sqlite3 *conn,
const char16_t *dir_path,
const char16_t *file_name)
{
int rc;
const char *sql = "SELECT id, dir_path, file_name FROM bom_tab "
"WHERE dir_path = ? AND file_name = ?;";
sqlite3_stmt *st;
int i = 0;
int len;
int id;
const char16_t *dir_path_out;
const char16_t *file_name_out;
rc = sqlite3_prepare_v2(conn, sql, -1, &st, NULL);
if (rc != SQLITE_OK) {
LOG_ERR("sqlite3_prepare_v2() fail err: [%d] %s",
rc, sqlite3_errmsg(conn));
return;
}
len = strlen16(dir_path) * sizeof(char16_t);
rc = sqlite3_bind_text16(st, ++i, dir_path, len, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit;
}
len = strlen16(file_name) * sizeof(char16_t);
rc = sqlite3_bind_text16(st, ++i, file_name, len, SQLITE_STATIC);
if (rc) {
LOG_ERR("sqlite3_bind_text() fail i: %d err: [%d] %s",
i, rc, sqlite3_errmsg(conn));
goto out_exit;
}
while (1) {
rc = sqlite3_step(st);
if (rc == SQLITE_ROW) {
id = sqlite3_column_int(st, 0);
dir_path_out = sqlite3_column_text16(st, 1);
file_name_out = sqlite3_column_text16(st, 2);
LOG("Selected column id: %d", id);
} else if (rc == SQLITE_DONE) {
rc = SQLITE_OK;
break;
} else {
LOG_ERR("sqlite3_step() fail err: [%d] %s", rc, sqlite3_errmsg(conn));
break;
}
}
out_exit:
sqlite3_finalize(st);
}
int main(int argc __unused, char **argv __unused)
{
int rc;
sqlite3 *conn;
int i = 0;
if (!setlocale(LC_CTYPE, "en_US.UTF-8")) LOG_ERR("setlocale(3) fail");
LOG("sizeof(char16_t): %zu", sizeof(char16_t));
LOG("SQLite version %s", SQLITE_VERSION);
LOG("");
if (unlink(db_file)) {
LOG_ERR("unlink(2) fail path: %s errno: %d", db_file, errno);
}
rc = sqlite3_open(db_file, &conn);
if (rc) {
LOG_ERR("Cannot open database(%s) err: [%d] %s\n",
db_file, rc, sqlite3_errmsg(conn));
goto out_exit;
}
rc = create_bom_table(conn);
if (rc) goto out_exit;
#if 1
/*
* Heading UTF16 BOM is controllable over sqlite3_bind_text16()
*/
LOG("Insert with BOM(UTF-16) id: %d", i);
rc = insert_into_tab_16(conn, i++, u"/home/sqlite3/ext", u"\uFEFF_README.md");
LOG("");
#elif 0
LOG("Insert without BOM id: %d\n", i);
rc = insert_into_tab_16(conn, i++, u"/home/sqlite3/ext", u"_README.md");
#else
LOG("Insert with BOM(UTF-8) id: %d\n", i);
rc = insert_into_tab(conn, i++, "/home/sqlite3/ext", "\xEF\xBB\xBF_README.md");
#endif
if (rc) goto out_exit;
LOG("Select with BOM(UTF-16)");
select_from_tab_16(conn, u"/home/sqlite3/ext", u"\uFEFF_README.md");
LOG("Select without BOM(UTF-16)");
select_from_tab_16(conn, u"/home/sqlite3/ext", u"_README.md");
LOG("\nSelect with BOM(UTF-8)");
select_from_tab(conn, "/home/sqlite3/ext", "\xEF\xBB\xBF_README.md");
LOG("Select without BOM(UTF-8)");
select_from_tab(conn, "/home/sqlite3/ext", "_README.md");
LOG("\ndone");
out_exit:
sqlite3_close(conn);
return rc;
}
@leiless
Copy link
Author

leiless commented Jan 3, 2019

Sample output:

sizeof(char16_t): 2
SQLite version 3.24.0

Insert with BOM(UTF-16)  id: 0

Select with BOM(UTF-16)
Selected column  id: 0
Select without BOM(UTF-16)
Selected column  id: 0

Select with BOM(UTF-8)
Select without BOM(UTF-8)

done

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment