Skip to content

Instantly share code, notes, and snippets.

@waltarix
Created March 4, 2017 05:40
Show Gist options
  • Save waltarix/9735e09cc405bc785d95024360ecb651 to your computer and use it in GitHub Desktop.
Save waltarix/9735e09cc405bc785d95024360ecb651 to your computer and use it in GitHub Desktop.
migemo function for sqlite3
/*
* Written by Alexey Tourbin <[email protected]>.
*
* The author has dedicated the code to the public domain. Anyone is free
* to copy, modify, publish, use, compile, sell, or distribute the original
* code, either in source code form or as a compiled binary, for any purpose,
* commercial or non-commercial, and by any means.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <sqlite3ext.h>
#include "migemo.h"
SQLITE_EXTENSION_INIT1
/*
 * One slot of the compiled-pattern cache used by func_migemo().
 * A slot whose `s` is NULL is unused.
 */
typedef struct {
/* The original search word (cache key); allocated with strdup(). */
char *s;
/* The PCRE pattern compiled from the migemo expansion of `s`. */
pcre *p;
/* Result of pcre_study() on `p`; passed through to pcre_exec(). */
pcre_extra *e;
} cache_entry;
/*
 * Number of cache_entry slots in the pattern cache.
 * Can be overridden at build time (e.g. -DCACHE_SIZE=32).
 */
#ifndef CACHE_SIZE
#define CACHE_SIZE 16
#endif
static
void func_migemo(sqlite3_context *ctx, int argc, sqlite3_value **argv)
{
const char *re, *str;
pcre *p;
pcre_extra *e;
assert(argc == 2);
re = (const char *) sqlite3_value_text(argv[0]);
if (!re) {
sqlite3_result_error(ctx, "no word", -1);
return;
}
str = (const char *) sqlite3_value_text(argv[1]);
if (!str) {
str = "";
}
/* simple LRU cache */
{
int i;
int found = 0;
cache_entry *cache = sqlite3_user_data(ctx);
assert(cache);
for (i = 0; i < CACHE_SIZE && cache[i].s; i++)
if (strcmp(re, cache[i].s) == 0) {
found = 1;
break;
}
if (found) {
if (i > 0) {
cache_entry c = cache[i];
memmove(cache + 1, cache, i * sizeof(cache_entry));
cache[0] = c;
}
}
else {
cache_entry c;
const char *err;
int pos;
migemo *m;
m = migemo_open("/usr/local/opt/cmigemo/share/migemo/utf-8/migemo-dict");
unsigned char *migemo_pattern;
migemo_pattern = migemo_query(m, (const unsigned char*)re);
c.p = pcre_compile((char*)migemo_pattern, 0, &err, &pos, NULL);
migemo_release(m, migemo_pattern);
migemo_close(m);
if (!c.p) {
char *e2 = sqlite3_mprintf("%s: %s (offset %d)", re, err, pos);
sqlite3_result_error(ctx, e2, -1);
sqlite3_free(e2);
return;
}
c.e = pcre_study(c.p, 0, &err);
c.s = strdup(re);
if (!c.s) {
sqlite3_result_error(ctx, "strdup: ENOMEM", -1);
pcre_free(c.p);
pcre_free(c.e);
return;
}
i = CACHE_SIZE - 1;
if (cache[i].s) {
free(cache[i].s);
assert(cache[i].p);
pcre_free(cache[i].p);
pcre_free(cache[i].e);
}
memmove(cache + 1, cache, i * sizeof(cache_entry));
cache[0] = c;
}
p = cache[0].p;
e = cache[0].e;
}
{
int rc;
assert(p);
rc = pcre_exec(p, e, str, strlen(str), 0, 0, NULL, 0);
sqlite3_result_int(ctx, rc >= 0);
return;
}
}
int sqlite3_extension_init(sqlite3 *db, char **err, const sqlite3_api_routines *api)
{
SQLITE_EXTENSION_INIT2(api)
cache_entry *cache = calloc(CACHE_SIZE, sizeof(cache_entry));
if (!cache) {
*err = "calloc: ENOMEM";
return 1;
}
sqlite3_create_function(db, "migemo", 2, SQLITE_UTF8, cache, func_migemo, NULL, NULL);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment