Skip to content

Instantly share code, notes, and snippets.

@hiropppe
Last active June 16, 2016 06:07
Show Gist options
  • Save hiropppe/488767fa9cfd6a55f9dd3218c0c70498 to your computer and use it in GitHub Desktop.
Save hiropppe/488767fa9cfd6a55f9dd3218c0c70498 to your computer and use it in GitHub Desktop.
knp distsim call test
/*
# VirtualBoxのディスクがIO負荷で(?)勝手にリードオンリーになった
VBoxManage setextradata "docker-host" "VBoxInternal/Devices/ahci/0/LUN0/Config/FlushInterval" 1000000
# 実行ファイル
gcc -DHAVE_CONFIG_H -I. -I.. -I/knp-4.16/CRF++-0.58 -I/knp-4.16/crf -DUSE_CRF -DKNP_DICT="\"/usr/local/share/knp/dict"\" -DKNP_RULE="\"/usr/local/share/knp/rule"\" -DKNP_RC_DEFAULT="\"/usr/local/etc/knprc"\" -DCF_VERSION="\"CF1.1"\" -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT dsim.o -MD -MP -MF .deps/dsim.Tpo -c -o dsim.o dsim.c
/bin/sh ../libtool --tag=CC --mode=link g++ -g3 -O0 -o dsim dsim.o tools.o db.o -lm /knp-4.16/distsim/libdistsim.la -lstdc++ -L/knp-4.16/cdb -lcdb -lz
# 共有ライブラリ
## distsim
/bin/sh ../libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -I.. -I../cdb -g -O2 -MT distsim_for_knp.lo -MD -MP -MF .deps/distsim_for_knp.Tpo -c -fPIC -o distsim_for_knp.lo distsim_for_knp.cc
/bin/sh ../libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -I.. -I../cdb -g -O2 -MT distsim.lo -MD -MP -MF .deps/distsim.Tpo -c -fPIC -o distsim.lo distsim.cc
## cdb
make shared
cp libcdb.so /usr/local/lib
ldconfig
## system
;コンパイル
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT tools.o -MD -MP -MF .deps/tools.Tpo -c -fPIC -o tools.o tools.c
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT db.o -MD -MP -MF .deps/db.Tpo -c -fPIC -o db.o db.c
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT dsim.o -MD -MP -MF .deps/dsim.Tpo -c -fPIC -o dsim.o dsim.c
;共有ライブラリの作成
g++ -shared -o libdsim.so dsim.o db.o tools.o ../distsim/distsim.o ../distsim/distsim_for_knp.o -L../cdb/ -lstdc++ -lcdb -lz
cp libdsim.so /usr/local/lib
ldconfig
gcc -I./ dmain.c -o dmain -ldsim
; 共有ライブラリをバージョニングする場合
g++ -shared -Wl,-soname=libdsim.so.1 -o libdsim.so.1.0.0 dsim.o db.o tools.o ../distsim/distsim.o ../distsim/distsim_for_knp.o -L../cdb/ -lstdc++ -lcdb -lz
cp libdsim.so.1.0.0 /usr/local/lib
ldconfig
cd /usr/local/lib
ln -s libdsim.so.1.0.0 libdsim.so
*/
/* dsim.h */
#ifndef DSIM_H
#define DSIM_H
/*
 * Public interface of the dsim wrapper around the distsim similarity code.
 */

/*
 * Opens the dictionary database named by dic_name and initializes distsim.
 * similarity() looks tokens up in the database opened here.
 */
void init_dsim(char *dic_name);
/*
 * Converts rep1 and rep2 to id strings through the dictionary, prints a
 * "distsim_score(...)" line to stdout and returns the distsim score of the
 * two id strings.
 */
float similarity(char *rep1, char *rep2);
#endif
/* dsim.c */
#include <stdio.h>
#include <sys/stat.h>
#include <const.h>
#include <path.h>
#include "distsim_for_knp.h"
#include "dsim.h"
/*
 * Output buffers that similarity() passes to rep2id2(): static_buffer1
 * receives the id string of its first argument, static_buffer2 that of its
 * second argument.
 */
char static_buffer1[5120];
char static_buffer2[5120];
/* Dictionary handle: assigned by init_dsim(), read by rep2id2() via db_get(). */
DBM_FILE mrph2id_db;
/*
 * Convert a representation string into the corresponding id string.
 *
 * The first rep_len bytes of rep are split on the delimiters '+' and '?'.
 * Every token is looked up in mrph2id_db with db_get(); the values that are
 * found are appended to buffer, each one (from the second on) preceded by
 * the delimiter character that stands directly before its token in rep.
 * Tokens without a database entry are dropped.
 *
 * rep      string to convert; may be NULL
 * rep_len  number of leading bytes of rep to use
 * buffer   output, always left NUL-terminated. No size is passed in, so the
 *          caller must supply a buffer large enough for all looked-up ids
 *          (original note: at least MRPH_MAX * 9 bytes, i.e. up to 8 digits
 *          plus one delimiter per morpheme -- TODO confirm for this caller).
 *
 * Returns buffer, which holds the empty string when rep is NULL, rep_len is
 * not positive, or no token has a database entry.
 */
char *rep2id2(char *rep, int rep_len, char *buffer)
{
    char *copied_rep, *token, *value;

    buffer[0] = '\0';
    if (!rep || rep_len <= 0) {
        return buffer;
    }

    /* strtok() writes NULs over the delimiters, so tokenize a private copy. */
    copied_rep = (char *)malloc_data(sizeof(char) * rep_len + 1, "rep2id");
    strncpy(copied_rep, rep, rep_len);
    copied_rep[rep_len] = '\0';

    for (token = strtok(copied_rep, "+?"); token; token = strtok(NULL, "+?")) {
        value = db_get(mrph2id_db, token);
        if (!value) {
            continue;
        }
        if (buffer[0]) {
            /*
             * Second id onwards: copy the delimiter preceding this token
             * from the untouched original. The offset is taken from the
             * start of the copy, not from the first token, because strtok()
             * skips leading delimiters and the first token may therefore
             * not sit at offset 0.
             */
            strncat(buffer, rep + (token - 1 - copied_rep), 1);
        }
        strcat(buffer, value);
        free(value);
    }

    free(copied_rep);
    return buffer;
}
/*
 * Open the dictionary database dic_name read-only with DB_open() and hand
 * back the resulting handle.
 *
 * NOTE(review): the declared return type is DBM_FILE *, yet the value
 * returned is a DBM_FILE and init_dsim() stores it in a DBM_FILE variable --
 * confirm against the DBM_FILE typedef whether the '*' is intended.
 */
DBM_FILE *open_dict(char *dic_name)
{
    DBM_FILE handle = DB_open(dic_name, O_RDONLY, 0);

    return handle;
}
/*
 * Prepare the library for similarity() calls: open the dictionary database
 * named by dic_name, keep its handle in mrph2id_db, then initialize distsim.
 */
void init_dsim(char *dic_name)
{
mrph2id_db = open_dict(dic_name);
init_distsim();
}
/*
 * Compute the distsim score of two representation strings.
 *
 * Each argument is converted to an id string with rep2id2() (written into
 * the file-scope buffers static_buffer1 / static_buffer2, so calls share
 * that storage), the two id strings are scored with calc_distsim(), and a
 * "distsim_score(...)" trace line is printed to stdout.
 *
 * rep1, rep2  representation strings; a NULL pointer is treated as an empty
 *             representation and shown as "(null)" in the trace line
 *
 * Returns the value produced by calc_distsim().
 */
float similarity(char *rep1, char *rep2)
{
    /* strlen() must not see NULL; rep2id2() itself copes with NULL / 0. */
    int len1 = rep1 ? (int)strlen(rep1) : 0;
    int len2 = rep2 ? (int)strlen(rep2) : 0;
    char *id1 = rep2id2(rep1, len1, static_buffer1);
    char *id2 = rep2id2(rep2, len2, static_buffer2);
    float score = calc_distsim(id1, id2);

    printf("distsim_score(%s:%s, %s:%s)=%f\n",
           rep1 ? rep1 : "(null)", id1,
           rep2 ? rep2 : "(null)", id2, score);
    return score;
}
/* main */
#include <stdio.h>
#include <dsim.h>
/*
 * Small driver: initialize the library with "mrph2id.db", read two
 * whitespace-separated representations from stdin and print their score.
 *
 * Exit status: 0 on success, 1 when two representations could not be read.
 */
int main(void)
{
    char rep1[100], rep2[100];
    float score;

    init_dsim("mrph2id.db");

    /* Field width 99 leaves room for the terminating NUL in each array. */
    if (scanf("%99s %99s", rep1, rep2) != 2) {
        fprintf(stderr, "expected two representations on stdin\n");
        return 1;
    }

    score = similarity(rep1, rep2);
    printf("Score: %f\n", score);
    return 0;
}
/* Python */
import ctypes

# Load the versioned shared library (the unversioned name is kept below as an
# alternative).
dsim = ctypes.cdll.LoadLibrary('libdsim.so.1')
#dsim = ctypes.cdll.LoadLibrary('libdsim.so')

# Declare the C signatures so ctypes converts arguments and results correctly:
#   void  init_dsim(char *dic_name);
#   float similarity(char *rep1, char *rep2);
dsim.init_dsim.restype = None
dsim.init_dsim.argtypes = [ctypes.c_char_p]
similarity = dsim.similarity
similarity.restype = ctypes.c_float
similarity.argtypes = [ctypes.c_char_p, ctypes.c_char_p]

# c_char_p accepts bytes only on Python 3, so every string is passed as bytes.
dsim.init_dsim(b'mrph2id.db')

# NOTE(review): assumes the dictionary keys are UTF-8 encoded -- confirm
# against the encoding mrph2id.db was built with.
similarity(u'林檎/りんご'.encode('utf-8'), u'梨/なし'.encode('utf-8'))
similarity(u'林檎/りんご'.encode('utf-8'), u'引力/いんりょく'.encode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment