Last active
June 16, 2016 06:07
-
-
Save hiropppe/488767fa9cfd6a55f9dd3218c0c70498 to your computer and use it in GitHub Desktop.
knp distsim call test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
# VirtualBoxのディスクがIO負荷で(?)勝手にリードオンリーになった | |
VBoxManage setextradata "docker-host" "VBoxInternal/Devices/ahci/0/LUN0/Config/FlushInterval" 1000000 | |
# 実行ファイル | |
gcc -DHAVE_CONFIG_H -I. -I.. -I/knp-4.16/CRF++-0.58 -I/knp-4.16/crf -DUSE_CRF -DKNP_DICT="\"/usr/local/share/knp/dict"\" -DKNP_RULE="\"/usr/local/share/knp/rule"\" -DKNP_RC_DEFAULT="\"/usr/local/etc/knprc"\" -DCF_VERSION="\"CF1.1"\" -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT dsim.o -MD -MP -MF .deps/dsim.Tpo -c -o dsim.o dsim.c | |
/bin/sh ../libtool --tag=CC --mode=link g++ -g3 -O0 -o dsim dsim.o tools.o db.o -lm /knp-4.16/distsim/libdistsim.la -lstdc++ -L/knp-4.16/cdb -lcdb -lz | |
# 共有ライブラリ | |
## distsim | |
/bin/sh ../libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -I.. -I../cdb -g -O2 -MT distsim_for_knp.lo -MD -MP -MF .deps/distsim_for_knp.Tpo -c -fPIC -o distsim_for_knp.lo distsim_for_knp.cc | |
/bin/sh ../libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -I.. -I../cdb -g -O2 -MT distsim.lo -MD -MP -MF .deps/distsim.Tpo -c -fPIC -o distsim.lo distsim.cc | |
## cdb | |
make shared | |
cp libcdb.so /usr/local/lib | |
ldconfig | |
## system | |
;コンパイル | |
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT tools.o -MD -MP -MF .deps/tools.Tpo -c -fPIC -o tools.o tools.c | |
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT db.o -MD -MP -MF .deps/db.Tpo -c -fPIC -o db.o db.c | |
gcc -DHAVE_CONFIG_H -I. -I.. -DCDB -I/knp-4.16/cdb -I/usr/local//include -I/knp-4.16/distsim -g3 -O0 -MT dsim.o -MD -MP -MF .deps/dsim.Tpo -c -fPIC -o dsim.o dsim.c | |
;共有ライブラリの作成 | |
g++ -shared -o libdsim.so dsim.o db.o tools.o ../distsim/distsim.o ../distsim/distsim_for_knp.o -L../cdb/ -lstdc++ -lcdb -lz | |
cp libdsim.so /usr/local/lib | |
ldconfig | |
gcc -I./ dmain.c -o dmain -ldsim | |
; 共有ライブラリをバージョニングする場合 | |
g++ -shared -Wl,-soname=libdsim.so.1 -o libdsim.so.1.0.0 dsim.o db.o tools.o ../distsim/distsim.o ../distsim/distsim_for_knp.o -L../cdb/ -lstdc++ -lcdb -lz | |
cp libdsim.so.1.0.0 /usr/local/lib | |
ldconfig | |
cd /usr/local/lib | |
ln -s libdsim.so.1.0.0 libdsim.so | |
*/ | |
/* dsim.h */ | |
#ifndef DSIM_H
#define DSIM_H

/*
 * Public interface of the dsim shared library.
 */

/*
 * Open the morpheme -> id dictionary named by dic_name and initialise
 * the distributional-similarity backend. Call once before similarity().
 */
extern void init_dsim(char *dic_name);

/*
 * Convert both representative forms to id strings and return the
 * similarity score computed for them. The implementation also prints
 * one "distsim_score(...)" line to stdout per call.
 */
extern float similarity(char *rep1, char *rep2);

#endif /* DSIM_H */
/* dsim.c */ | |
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <const.h>
#include <path.h>
#include "distsim_for_knp.h"
#include "dsim.h"
char static_buffer1[5120]; | |
char static_buffer2[5120]; | |
DBM_FILE mrph2id_db; | |
char *rep2id2(char *rep, int rep_len, char *buffer) | |
{ | |
/* MRPH_MAX * 9(max8桁+"+"の分)以上あるので溢れない */ | |
buffer[0] = '\0'; | |
if (rep && rep_len > 0) { | |
char *token_start, *token, *value; | |
char *copied_rep = (char *)malloc_data(sizeof(char) * rep_len + 1, "rep2id"); | |
strncpy(copied_rep, rep, rep_len); | |
copied_rep[rep_len] = '\0'; | |
token_start = strtok(copied_rep, "+?"); | |
token = token_start; | |
while (token) { | |
value = db_get(mrph2id_db, token); | |
if (value) { | |
if (buffer[0]) /* 2つ目以降 */ | |
strncat(buffer, rep + (token - 1 - token_start), 1); | |
strcat(buffer, value); | |
free(value); | |
} | |
token = strtok(NULL, "+?"); | |
} | |
free(copied_rep); | |
/* fprintf(stderr, ";; %s -> %s\n", rep, buffer); */ | |
} | |
return buffer; | |
} | |
/*
 * Open the dictionary file dic_name read-only through DB_open() and
 * return the resulting handle.
 *
 * The return type is DBM_FILE, the type DB_open()'s result is stored in
 * and the type of the variable init_dsim() assigns this result to.
 */
DBM_FILE open_dict(char *dic_name)
{
    return DB_open(dic_name, O_RDONLY, 0);
}
void init_dsim(char *dic_name) | |
{ | |
mrph2id_db = open_dict(dic_name); | |
init_distsim(); | |
} | |
float similarity(char *rep1, char *rep2) | |
{ | |
char *id1 = rep2id2(rep1, strlen(rep1), &(static_buffer1[0])); | |
char *id2 = rep2id2(rep2, strlen(rep2), &(static_buffer2[0])); | |
float score = calc_distsim(id1, id2); | |
printf("distsim_score(%s:%s, %s:%s)=%f\n",rep1,id1,rep2,id2,score); | |
return score; | |
} | |
/* main */ | |
#include <stdio.h> | |
#include <dsim.h> | |
/*
 * Small driver: reads two whitespace-separated representative forms
 * from stdin and prints their similarity score.
 *
 * Exit status is 0 on success and 1 when two words could not be read.
 */
int main(void)
{
    char rep1[100], rep2[100];
    float score;

    init_dsim("mrph2id.db");

    /* Field widths keep each word inside its 100-byte array. */
    if (scanf("%99s %99s", rep1, rep2) != 2) {
        fprintf(stderr, "expected two words on standard input\n");
        return 1;
    }

    score = similarity(rep1, rep2);
    printf("Score: %f\n", score);
    return 0;
}
/* Python */ | |
import ctypes

# Load the versioned shared library built from dsim.c.
dsim = ctypes.cdll.LoadLibrary('libdsim.so.1')
#dsim = ctypes.cdll.LoadLibrary('libdsim.so')

# Declare both C prototypes before the first call so ctypes checks and
# converts the arguments instead of guessing.
init_dsim = dsim.init_dsim
init_dsim.restype = None
init_dsim.argtypes = [ctypes.c_char_p]

similarity = dsim.similarity
similarity.restype = ctypes.c_float
similarity.argtypes = [ctypes.c_char_p, ctypes.c_char_p]

# c_char_p takes bytes; a text string is rejected under Python 3.
init_dsim(b'mrph2id.db')

# NOTE(review): assumes the dictionary keys are UTF-8 encoded - confirm.
similarity(u'林檎/りんご'.encode('utf-8'), u'梨/なし'.encode('utf-8'))
similarity(u'林檎/りんご'.encode('utf-8'), u'引力/いんりょく'.encode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment