Skip to content

Instantly share code, notes, and snippets.

@naoa
naoa / groonga-token-counter.c
Last active August 29, 2015 14:08
gcc src/index_sample.c -o index_sample -Wall -O2 -lgroonga -I/usr/include/groonga
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <groonga.h>
#include <groonga/nfkc.h>
/*
Wikipedia ja 30万件 3.8G
real 1m12.745s
user 0m58.432s
@naoa
naoa / mrn.sql
Last active August 29, 2015 14:08
mysql> drop table terms;
Query OK, 0 rows affected (0.00 sec)
mysql> drop table memos;
Query OK, 0 rows affected (0.01 sec)
mysql> CREATE TABLE terms (
-> term VARCHAR(255) NOT NULL PRIMARY KEY
-> ) Engine=Mroonga COMMENT='default_tokenizer "TokenBigram"' DEFAULT CHARSET=utf8;
Query OK, 0 rows affected (0.09 sec)

オンラインインデックス構築

table_create Documents TABLE_HASH_KEY ShortText
[[0,0.0,0.0],true]
column_create Documents content COLUMN_SCALAR Text
[[0,0.0,0.0],true]
table_create Terms TABLE_PAT_KEY|KEY_NORMALIZE ShortText --default_tokenizer TokenBigram
[[0,0.0,0.0],true]
column_create Terms document_index COLUMN_INDEX|WITH_POSITION Documents content
<?php
$article = $argv[1];
$xml = new XMLReader();
if(!$xml->open($article)){
die('Failed to open file!');
}
$rc = 1;
<?php
$article = $argv[1];
$xml = new XMLReader();
if(!$xml->open($article)){
die('Failed to open file!');
}
$rc = 1;
2014-07-11T12:12:42.926168+00:00 app[run.7403]: Making all in ubuntu
2014-07-11T12:12:42.976663+00:00 app[run.7403]: make[4]: Nothing to be done for `all'.
2014-07-11T12:12:42.976647+00:00 app[run.7403]: make[4]: Entering directory `/app/groonga-4.0.3/packages/rpm/centos'
2014-07-11T12:12:43.008150+00:00 app[run.7403]: make[3]: Nothing to be done for `all'.
2014-07-11T12:12:42.976665+00:00 app[run.7403]: make[4]: Leaving directory `/app/groonga-4.0.3/packages/rpm/centos'
2014-07-11T12:12:42.978613+00:00 app[run.7403]: Making all in fedora
2014-07-11T12:12:42.993198+00:00 app[run.7403]: Making all in yum
2014-07-11T12:12:43.030222+00:00 app[run.7403]: Making all in patches
2014-07-11T12:12:43.009527+00:00 app[run.7403]: Making all in windows
<?php
$article = $argv[1];
if ($handle = opendir($article)) {
while (false !== ($file = readdir($handle))) {
echo "-------$file------\n";
echo "XML loading and importing...\n";
$xml = new XMLReader();
if(!$xml->open($article . "/" . $file)){
<?php
$article = $argv[1];
if ($handle = opendir($article)) {
while (false !== ($file = readdir($handle))) {
echo "-------$file------\n";
echo "XML loading and importing...\n";
$xml = new XMLReader();
if(!$xml->open($article . "/" . $file)){
<?php
$category_file = $argv[1];
$rc = 1;
$startTimeAll = microtime(true);
$fp = fopen($category_file, 'r');
while(!feof($fp)){
$line = fgets($fp);
<?php
$category_file = $argv[1];
$rc = 1;
$startTimeAll = microtime(true);
$fp = fopen($category_file, 'r');
while(!feof($fp)){
$line = fgets($fp);