Created
July 29, 2012 14:50
-
-
Save fukata/3199348 to your computer and use it in GitHub Desktop.
Extract nouns from search queries with MeCab, run as a fluentd out_exec_filter command.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Filter command for fluentd's exec_filter output plugin.
#
# Input  (STDIN):  one JSON object per line; only its "query" field is used.
# Output (STDOUT): one MessagePack map {keyword => <noun>} for every noun
#                  MeCab finds in the query. Queries that consist of a single
#                  token produce no output.
use strict;
use warnings;
use utf8;
use Encode;
use Text::MeCab;
use JSON::XS;
use Data::MessagePack;

# The output is binary MessagePack read by the parent through a pipe: write
# raw bytes and flush after every print, otherwise records sit in the stdio
# buffer of this long-running process instead of reaching fluentd.
binmode STDOUT;
$| = 1;

my $mecab = Text::MeCab->new;
my $mp    = Data::MessagePack->new;

LINE:
while (my $json = <STDIN>) {
    # Ignore lines that are not valid JSON or whose top level is not an object.
    my $record = eval { decode_json($json) };
    next LINE if $@ or ref $record ne 'HASH';

    # Ignore records without a plain scalar "query" value.
    my $query = $record->{query};
    next LINE if !defined $query or ref $query;

    # Trim surrounding whitespace (also works when the value spans lines).
    $query =~ s/\A\s+//;
    $query =~ s/\s+\z//;
    next LINE unless length $query;    # length, so the query "0" is kept

    # decode_json returns character strings, while the surface/feature values
    # coming back from MeCab are decoded from UTF-8 below, so hand MeCab UTF-8
    # bytes explicitly. The bytes live in a named lexical for the whole node
    # walk. NOTE(review): assumes the MeCab dictionary is UTF-8 -- confirm.
    my $query_bytes = encode('utf8', $query);

    for (my $node = $mecab->parse($query_bytes); $node; $node = $node->next) {
        my $surface = $node->surface;

        # Nodes without a surface carry no keyword.
        next unless defined $surface and length $surface;

        my $keyword = decode('utf8', $surface);

        # Skip a token that is the entire query, and the literal word '名詞',
        # which would otherwise match the part-of-speech test by its own text.
        next if $keyword eq $query;
        next if $keyword eq '名詞';

        # Keep the token only if one of its feature fields is exactly '名詞'.
        # (grep needs a block/expression that tests $_; a constant string is
        # always true and would accept every token.)
        my @features = split /,/, decode('utf8', $node->feature);
        next unless grep { $_ eq '名詞' } @features;

        # Pack UTF-8 bytes so the output does not depend on Perl's internal
        # string representation.
        print $mp->pack({ keyword => encode('utf8', $keyword) });
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tail the application log and turn each matching line into a record tagged
# hoge.queries with the fields query, page, slag, time_max, sort and order
# (the named captures of the format regex).
<source>
  type tail
  format /^q\:(?<query>.*), page\:(?<page>[0-9]+), slag\:(?<slag>[^ ]+), time\:(?<time_max>[0-9]*), sort\:(?<sort>[^ ]+), order\:(?<order>[^ ]+)$/
  path /var/log/hoge/foo.log
  tag hoge.queries
  # File in which the tail position is stored.
  pos_file /var/log/td-agent/foo.queries.pos
</source>
# Pipe every hoge.queries record through the external parse.pl command:
# records are written to it as JSON, its output is read back as MessagePack
# and re-emitted under the tag hoge.queries.parsed.
<match hoge.queries>
  type exec_filter
  command /hoge/foo/parse.pl
  in_format json
  out_format msgpack
  tag hoge.queries.parsed
  flush_interval 1s
</match>
# Copy every hoge.queries.parsed record to both stores below.
<match hoge.queries.parsed>
  type copy
  # Store 1: tdlog output, database "hoge", table "queries", file-buffered
  # and flushed every 300s.
  <store>
    type tdlog
    # NOTE(review): "${API KEY}" looks like a placeholder -- replace it with
    # the real API key before deploying.
    apikey ${API KEY}
    auto_create_table false
    database hoge
    table queries
    flush_interval 300s
    use_ssl true
    buffer_type file
    buffer_path /var/log/td-agent/buffer/hoge_queries_td
  </store>
  # Store 2: MongoDB at 127.0.0.1:27017, database "hoge", collection
  # "queries", file-buffered and flushed every 10s.
  <store>
    type mongo
    database hoge
    collection queries
    host 127.0.0.1
    port 27017
    flush_interval 10s
    buffer_type file
    buffer_path /var/log/td-agent/buffer/hoge_queries_mongo
  </store>
</match>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment