Created
August 21, 2013 14:47
-
-
Save cocodrips/6295453 to your computer and use it in GitHub Desktop.
PerlでMeCab + TermExtractを使う。
TermExtractはここから。http://gensen.dl.itc.u-tokyo.ac.jp/termextract.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
use warnings; | |
use utf8; | |
use Text::MeCab; | |
use Text::TermExtract; | |
use TermExtract::MeCab; | |
use Data::Dumper; | |
# Script entry point: run the keyword-extraction pipeline once.
_extract_keyword();

# Drives the pipeline: sends a fixed sample sentence through _mecab and hands
# the resulting hash ref to _term. Takes no arguments; evaluates to whatever
# _term evaluates to.
sub _extract_keyword {
    # Placeholder input (the literal reads "the text goes here").
    my $sentence = "ここに文章がくる";
    _term(_mecab($sentence));
}
# Tokenizes $str with Text::MeCab and re-serializes the node chain as text:
# one "surface<TAB>feature" line per visited node, followed by a closing
# "EOS" line.
#
# Parameters:
#   $str - the text handed to Text::MeCab's parse().
# Returns:
#   A hash ref { mecab_string => $text } holding the serialized output.
sub _mecab {
    my ($str) = @_;

    my $tagger = Text::MeCab->new();
    my $cursor = $tagger->parse($str);

    # NOTE(review): the loop advances before reading, so the node returned
    # by parse() itself is never emitted. Presumably that node is a
    # beginning-of-sentence sentinel -- confirm against Text::MeCab, since
    # otherwise the first token is dropped.
    my @rows;
    while ($cursor = $cursor->next) {
        push @rows, join("\t", $cursor->surface, $cursor->feature);
    }

    # Every row, and the trailing EOS marker, is newline-terminated.
    my $serialized = join('', map { $_ . "\n" } @rows, 'EOS');
    return { mecab_string => $serialized };
}
# Runs TermExtract::MeCab's get_imp_word over MeCab-formatted text and dumps
# the resulting list to STDERR via warn.
#
# Parameters:
#   $mecab_obj - hash ref whose 'mecab_string' entry holds the MeCab output
#                text (the shape returned by _mecab).
# Returns:
#   The value of the final warn call; callers in this file ignore it.
sub _term {
    my ($mecab_obj) = @_;

    my $mecab_string = $mecab_obj->{'mecab_string'};

    # Direct method call instead of the indirect-object form
    # "new TermExtract::MeCab", which is parsed ambiguously by perl.
    my $term_extract = TermExtract::MeCab->new();

    # The 'var' flag presumably tells get_imp_word that the first argument is
    # the text itself rather than a file name -- confirm against the
    # TermExtract documentation.
    my @noun_list = $term_extract->get_imp_word($mecab_string, 'var');

    # Debug output: each list element is dumped as its own $VARn.
    warn Dumper(@noun_list);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment