Skip to content

Instantly share code, notes, and snippets.

@cocodrips
Created August 21, 2013 14:47
Show Gist options
  • Save cocodrips/6295453 to your computer and use it in GitHub Desktop.
Save cocodrips/6295453 to your computer and use it in GitHub Desktop.
PerlでMeCab + TermExtractを使う。 TermExtractはここから。http://gensen.dl.itc.u-tokyo.ac.jp/termextract.html
use strict;
use warnings;
use utf8;
use Text::MeCab;
use Text::TermExtract;
use TermExtract::MeCab;
use Data::Dumper;
# Script entry point: run the keyword-extraction demo once when the script executes.
_extract_keyword();
# Run the demo pipeline: tokenize a text with _mecab() and hand the result
# to _term().
#
# Params:  $str - optional text to analyse; when omitted (or undef) the
#                 built-in placeholder sentence is used, so a bare
#                 _extract_keyword() call keeps working.
# Returns: whatever _term() returns.
sub _extract_keyword {
    my ($str) = @_;
    $str = "ここに文章がくる" unless defined $str;

    my $mecab_obj = _mecab($str);
    return _term($mecab_obj);
}
# Tokenize $str with Text::MeCab and render the result as mecab-style text.
#
# Every node with a non-empty surface becomes one "surface<TAB>feature" line,
# and the text is terminated by a single "EOS" line.
#
# Params:  $str - the text handed to Text::MeCab's parse().
# Returns: hashref { mecab_string => $text }.
sub _mecab {
    my ($str) = @_;

    my $mecab = Text::MeCab->new();
    my @lines;

    # Start from the node parse() returns rather than from its ->next, so the
    # first node is never dropped, and stop cleanly when parse() yields
    # nothing instead of calling ->next on undef.
    for (my $node = $mecab->parse($str); $node; $node = $node->next) {
        my $surface = $node->surface;

        # Nodes without surface text are presumably MeCab's BOS/EOS sentinels
        # (confirm against Text::MeCab::Node); emitting them would add a bogus
        # "<TAB>feature" row ahead of the EOS terminator.
        next if !defined $surface || $surface eq '';

        push @lines, $surface . "\t" . $node->feature . "\n";
    }

    return { mecab_string => join('', @lines, "EOS\n") };
}
# Extract terms from mecab-formatted text and dump them via warn().
#
# Params:  $mecab_obj - hashref whose 'mecab_string' entry holds the text
#                       passed to TermExtract::MeCab's get_imp_word().
# Returns: the result of the final warn().
sub _term {
    my ($mecab_obj) = @_;

    my $mecab_string = $mecab_obj->{'mecab_string'};

    # Direct class-method call instead of the ambiguous indirect-object form.
    my $term_extract = TermExtract::MeCab->new;

    # NOTE(review): 'var' presumably tells get_imp_word that the first
    # argument is the data itself rather than a file name - confirm against
    # the TermExtract documentation.
    my @noun_list = $term_extract->get_imp_word($mecab_string, 'var');

    # Dumper() of an empty list is '', and warn '' prints the misleading
    # "Warning: something's wrong"; report the empty result explicitly.
    return warn "_term: no terms extracted\n" unless @noun_list;

    return warn Dumper(@noun_list);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment