Skip to content

Instantly share code, notes, and snippets.

@kei-s
Created June 19, 2009 02:44
Show Gist options
  • Save kei-s/132372 to your computer and use it in GitHub Desktop.
Save kei-s/132372 to your computer and use it in GitHub Desktop.
saykana
#!/usr/bin/env ruby
# vim:fileencoding=utf-8
require 'MeCab'
require 'kakasi'
require 'kconv'
require 'uri'
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-list/33639
class String
  # Applies the given block to every part of the string that does NOT match
  # +re+, leaves the matching parts untouched, and joins everything back
  # together in the original order.
  #
  # @param re [Regexp] pattern describing the parts to keep as they are
  # @yield [String] each part of the string that lies between matches
  # @yieldreturn [String, Array<String>] replacement for that part (arrays
  #   are flattened by the final Array#join)
  # @return [String] the recombined string
  def without_match(re)
    # Wrapping the pattern in a capture group makes String#split return the
    # matched separators together with the pieces between them. The options
    # of the original regexp (e.g. /i or /x) are passed along so the wrapped
    # pattern matches exactly what the caller's pattern matches.
    grouped = Regexp.new("(#{re.source})", re.options)
    split(grouped).map { |part| grouped =~ part ? part : yield(part) }.join
  end

  # Returns a copy of the string with everything matched by
  # URI.regexp(['http', 'https']) removed.
  #
  # @return [String]
  def remove_url
    gsub(URI.regexp(['http', 'https']), '')
  end
end
# Runs +word+ through KAKASI with the "-JK -HK" options and returns the
# result as UTF-8. The input is converted to Shift_JIS before it is handed
# to KAKASI, and the output is converted back afterwards.
# NOTE(review): "-JK -HK" are presumably KAKASI's kanji-to-katakana and
# hiragana-to-katakana switches - confirm against the KAKASI manual.
def kakasi(word)
  sjis_input = word.tosjis
  converted = Kakasi.kakasi("-JK -HK", sjis_input)
  converted.toutf8
end
# Shared morphological analyzer used by #pronounce. Despite the variable
# name, the object created here is a MeCab::Tagger.
@chasen = MeCab::Tagger.new()
# Returns the pronunciation of every morpheme that the tagger in @chasen
# finds in +word+.
#
# Each line of the tagger output is expected to look like
# "surface<TAB>feature,feature,...". The feature columns are:
#   part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3,
#   conjugation form, conjugation type, base form, reading, pronunciation
# When a line carries no pronunciation column (the 9th feature), the surface
# form is converted with #kakasi instead.
#
# @param word [String] text to analyze
# @return [Array<String>] one pronunciation per morpheme, in order
def pronounce(word)
  # String is not Enumerable on Ruby 1.9+, so the parser output has to be
  # walked line by line explicitly instead of calling #map on the string.
  lines = @chasen.parse(word).to_s.each_line.map(&:chomp)
  # Drop the "EOS" end-of-sentence marker; blank lines carry no morpheme.
  morphemes = lines.reject { |line| line.empty? || line == "EOS" }
  morphemes.map { |line|
    surface, features = line.split("\t")
    pronunciation = features.split(",")[8] unless features.nil?
    pronunciation.nil? ? kakasi(surface) : pronunciation
  }
end
# Counter words and units that may directly follow a number. Longer entries
# are tried first so that e.g. "時間" is not cut short by its prefix "時".
SAYKANA_COUNTERS = %w[
  年 月 日 時 分 秒 円 階 ヶ月 カロリー 級 行 曲 キロ 件 個
  人 才 時間 台 丁目 番 本 匹 %
].sort_by { |counter| -counter.length }.freeze

# A number (digits, dots and hyphens) with an optional trailing counter.
# Built on a single logical line: a regexp literal wrapped across source
# lines would embed the newline in the pattern and stop "個" from matching.
SAYKANA_NUMBER = /\d[\d.-]*(?:#{Regexp.union(SAYKANA_COUNTERS).source})?/

# Converts +str+ to its pronunciation, prints it, and pipes it to the
# external "SayKana" command in a forked child process.
#
# The text is split on whitespace; within each word, numbers (with their
# counter) are passed through unchanged and everything else goes through
# #pronounce. URLs are stripped from the joined result.
#
# @param str [String] text to speak
# @return [Integer] pid of the forked child that runs SayKana
def sayKana(str)
  kana = str.split(" ").map { |token|
    token.without_match(SAYKANA_NUMBER) { |text| pronounce(text) }
  }.join(" ").remove_url
  puts kana
  pid = fork do
    # IO.popen is the explicit form of Kernel#open with a leading "|".
    IO.popen('SayKana -s 87 -f -', 'w') do |f|
      f.puts kana
    end
  end
  # Reap the child in the background so it does not linger as a zombie
  # when the caller keeps running.
  Process.detach(pid)
  pid
end
# Command-line entry point: echoes the first argument and speaks it.
if __FILE__ == $0
  s = ARGV[0]
  # Without an argument sayKana would fail with a NoMethodError on nil;
  # exit with a usage message instead.
  abort "usage: #{File.basename($0)} TEXT" if s.nil?
  puts s
  sayKana(s)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment