Created
February 22, 2011 02:57
-
-
Save littlefolk/838134 to your computer and use it in GitHub Desktop.
keywordlist_furigana2skkdic.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- keywordlist_furigana2skkdic.rb.org 2010-09-16 10:59:23.000000000 +0900 | |
+++ keywordlist_furigana2skkdic.rb 2010-09-16 10:59:23.000000000 +0900 | |
@@ -34,8 +34,6 @@ | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
# THE SOFTWARE. | |
-require 'nkf' | |
- | |
class String | |
# borrowed from http://openlab.jp/skk/skk/tools/filters/skkdictools.rb | |
def concatify | |
@@ -49,11 +47,17 @@ | |
return self | |
end | |
end | |
+ | |
+ # via. http://www.serendip.ws/archives/2312 | |
+ def to_hiragana | |
+ self.tr("ァ-ン", "ぁ-ん") | |
+ end | |
end | |
# http://d.hatena.ne.jp/hatenadiary/20060922/1158908401 | |
# http://d.hatena.ne.jp/images/keyword/keywordlist_furigana.csv | |
ARGF.each do |line| | |
+begin | |
line.chomp! | |
furigana, keyword = line.split(/\t/, 2) | |
if furigana.empty? | |
@@ -62,8 +66,11 @@ | |
if furigana == keyword | |
next # ひらがなだけのキーワードを無視 | |
end | |
- if furigana == NKF.nkf('-Ee -m0 --hiragana', keyword) | |
+ if furigana == keyword.to_hiragana | |
next # カタカナ語は無視 | |
end | |
+rescue | |
+else | |
 puts "#{furigana} /#{keyword.gsub(/&amp;/, '&').concatify}/" | |
end | |
+end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment