Skip to content

Instantly share code, notes, and snippets.

@shunsukeaihara
Created January 23, 2013 08:30
Show Gist options
  • Save shunsukeaihara/4603152 to your computer and use it in GitHub Desktop.
Save shunsukeaihara/4603152 to your computer and use it in GitHub Desktop.
Ruby wrapper for the Japanese dependency parser CaboCha 0.5
#!/usr/bin/ruby -Kw
# Legacy (Ruby 1.8) switch asking the interpreter to treat strings as UTF-8.
# NOTE(review): newer Rubies are understood to ignore $KCODE with a warning - confirm
# against the Ruby version this script is run with.
$KCODE="UTF-8"
# CaboCha binding; DepAnalyzeByCabocha#initialize builds a CaboCha::Parser from it.
require "CaboCha"
# kconv supplies the String#toeuc / String#toutf8 conversions called by the class below.
require "kconv"
# Wrapper around a CaboCha parser that turns one parsed sentence into a list of
# Phrase objects (one per chunk) wired together by head / dependent references.
class DepAnalyzeByCabocha
  # Inputs with more characters than this are rejected without being parsed.
  MAX_CHARS = 500

  # args:: passed through unchanged to CaboCha::Parser.new.
  def initialize(args=[])
    @cabo = CaboCha::Parser.new(args)
  end

  # Parses +str+ and returns its phrases in sentence order.
  #
  # Returns an empty array when +str+ is longer than MAX_CHARS characters or
  # when the parser does not hand back a usable tree.
  def analyze(str)
    # split(//) is used for the count instead of String#length.
    # NOTE(review): presumably so that characters, not bytes, are counted on
    # Ruby 1.8 with $KCODE set - confirm before simplifying.
    return [] if str.split(//).length > MAX_CHARS
    analyzeSub(str)
  end

  # Does the actual work for #analyze, without the length check.
  #
  # The text is converted with String#toeuc before it is given to the parser.
  # Every token for which hasChunk is true opens a new Phrase; the tokens that
  # follow are collected into that phrase until the next chunk starts. Once all
  # phrases exist, each one resolves its head via Phrase#setDep.
  #
  # Returns the array of Phrase objects, or [] if asking the tree for its size
  # raises.
  def analyzeSub(str)
    tree = @cabo.parse(str.toeuc)
    begin # guard against parse failures: an unusable tree fails on this first call
      size = tree.size
    rescue
      return []
    end

    phrases = []
    phrase = nil
    (0...size).each do |i|
      token = tree.token(i)
      if token.hasChunk
        # The phrase id doubles as its index in +phrases+, which is what
        # Phrase#setDependent relies on when it looks a dependent up by id.
        phrase = Phrase.new(token.chunk, phrases, phrases.size)
        phrases.push(phrase)
      end
      phrase.pushToken(token)
    end

    # Heads can only be resolved after every phrase has been created, because
    # a chunk may link to a phrase that appears later in the sentence.
    phrases.each { |x| x.setDep }
    phrases
  end

  # One chunk of the parsed sentence together with its tokens and its links to
  # the other phrases of the same sentence.
  class Phrase
    # dependents:: phrases whose head is this phrase
    # head::       the phrase this one depends on, or nil (root / unresolved link)
    # tokens::     tokens belonging to this chunk, in order
    # chunk::      the chunk object taken from the first token of the phrase
    # id::         position of this phrase inside +sentence+
    # sentence::   the shared array holding every phrase of the sentence
    attr_accessor :dependents, :head, :tokens, :chunk, :id, :sentence

    # chunk::   chunk object of the token that opened this phrase
    # phrases:: array shared by all phrases of the sentence (kept by reference)
    # id::      index this phrase has (or will have) in +phrases+
    def initialize(chunk, phrases, id)
      @chunk = chunk
      @tokens = []
      @dependents = []
      @sentence = phrases
      @id = id
      @head = nil
    end

    # Appends +token+ to this phrase.
    def pushToken(token)
      @tokens.push(token)
    end

    # Resolves +head+ from the chunk's link and registers this phrase as one of
    # the head's dependents.
    #
    # A negative link leaves +head+ as nil. CaboCha reports the root chunk with
    # link -1; indexing the sentence array with it would wrap around to the
    # last phrase and make the root its own head and its own dependent.
    # A link that points outside the sentence also leaves +head+ as nil.
    def setDep
      link = @chunk.link
      @head = link < 0 ? nil : @sentence[link]
      @head.setDependent(@id) if @head
    end

    # Returns a two element array: the chunk's +rel+ value and the head phrase
    # (nil when this phrase has no head).
    def getHead
      [@chunk.rel, @head]
    end

    # Records the phrase stored at index +id+ of the sentence as a dependent of
    # this phrase.
    def setDependent(id)
      @dependents.push(@sentence[id])
    end

    # Returns the +ne+ value of the first token whose +ne+ is not "O", or nil
    # when every token carries "O".
    # NOTE(review): "O" is presumably the "outside any named entity" tag of the
    # parser's NE labelling - confirm against the CaboCha output format.
    def ne?()
      @tokens.each do |x|
        return x.ne if x.ne != "O"
      end
      nil
    end

    # Returns true when any token's part-of-speech string contains "名詞-数"
    # (noun / numeral), nil otherwise.
    def num?()
      @tokens.each do |x|
        return true if x.pos.toutf8 =~ /名詞-数/
      end
      nil
    end

    # Returns the surface forms of all tokens concatenated in order.
    def getSurface
      @tokens.map { |x| x.surface }.join("")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment