Skip to content

Instantly share code, notes, and snippets.

@nazt
Created February 12, 2010 14:42
Show Gist options
  • Save nazt/302615 to your computer and use it in GitHub Desktop.
Save nazt/302615 to your computer and use it in GitHub Desktop.
import org.nazt.lexto.*
/**
 * Splits a piece of text into tokens using a {@code LongLexTo} instance that
 * is constructed from the file {@code lexitron.txt}.
 *
 * All work happens in the constructor; afterwards the tokens, the type list
 * and the index list can be read through the getters.
 */
class Tokenizer
{
    // Segmenter backed by 'lexitron.txt' (path is resolved by java.io.File,
    // i.e. relative to the current working directory).
    private def lextoObj = new LongLexTo(new File('lexitron.txt'))

    // Filled from lextoObj.getTypeList() in the constructor.
    // NOTE(review): presumably one type code per token -- confirm against LongLexTo.
    private def TypeList

    // Filled from lextoObj.getIndexList() in the constructor. Each entry is
    // used below as the exclusive end offset of one token in the source text.
    private def IndexList = []

    // Substrings of the source text, in the order the index list yields them.
    def tokenizedText = []

    /**
     * Runs the segmenter over {@code srcText} and cuts the text at every
     * offset reported in the index list.
     *
     * @param srcText the text to tokenize
     */
    Tokenizer(String srcText)
    {
        lextoObj.wordInstance(srcText)

        def sliceStart = 0
        TypeList = lextoObj.getTypeList()
        IndexList = lextoObj.getIndexList()

        // Each offset closes the current slice and opens the next one.
        for (sliceEnd in IndexList) {
            tokenizedText << srcText[sliceStart..<sliceEnd]
            sliceStart = sliceEnd
        }
    }

    /**
     * @return the list of token strings built by the constructor
     */
    def getTokenizedText()
    {
        tokenizedText
    }

    /**
     * Returns all tokens joined into a single string, separated by "|".
     *
     * @param str not read by this method; its presence only selects this
     *            overload over the no-argument one
     * @return the tokens joined with "|"
     */
    def getTokenizedText(String str)
    {
        return tokenizedText.join("|")
    }

    /**
     * @return the value obtained from {@code lextoObj.getTypeList()}
     */
    def getTypeList()
    {
        return TypeList
    }

    /**
     * @return the value obtained from {@code lextoObj.getIndexList()}
     */
    def getIndexList()
    {
        return IndexList
    }
}
// Demo: tokenize a sample Thai sentence and print the result three ways.
def tokenizer = new Tokenizer("ทดสอบการตัดคำโดยใช้Lextoนะครับนะหิวข้าวจังเลยตอนนี้แต่ว่าคิดไม่ออกว่าจะกินอะไรดี")

// Token list via the explicit getter call.
println(tokenizer.getTokenizedText())

// Same token list via property syntax.
println(tokenizer.tokenizedText)

// Single string with tokens separated by "|" (the argument only selects the overload).
println(tokenizer.getTokenizedText("String"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment