Created
January 3, 2012 09:09
-
-
Save hallettj/1554198 to your computer and use it in GitHub Desktop.
Scala parser for WordNet's Prolog-format database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * An incomplete parser intended to import rules from the WordNet[1]
 * Prolog-formatted database. Rules are of the form:
 *
 *     s(101942869,1,'abalone',n,1,0).
 *     sk(101942869,1,'abalone%1:05:00::').
 *
 * and so forth.
 *
 * [1]: http://wordnet.princeton.edu/ "WordNet"
 */
package us.sitr.wordnet | |
import Integer.parseInt | |
import scala.util.parsing.combinator.RegexParsers | |
/**
 * Common supertype of every relation the parser can produce. Sealed, so all
 * variants are declared in this file and matches can be checked for
 * exhaustiveness.
 */
sealed trait Relation

/**
 * The six arguments of an `s(...)` rule, in the order they appear in the
 * rule, e.g. `s(101942869,1,'abalone',n,1,0)`.
 */
case class Sense(
  synset_id: Int,
  w_num: Int,
  word: String,
  ss_type: Char,
  sense_number: Int,
  tag_count: Int
) extends Relation

/**
 * The three arguments of an `sk(...)` rule, in the order they appear in the
 * rule, e.g. `sk(101942869,1,'abalone%1:05:00::')`.
 */
case class SenseKey(
  synset_id: Int,
  w_num: Int,
  sense_key: String
) extends Relation
/**
 * Grammar for the relation rules of a WordNet Prolog file. Each rule is a
 * functor name followed by a parenthesised, comma-separated argument list;
 * a program is a sequence of such rules, each terminated by a period.
 */
trait PrologRelationParsers extends RegexParsers with PrimitiveParsers with ArgumentLists {

  /** One or more relations, each followed by a terminating ".". */
  def program: Parser[List[Relation]] = rep1(relation <~ ".")

  /**
   * Any single relation the grammar currently accepts.
   *
   * Only `sense` and `senseKey` are enabled. `glossRelation`,
   * `syntaxRelation` and `hypernym` are defined below but not yet part of
   * this alternation (they do not produce a `Relation` value yet).
   */
  def relation = sense | senseKey

  /** An `s(...)` rule: six arguments packed into a [[Sense]]. */
  def sense: Parser[Sense] =
    "s" ~> arguments6(synset_id, w_num, word, ss_type, sense_number, tag_count) ^^ {
      case (id, wordNumber, lemma, synsetType, senseNumber, tagCount) =>
        Sense(id, wordNumber, lemma, synsetType, senseNumber, tagCount)
    }

  /** An `sk(...)` rule: three arguments packed into a [[SenseKey]]. */
  def senseKey: Parser[SenseKey] =
    "sk" ~> arguments3(synset_id, w_num, sense_key) ^^ {
      case (id, wordNumber, key) => SenseKey(id, wordNumber, key)
    }

  /** A `g(...)` rule, yielded as a raw `(synset_id, gloss)` pair. */
  def glossRelation: Parser[(Int, String)] = "g" ~> arguments2(synset_id, gloss)

  /** A `syntax(...)` rule, yielded as a raw `(synset_id, w_num, syntax)` triple. */
  def syntaxRelation: Parser[(Int, Int, String)] =
    "syntax" ~> arguments3(synset_id, w_num, syntax)

  /** A `hyp(...)` rule, yielded as a raw pair of synset ids. */
  def hypernym: Parser[(Int, Int)] = "hyp" ~> arguments2(synset_id, synset_id)

  // Named argument parsers. Each is an alias for a primitive token parser;
  // the names exist so the rule definitions above read like the rule
  // signatures they parse.
  def synset_id: Parser[Int] = integer
  def w_num: Parser[Int] = integer
  def word: Parser[String] = string
  def ss_type: Parser[Char] = char
  def sense_number: Parser[Int] = integer
  def tag_count: Parser[Int] = integer
  def sense_key: Parser[String] = string
  def gloss: Parser[String] = string
  def syntax: Parser[String] = string
}
/**
 * Parsers for the primitive tokens that appear as arguments of a Prolog
 * rule: integers, single-quoted atoms and single letters.
 */
trait PrimitiveParsers extends RegexParsers {

  /**
   * A run of decimal digits, converted to an `Int`.
   *
   * NOTE(review): `parseInt` throws `NumberFormatException` when the digit
   * run does not fit in an `Int`; that surfaces as an exception rather than
   * a parse failure.
   */
  def integer: Parser[Int] = """[0-9]+""".r ^^ { parseInt(_, 10) }

  /**
   * A single-quoted Prolog atom, returned without its surrounding quotes.
   *
   * The whole atom is matched by ONE regex on purpose: `RegexParsers` skips
   * whitespace before every literal/regex parser, so matching the opening
   * quote and the contents as separate parsers would silently drop leading
   * whitespace inside the quotes. A doubled quote (`''`) inside the atom is
   * the Prolog escape for a literal single quote and is unescaped here, so
   * `'rock''n''roll'` yields `rock'n'roll`.
   */
  def string: Parser[String] = """'(?:[^']|'')*'""".r ^^ { quoted =>
    // Drop the delimiting quotes, then collapse each escaped quote pair.
    quoted.substring(1, quoted.length - 1).replace("''", "'")
  }

  /** A single ASCII letter. */
  def char: Parser[Char] = """[a-zA-Z]""".r ^^ { _.head } // regex guarantees exactly one character

  /**
   * A run of hexadecimal digits (either case), converted to an `Int`.
   *
   * NOTE(review): same overflow caveat as `integer`.
   */
  def hexadecimal: Parser[Int] = """[0-9A-Fa-f]+""".r ^^ { parseInt(_, 16) }
}
/**
 * Combinators for parenthesised, comma-separated argument lists of a fixed
 * arity. Each takes one parser per argument and yields the parsed values as
 * a tuple in argument order.
 */
trait ArgumentLists extends RegexParsers {

  /** Parses `( a , b )` into a pair. */
  def arguments2[A,B](a: Parser[A], b: Parser[B]): Parser[(A, B)] = {
    // Separators are discarded next to the argument they follow, so only
    // the argument values reach the pattern below.
    val body = (a <~ ",") ~ b
    ("(" ~> body <~ ")") ^^ { case first ~ second => (first, second) }
  }

  /** Parses `( a , b , c )` into a triple. */
  def arguments3[A,B,C](a: Parser[A], b: Parser[B], c: Parser[C]): Parser[(A, B, C)] = {
    val body = (a <~ ",") ~ (b <~ ",") ~ c
    ("(" ~> body <~ ")") ^^ { case first ~ second ~ third => (first, second, third) }
  }

  /** Parses `( a , b , c , d , e , f )` into a 6-tuple. */
  def arguments6[A,B,C,D,E,F](
    a: Parser[A], b: Parser[B], c: Parser[C], d: Parser[D], e: Parser[E], f: Parser[F]
  ): Parser[(A, B, C, D, E, F)] = {
    val body = (a <~ ",") ~ (b <~ ",") ~ (c <~ ",") ~ (d <~ ",") ~ (e <~ ",") ~ f
    ("(" ~> body <~ ")") ^^ {
      case first ~ second ~ third ~ fourth ~ fifth ~ sixth =>
        (first, second, third, fourth, fifth, sixth)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment