Skip to content

Instantly share code, notes, and snippets.

@hallettj
Created January 3, 2012 09:09
Show Gist options
  • Save hallettj/1554198 to your computer and use it in GitHub Desktop.
Save hallettj/1554198 to your computer and use it in GitHub Desktop.
Scala parser for WordNet's Prolog-format database
/*
* An incomplete parser intended to import rules from the WordNet[1]
* Prolog-formatted database. Rules are of the form:
*
* s(101942869,1,'abalone',n,1,0).
* sk(101942869,1,'abalone%1:05:00::').
*
* and so forth.
*
* [1]: http://wordnet.princeton.edu/ "WordNet"
*/
package us.sitr.wordnet
import Integer.parseInt
import scala.util.parsing.combinator.RegexParsers
/** Common supertype of the facts produced by the relation parsers. */
sealed trait Relation

/**
 * A fact written as `s(synset_id, w_num, 'word', ss_type, sense_number, tag_count)`.
 *
 * Fields are declared in the same order as the arguments of the rule, so a
 * parsed argument tuple can be passed straight to the constructor.
 */
case class Sense(
  synset_id: Int,
  w_num: Int,
  word: String,
  ss_type: Char,
  sense_number: Int,
  tag_count: Int
) extends Relation

/**
 * A fact written as `sk(synset_id, w_num, 'sense_key')`.
 *
 * Fields follow the argument order of the rule.
 */
case class SenseKey(synset_id: Int, w_num: Int, sense_key: String) extends Relation
/**
 * Grammar for the relations of a WordNet Prolog file.
 *
 * Each rule is a functor name followed by a parenthesised, comma-separated
 * argument list; `program` additionally expects a "." after every rule.
 */
trait PrologRelationParsers extends RegexParsers with PrimitiveParsers with ArgumentLists {

  /** One or more relations, each followed by a full stop. */
  def program: Parser[List[Relation]] = rep1(relation <~ ".")

  /**
   * Any relation that currently maps onto a `Relation` case class.
   *
   * `sense` is tried first; when it fails (for example on an `sk(...)` rule,
   * where "s" matches but "(" does not) the alternative backtracks to `senseKey`.
   * The parsers below that still yield plain tuples are left out for now:
   */
  def relation = (
    sense
    | senseKey
    // | glossRelation
    // | syntaxRelation
    // | hypernym
  )

  /** `s(synset_id, w_num, 'word', ss_type, sense_number, tag_count)` as a `Sense`. */
  def sense: Parser[Sense] =
    "s" ~> arguments6(synset_id, w_num, word, ss_type, sense_number, tag_count) ^^ {
      case (id, position, lemma, kind, number, tags) =>
        Sense(id, position, lemma, kind, number, tags)
    }

  /** `sk(synset_id, w_num, 'sense_key')` as a `SenseKey`. */
  def senseKey: Parser[SenseKey] =
    "sk" ~> arguments3(synset_id, w_num, sense_key) ^^ {
      case (id, position, key) => SenseKey(id, position, key)
    }

  // The following rules are recognised but only produce raw argument tuples.

  /** `g(synset_id, 'gloss')`. */
  def glossRelation = "g" ~> arguments2(synset_id, gloss)

  /** `syntax(synset_id, w_num, 'syntax')`. */
  def syntaxRelation = "syntax" ~> arguments3(synset_id, w_num, syntax)

  /** `hyp(synset_id, synset_id)`. */
  def hypernym = "hyp" ~> arguments2(synset_id, synset_id)

  // Argument parsers, named after the field they fill. Each one simply
  // delegates to the primitive parser for its lexical form.
  def synset_id = integer
  def w_num = integer
  def word = string
  def ss_type = char
  def sense_number = integer
  def tag_count = integer
  def sense_key = string
  def gloss = string
  def syntax = string
}
/**
 * Lexical parsers for the primitive terms used as rule arguments.
 */
trait PrimitiveParsers extends RegexParsers {

  /**
   * Unsigned decimal integer.
   *
   * A digit run that does not fit in an `Int` is reported as a parse failure
   * instead of letting `NumberFormatException` escape from the parser.
   *
   * NOTE(review): WordNet's prologdb format appears to use nine-digit synset
   * ids whose leading digit can be 3 or 4, which exceeds `Int.MaxValue`.
   * Accepting those would need `Long` here and in the `Relation` fields; confirm
   * against the data before widening.
   */
  def integer: Parser[Int] = boundedInt("""[0-9]+""".r, 10, "decimal integer")

  /**
   * Single-quoted Prolog atom, returned without the surrounding quotes.
   *
   * A quote inside the atom is written doubled (`''`) and is unescaped to a
   * single `'`. The whole atom is matched by one regex so that whitespace
   * directly after the opening quote belongs to the value rather than being
   * skipped as inter-token whitespace.
   */
  def string: Parser[String] = """'[^']*(?:''[^']*)*'""".r ^^ { quoted =>
    quoted.substring(1, quoted.length - 1).replace("''", "'")
  }

  /** A single ASCII letter. */
  def char: Parser[Char] = """[a-zA-Z]""".r ^^ { _.charAt(0) }

  /** Unsigned hexadecimal integer (no prefix); fails when it does not fit in an `Int`. */
  def hexadecimal: Parser[Int] = boundedInt("""[0-9A-Fa-f]+""".r, 16, "hexadecimal integer")

  /** Converts the text matched by `digits` using `radix`, failing the parse on overflow. */
  private def boundedInt(digits: Parser[String], radix: Int, what: String): Parser[Int] =
    digits >> { text =>
      try success(parseInt(text, radix))
      catch {
        case _: NumberFormatException => failure(what + " does not fit in an Int: " + text)
      }
    }
}
/**
 * Parsers for fixed-arity argument lists of the form `(x, y, ...)`.
 *
 * Each combinator runs the given argument parsers in order, requires a ","
 * between neighbours and parentheses around the whole list, and returns the
 * argument values as a tuple.
 */
trait ArgumentLists extends RegexParsers {

  /** `(a, b)` as a pair. */
  def arguments2[A, B](a: Parser[A], b: Parser[B]): Parser[(A, B)] =
    "(" ~> ((a <~ ",") ~ b) <~ ")" ^^ {
      case first ~ second => (first, second)
    }

  /** `(a, b, c)` as a triple. */
  def arguments3[A, B, C](a: Parser[A], b: Parser[B], c: Parser[C]): Parser[(A, B, C)] =
    "(" ~> ((a <~ ",") ~ (b <~ ",") ~ c) <~ ")" ^^ {
      case first ~ second ~ third => (first, second, third)
    }

  /** `(a, b, c, d, e, f)` as a 6-tuple. */
  def arguments6[A, B, C, D, E, F](
    a: Parser[A], b: Parser[B], c: Parser[C], d: Parser[D], e: Parser[E], f: Parser[F]
  ): Parser[(A, B, C, D, E, F)] =
    "(" ~> ((a <~ ",") ~ (b <~ ",") ~ (c <~ ",") ~ (d <~ ",") ~ (e <~ ",") ~ f) <~ ")" ^^ {
      case v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6 => (v1, v2, v3, v4, v5, v6)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment