-
-
Save iomonad/ee247a568e9c3b55eb047a887e2e642c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package alwyzon.irc | |
/* | |
<message> ::= [':' <prefix> <SPACE> ] <command> <params> <crlf> | |
<prefix> ::= <servername> | <nick> [ '!' <user> ] [ '@' <host> ] | |
<command> ::= <letter> { <letter> } | <number> <number> <number> | |
<SPACE> ::= ' ' { ' ' } | |
<params> ::= <SPACE> [ ':' <trailing> | <middle> <params> ] | |
<middle> ::= <Any *non-empty* sequence of octets not including SPACE or NUL or CR or LF, the first of which may not be ':'> | |
<trailing> ::= <Any, possibly *empty*, sequence of octets not including NUL or CR or LF> | |
<crlf> ::= CR LF | |
<target> ::= <to> [ "," <target> ] | |
<to> ::= <channel> | <user> '@' <servername> | <nick> | <mask> | |
<channel> ::= ('#' | '&') <chstring> | |
<servername> ::= <host> | |
<host> ::= see RFC 952 [DNS:4] for details on allowed hostnames | |
<nick> ::= <letter> { <letter> | <number> | <special> | '|' } | |
<mask> ::= ('#' | '$') <chstring> | |
<chstring> ::= <any 8bit code except SPACE, BELL, NUL, CR, LF and comma (',')> | |
<user> ::= <nonwhite> { <nonwhite> } | |
<letter> ::= 'a' ... 'z' | 'A' ... 'Z' | |
<number> ::= '0' ... '9' | |
<special> ::= '-' | '_' | '[' | ']' | '\' | '`' | '^' | '{' | '}' | |
<nonwhite> ::= <any 8bit code except SPACE (0x20), NUL (0x0), CR (0xd), and LF (0xa)> | |
NOTE: Beyond what RFC 1459 and RFC 2813 allow, the parser also accepts '|' in nicks, since it is in common use.
*/ | |
import alwyzon.irc.Tokens._ | |
import scala.util.parsing.combinator.RegexParsers | |
/**
 * Regex based parser used by [[alwyzon.irc.Client]] to parse incoming IRC messages.
 *
 * The entry point is [[Parser.message]]; the remaining members are the building
 * blocks of the grammar quoted at the top of this file.
 *
 * NOTE(review): `RegexParsers` skips leading whitespace before every regex and
 * string-literal match (its `skipWhitespace` default is left untouched here).
 * As a consequence, runs of several spaces between parameters are tolerated,
 * but leading spaces of a trailing parameter are dropped as well.
 */
object Parser extends RegexParsers {

  /** A whole message: an optional `:prefix`, the command, then its parameters. */
  lazy val message: Parser[IrcMessage] =
    opt(":" ~> prefix <~ space) ~ command ~ params ^^ {
      case maybePrefix ~ cmd ~ args => IrcMessage(maybePrefix, cmd, args)
    }

  /**
   * Message origin: a server name or a nick, optionally followed by `!user`
   * and `@host`.
   *
   * The longest-match alternation is used on purpose. The ordered choice would
   * commit to `host` as soon as it matched a leading run of characters, so a
   * nick containing a character outside the host class (for example '{', '}',
   * '|' or a non-ASCII letter) was cut short and the rest of the prefix leaked
   * into the command and parameters.
   */
  lazy val prefix: Parser[Prefix] =
    (host ||| nick) ~ opt('!' ~> user) ~ opt('@' ~> host) ^^ {
      case origin ~ maybeUser ~ maybeHost => Prefix(origin, maybeUser, maybeHost)
    }

  /** Either a three digit numeric reply code or an alphabetic command word. */
  lazy val command: Parser[Command] =
    ("""[0-9]{3}""".r | word) ^^ Command

  /** Zero or more literal space characters. */
  lazy val space = rep(' ')

  /**
   * The parameter list: any number of middle parameters followed by an
   * optional ':'-introduced trailing parameter, which is appended as the last
   * element of the resulting list.
   */
  lazy val params =
    space ~> opt(repsep(middle, ' ') ~ (space ~> opt(':' ~> trailing))) ^^ {
      case Some(middles ~ lastParam) => middles ++ lastParam.toList
      case None                      => List()
    }

  /** A single whitespace-free parameter; it must not start at a ':'. */
  lazy val middle = not(':') ~> """[^\s\r\n]+""".r

  /** Everything up to the end of the line; may be empty. */
  lazy val trailing = """[^\r\n]*""".r

  /** The CR LF line terminator. */
  lazy val crlf = """\r\n""".r

  /** A channel name: one of '#', '!', '&', '+' followed by at least one character. */
  lazy val channel = """[#!&\+].+""".r

  // The host character class is deliberately wider than a plain hostname (it
  // also admits nick characters such as '_'), so that a successful host match
  // does not stop in the middle of a nick and make the rest of the prefix fail.
  // Nick characters that are still missing here are covered by the
  // longest-match alternation in `prefix`.
  lazy val host = """[a-zA-Z0-9.:\-^_\-\[\]\\/`]+""".r

  /** A nickname: letters, digits and the special characters, including '|'. */
  lazy val nick = """[\p{L}0-9\-_\[\]\\`^\{\}\|]+""".r

  /**
   * A user name: one or more characters other than whitespace and '@'.
   *
   * The class used to be written with grouping and alternation syntax, which
   * inside a character class are literal characters; that wrongly excluded
   * '(', ')' and '|' from user names.
   */
  lazy val user = """[^\s@]+""".r

  /**
   * A target mask: '#' or '$' followed by at least one character.
   *
   * The leading class no longer contains '|', which is a literal inside a
   * character class and made a bare '|' count as a mask sigil.
   */
  lazy val mask: Parser[UserMask] = """[#$].+""".r ^^ UserMask

  /** A single ASCII letter. */
  lazy val letter = """[a-zA-Z]""".r

  /** A ':' followed by at least one character. */
  lazy val startsWithColon = """:.+""".r

  /**
   * One or more ASCII letters.
   *
   * At least one letter is required, as in the grammar's command rule;
   * accepting the empty string let a missing command parse as an empty one.
   */
  lazy val word = """[a-zA-Z]+""".r

  /** A single decimal digit. */
  lazy val number = """[0-9]""".r

  /** One of the special characters allowed in nicks. */
  lazy val special = """[-_\[\]\\`^\{\}]""".r
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment