Skip to content

Instantly share code, notes, and snippets.

@Aatch
Created December 22, 2013 23:58
Show Gist options
  • Save Aatch/8089884 to your computer and use it in GitHub Desktop.
Save Aatch/8089884 to your computer and use it in GitHub Desktop.
Rust-based Parser
use std::hashmap::HashMap;
#[deriving(Eq,Clone,IterBytes)]
pub struct Ident(u32);
#[deriving(Clone)]
pub struct Pos {
col: u16,
row: u16
}
#[deriving(Clone)]
pub struct Span {
start: Pos,
end: Pos
}
pub struct Item {
name: Ident,
span: Span,
info: ItemInfo
}
pub enum ItemInfo {
Xid,
XidUnion(~[Ident]),
Typedef(Type),
Struct(~[Field]),
Request(RequestInfo),
Enum(~[(Ident, Option<~Expr>)]),
SetOf(Option<Ident>, ~[Ident]),
}
pub struct RequestInfo {
opcode: uint,
fields: ~[Field],
errors: ~[Ident],
reply: Option<~Reply>
}
pub struct Reply {
name: Option<Ident>,
fields: ~[Field]
}
pub enum Field {
Padding(Expr),
Data(DataField),
Switch(Ident, Expr, ~[(Ident,Type)])
}
pub struct DataField {
name: Ident,
ty: Type,
info: HashMap<Ident, Expr>
}
#[deriving(Clone)]
pub enum Type {
Void,
Bool,
Uint8,
Uint16,
Uint32,
Int8,
Int16,
Int32,
Type(Ident),
List(~Type),
Tuple(~[Type])
}
impl Type {
pub fn is_list(&self) -> bool {
match self {
&List(_) => true,
_ => false
}
}
}
#[deriving(Clone)]
pub enum Expr {
IdentExpr(Ident),
NumExpr(uint),
BinopExpr(Op, ~Expr, ~Expr),
PadExpr(~Expr),
ParenExpr(~Expr),
NegExpr(~Expr),
BoolExpr(bool)
}
#[deriving(Clone, Ord)]
pub enum Op {
Div,
Mul,
Add,
Sub,
}
impl Op {
pub fn prec(self) -> uint {
match self {
Sub => 10,
Add => 10,
Mul => 20,
Div => 20,
}
}
}
impl Span {
pub fn from_pos(start: Pos, end: Pos) -> Span {
Span {
start: start,
end: end
}
}
}
xid Window;
xid Pixmap;
xid Cursor;
xid Font;
xid GContext;
xid Colormap;
xid Atom;
xidunion Drawable {
Window;
Pixmap;
}
xidunion Fontable {
Font;
GContext;
}
struct Char2B {
field byte1 : u8;
field byte2 : u8;
}
typedef VisualID : u32;
typedef Timestamp : u32;
typedef Keysym : u32;
typedef Keycode : u8;
typedef Button : u8;
typedef String8 : list<u8>;
struct Point {
field x : i16;
field y : i16;
}
struct Rectangle {
field x: i16;
field y: i16;
field width : u16;
field height: u16;
}
struct Arc {
field x : i16;
field y : i16;
field width : u16;
field height : u16;
field angle1 : i16;
field angle2 : i16;
}
struct Format {
field depth : u8;
field bits-per-pixel : u8;
field scanline-pad : u8;
pad = 5;
}
enum VisualClass {
StaticGray;
GrayScale;
StaticColor;
PseudoColor;
TrueColor;
DirectColor;
}
struct VisualType {
field visual-id : VisualID;
field class : VisualClass;
field bits-per-rgb-value : u8;
field colormap-entries : u16;
field red-mask : u32;
field green-mask : u32;
field blue-mask : u32;
pad = 4;
}
struct Depth {
field depth : u8;
pad = 1;
field visuals-len : u16;
pad = 4;
field visuals : list<VisualType> {
length = visuals-len;
}
}
setof Event {
KeyPress;
KeyRelease;
ButtonPress;
ButtonRelease;
EnterWindow;
LeaveWindow;
PointerMotion;
PointerMotionHint;
Button1Motion;
Button2Motion;
Button3Motion;
Button4Motion;
Button5Motion;
ButtonMotion;
KeymapState;
Exposure;
VisibilityChange;
StructureNotify;
ResizeRedirect;
SubstructureNotify;
SubstructureRedirect;
FocusChange;
PropertyChange;
ColorMapChange;
OwnerGrabButton;
}
setof PointerEvent : Event {
ButtonPress;
ButtonRelease;
EnterWindow;
LeaveWindow;
PointerMotion;
PointerMotionHint;
Button1Motion;
Button2Motion;
Button3Motion;
Button4Motion;
Button5Motion;
ButtonMotion;
KeymapState;
}
setof DeviceEvent : Event {
KeyPress;
KeyRelease;
ButtonPress;
ButtonRelease;
PointerMotion;
Button1Motion;
Button2Motion;
Button3Motion;
Button4Motion;
Button5Motion;
ButtonMotion;
}
setof KeyButtonMask {
Shift;
Lock;
Control;
Mod1;
Mod2;
Mod3;
Mod4;
Mod5;
Button1;
Button2;
Button3;
Button4;
Button5;
}
setof KeyMask : KeyButtonMask {
Shift;
Lock;
Control;
Mod1;
Mod2;
Mod3;
Mod4;
Mod5;
}
enum BackingStore {
NotUseful;
WhenMapped;
Always;
}
enum WindowClass {
CopyFromParent;
InputOutput;
InputOnly;
}
enum BitGravity {
Forget;
NorthWest;
North;
NorthEast;
West;
Center;
East;
SouthWest;
South;
SouthEast;
Static;
}
enum WinGravity {
Unmap;
NorthWest;
North;
NorthEast;
West;
Center;
East;
SouthWest;
South;
SouthEast;
Static;
}
enum MapState {
Unmapped;
Unviewable;
Viewable;
}
enum WindowAttr {
BackgroundPixmap;
BackgroundPixel;
BorderPixmap;
BorderPixel;
BitGravity;
WinGravity;
BackingStore;
BackingPlanes;
BackingPixel;
SaveUnder;
EventMask;
DoNotPropagateMask;
OverrideRedirect;
Colormap;
Cursor;
}
struct Screen {
field root : Window;
field default-colormap : Colormap;
field white-pixel : u32;
field black-pixel : u32;
field current-input-masks : Event;
field width-in-pixels : u16;
field height-in-pixels : u16;
field width-in-millimeters : u16;
field height-in-millimeters : u16;
field min-installed-maps : u16;
field max-installed-maps : u16;
field root-visual : VisualID;
field backing-stores : BackingStore;
field save-unders : bool;
field root-depth : u8;
field allowed-depths-len : u8;
field allowed-depths : list<Depth> {
length = allowed-depths-len;
}
}
struct Setup {
field status : u8;
pad = 1;
field protocol-major-version : u16;
field protocol-minor-version : u16;
field addition-data-length : u16;
field release-number : u32;
field resource-id-base : u32;
field resource-id-mask : u32;
field motion-buffer-size : u32;
field vendor-length : u16;
field maximum-request-length : u16;
field num-screens : u8;
field num-formats : u8;
field image-byte-order : ImageByteOrder;
field bitmap-format-bit-order : BitmapFormatBitOrder;
field bitmap-format-scanline-unit : u8;
field bitmap-format-scanline-pad : u8;
field min-keycode : Keycode;
field max-keycode : Keycode;
pad = 4;
field vendor : String8 {
length = vendor-length;
}
field pixmap-formats : list<Format> {
length = num-formats;
}
field roots : list<Screen> {
length = num-screens;
}
}
// Requests
request CreateWindow {
opcode = 1;
field depth : u8;
field wid : Window;
field parent : Window;
field x : i16;
field y : i16;
field width : u16;
field height : u16;
field border-width : u16;
field class : WindowClass { size=2; }
field visual : VisualID;
field value-mask : WindowAttr;
switch value-list = value-mask {
BackgroundPixmap : Pixmap;
BackgroundPixel : u32;
BorderPixmap : Pixmap;
BorderPixel : u32;
BitGravity : BitGravity;
WinGravity : WinGravity;
BackingStore : BackingStore;
BackingPlanes : u32;
BackingPixel : u32;
SaveUnder : bool;
EventMask : Event;
DoNotPropagateMask : DeviceEvent;
OverrideRedirect : bool;
Colormap : Colormap;
Cursor : Cursor;
}
}
request ChangeWindowAttributes {
opcode = 2;
field window : Window;
field value-mask : WindowAttr;
switch value-list = value-mask {
BackgroundPixmap : Pixmap;
BackgroundPixel : u32;
BorderPixmap : Pixmap;
BorderPixel : u32;
BitGravity : BitGravity;
WinGravity : WinGravity;
BackingStore : BackingStore;
BackingPlanes : u32;
BackingPixel : u32;
SaveUnder : bool;
EventMask : Event;
DoNotPropagateMask : DeviceEvent;
OverrideRedirect : bool;
Colormap : Colormap;
Cursor : Cursor;
}
}
request GetWindowAttributes {
opcode = 3;
field window : Window;
reply {
field backing-store : BackingStore;
field visual : VisualID;
field class : WindowClass;
field bit-gravity : BitGravity;
field win-gravity : WinGravity;
field backing-planes : u32;
field backing-pixel : u32;
field save-under : bool;
field map-is-installed : bool;
field map-state : MapState;
field override-redirect : bool;
field colormap : Colormap;
field all-event-masks : Event;
field your-event-masks : Event;
field do-not-propagate-mask : DeviceEvent {
size = 2;
}
pad = 2;
}
}
request DestroyWindow {
opcode = 4;
field window : Window;
}
request DestroySubwindows {
opcode = 5;
field window : Window;
}
enum SetMode {
Insert; Delete;
}
request ChangeSaveSet {
opcode = 6;
field mode : SetMode;
field window : Window;
}
request ReparentWindow {
opcode = 7;
field window : Window;
field parent : Window;
field x : i16;
field y : i16;
}
request MapWindow {
opcode = 8;
field window : Window;
}
request MapSubwindows {
opcode = 9;
field window : Window;
}
request UnmapWindow {
opcode = 10;
field window : Window;
}
request UnmapSubwindows {
opcode = 11;
field window : Window;
}
enum StackMode {
Above;
Below;
TopIf;
BottomIf;
Opposite;
}
enum ConfigWindow {
X;
Y;
Width;
Height;
BorderWidth;
Window;
StackMode;
}
request ConfigureWindow {
opcode = 12;
field window : Window;
field value-mask : ConfigWindow {
size = 2;
}
pad = 2;
switch value-list = value-mask {
X : i16;
Y : i16;
Width : u16;
Height : u16;
BorderWidth : u16;
Window : Window;
StackMode : StackMode;
}
}
enum CirculateDirection {
RaiseLowest;
LowerHighest;
}
request CirculateWindow {
opcode = 13;
field direction : CirculateDirection;
field window : Window;
}
request GetGeometry {
opcode = 14;
field drawable : Drawable;
reply {
field depth : u8;
field root : Window;
field x : i16;
field y : i16;
field width : u16;
field height : u16;
field border-width : u16;
}
}
request QueryTree {
opcode = 15;
field window : Window;
reply {
field root : Window;
field parent : Window;
field num-windows : u16;
pad = 14;
field children : list<Window> {
length = num-windows;
}
}
}
request InternAtom {
opcode = 16;
field only-if-exists : bool;
field name-length : u16;
pad = 2;
field name : String8 { length = name-length; }
reply {
field atom : Atom;
}
}
request GetAtomName {
opcode = 17;
field atom : Atom;
reply {
field name-length : u16;
field name : String8 { length = name-length; }
}
}
enum ChangeMode {
Replace; Prepend; Append;
}
request ChangeProperty {
opcode = 18;
field mode : ChangeMode;
field window : Window;
field property : Atom;
field type : Atom;
field format : u8;
pad = 3;
// Needs fixing, somehow
field data-length : u32;
field data : list<u8> {
length = data-length;
}
}
request DeleteProperty {
opcode = 19;
field window : Window;
field property : Atom;
}
request GetProperty {
opcode = 20;
field delete : bool;
field window : Window;
field property : Atom;
field type : Atom;
field long-offset : u32;
field long-length : u32;
reply {
field type : Atom;
field bytes-after : u32;
field value-length : u32;
pad = 12;
field prop-value : list<u8> {
length = value-length;
}
}
}
request ListProperties {
opcode = 21;
field window : Window;
reply {
field num-atoms : u16;
pad = 22;
field atoms : list<Atom> {
length = num-atoms;
}
}
}
request SetSelectionOwner {
opcode = 22;
field owner : Window;
field selection : Atom;
field time : Timestamp;
}
request GetSelectionOwner {
opcode = 23;
field selection : Atom;
reply {
field owner : Window;
}
}
request ConvertSelection {
opcode = 24;
field requestor : Window;
field selection : Atom;
field target : Atom;
field property : Atom;
field time : Timestamp;
}
request SendEvent {
opcode = 25;
field propagate : bool;
field destination : Window;
field event-mask : Event;
field event : EventStructure;
}
enum PointerMode { Synchronous; Asynchronous; }
enum KeyboardMode { Synchronous; Asynchronous; }
enum GrabStatus {
Success;
AlreadyGrabbed;
InvalidTime;
NotViewable;
Frozen;
}
request GrabPointer {
opcode = 26;
field owner-events : bool;
field grab-window : Window;
field event-mask : PointerEvent {
size = 2;
}
field pointer-mode : PointerMode;
field keyboard-mode : KeyboardMode;
field confine-to : Window;
field cursor : Cursor;
field time : Timestamp;
reply {
field status : GrabStatus;
}
}
use std::vec;
use std::util::replace;
use ast::Ident;
static INITIAL_SIZE : uint = 64;
pub struct IdentInterner {
priv idents: ~[Option<IdentInfo>],
priv size: uint
}
pub struct IdentInfo {
hash: u32,
name: ~str,
}
impl IdentInterner {
pub fn new() -> IdentInterner {
IdentInterner {
idents: vec::from_fn(INITIAL_SIZE, |_| None),
size: 0
}
}
pub fn intern(&mut self, ident: ~str) -> Ident {
let hash = murmur_hash(ident);
if !self.has_hash(hash) {
self.insert_ident(hash, ident);
}
Ident(hash)
}
pub fn get_str<'r>(&'r self, ident: Ident) -> &'r str {
let Ident(hash) = ident;
let mut idx = (hash as uint) % self.idents.len();
let start = idx;
loop {
match &self.idents[idx] {
&None => fail!("Ident isn't interned!?"),
&Some(IdentInfo { hash: ident_hash, name: ref name}) if ident_hash == hash => {
return name.as_slice();
}
_ => idx = (idx + 1) % self.idents.len()
}
if start == idx {
fail!("Ident isn't interned!?")
}
}
}
fn has_hash(&self, hash: u32) -> bool {
let mut idx = (hash as uint) % self.idents.len();
let start = idx;
loop {
match &self.idents[idx] {
&None => return false,
&Some(IdentInfo { hash: ident_hash, ..}) if ident_hash == hash => return true,
_ => idx = (idx + 1) % self.idents.len()
}
if start == idx { return false; }
}
}
fn insert_ident(&mut self, hash: u32, ident: ~str) {
self.expand();
self.insert_ident_info(IdentInfo {
hash: hash,
name: ident,
});
}
fn insert_ident_info(&mut self, info: IdentInfo) {
if (self.size >= self.idents.len()) {
fail!("HashMap is full, trying to insert {:?} (size: {:?}, len: {:?}",
info, self.size, self.idents.len());
}
let mut idx = (info.hash as uint) % self.idents.len();
loop {
match self.idents[idx] {
None => {}
Some(_) => {
idx = (idx + 1) % self.idents.len();
continue;
}
}
self.idents[idx] = Some(info);
break;
}
self.size += 1;
}
fn expand(&mut self) {
if ((self.idents.len() / 4) * 3) <= self.size {
let new_len = self.idents.len()*2;
let old_items = replace(&mut self.idents,
vec::from_fn(new_len, |_| None));
self.size = 0;
for i in old_items.move_iter() {
match i {
Some(ident) => self.insert_ident_info(ident),
None => {}
}
}
}
}
}
#[inline(always)]
fn rotl32(x: u32, r: i8) -> u32 {
(x << r) | (x >> (32 - r))
}
#[inline]
fn murmur_hash(s:&str) -> u32 {
static C1 : u32 = 0xcc9e2d51;
static C2 : u32 = 0x1b873593;
let len = s.len();
let ptr = s.as_ptr();
let mut hash = 0x34567890;
unsafe {
let num_blocks = len / 4;
let blocks = ptr.offset((num_blocks*4) as int) as *u32;
let mut i = -(num_blocks as int);
while (i < 0) {
let mut k1 = *(blocks.offset(i));
k1 *= C1;
k1 = rotl32(k1, 15);
k1 *= C2;
hash ^= k1;
hash = rotl32(hash, 13);
hash = hash*5+0xe6546b64;
i += 1;
}
let tail = ptr.offset((num_blocks*4) as int);
let mut k1 : u32 = 0;
let mut leftover = len & 3;
if leftover == 3 {
k1 ^= (*tail.offset(2) as u32) << 16;
leftover -= 1;
}
if leftover == 2 {
k1 ^= (*tail.offset(1) as u32) << 8;
leftover -= 1;
}
if leftover == 1 {
k1 ^= *tail as u32;
k1 *= C1;
k1 = rotl32(k1, 15);
k1 *= C2;
hash ^= k1;
}
hash ^= (len as u32);
hash ^= (hash >> 16);
hash *= 0x85ebca6b;
hash ^= hash >> 13;
hash *= 0xc2b2ae35;
hash ^= hash >> 16;
}
hash
}
#[cfg(test)]
mod test {
use super::IdentInterner;
use extra::test::BenchHarness;
use std::rand::{Rand,Rng,weak_rng};
#[test]
fn test_intern_match1() {
let mut interner = IdentInterner::new();
for i in range(1,1024) {
let s = format!("s{:02}", i);
let ident = interner.intern(s.to_owned());
let returned = interner.get_str(ident);
if returned != s.as_slice() {
fail!("Ident {:?} not interned properly: {:s} != {:s}", ident, returned, s);
}
}
}
#[test]
fn test_intern_match2() {
let mut interner = IdentInterner::new();
let s = "TestStr";
let the_ident = interner.intern(s.to_owned());
assert_eq!(the_ident, interner.intern(s.to_owned()));
assert_eq!(the_ident, interner.intern(s.to_owned()));
assert_eq!(the_ident, interner.intern(s.to_owned()));
assert_eq!(the_ident, interner.intern(s.to_owned()));
}
#[bench]
fn bench_intern(bh: &mut BenchHarness) {
let mut rng = weak_rng();
let mut interner = IdentInterner::new();
let mut n = 0;
bh.iter(|| {
let s = format!("s{:08x}", n);
let ident = interner.intern(s.to_owned());
n += 1;
})
}
#[bench]
fn bench_intern_same_str(bh: &mut BenchHarness) {
let mut interner = IdentInterner::new();
let s = "TestStr";
interner.intern(s.to_owned());
bh.iter(|| {
interner.intern(s.to_owned());
})
}
#[bench]
fn bench_intern_same_strs(bh: &mut BenchHarness) {
let mut interner = IdentInterner::new();
let s1 = "TestStr_1";
for i in range(1,64) {
let s = format!("s{:02}", i);
let mut interner = IdentInterner::new();
let ident = interner.intern(s.to_owned());
let returned = interner.get_str(ident);
if returned != s.as_slice() {
fail!("Ident {:?} not interned properly: {:s} != {:s}", ident, returned, s);
}
}
interner.intern(s1.to_owned());
bh.iter(|| {
interner.intern(s1.to_owned());
});
}
#[bench]
fn bench_get_str(bh: &mut BenchHarness) {
let mut interner = IdentInterner::new();
let s = "TestStr";
let the_ident = interner.intern(s.to_owned());
bh.iter(|| {
let _a = interner.get_str(the_ident);
})
}
}
#[feature(globs)];
#[feature(macro_rules)];
#[cfg(test)]
extern mod extra;
use std::path::Path;
use std::io;
use std::io::{File,Buffer};
use std::io::buffered::BufferedStream;
use std::hashmap::HashMap;
mod token;
mod ast;
mod parse;
mod intern;
fn open_file(path: &Path) -> BufferedStream<File> {
let file = File::open(path).expect("Error opening file");
BufferedStream::new(file)
}
fn main() {
let args = std::os::args();
let filename = args[1];
let mut file = open_file(&Path::new(filename));
let mut items = ~[];
let mut interner = intern::IdentInterner::new();
{
let tokenizer = token::Tokenizer::new(&mut file, &mut interner);
let mut parser = parse::Parser::new(tokenizer);
loop {
match parser.parse_item() {
Some(item) => {
items.push(item);
}
None => break
}
}
}
println!("Parsed {} Items\n", items.len());
}
use std;
use std::io::{Stream,Buffer};
use std::hashmap::HashMap;
use token::*;
use intern;
use ast;
macro_rules! prefill_idents {
($($name:ident: $token:expr),*) => (
struct Idents {
$($name : ast::Ident),*
}
fn prefill_interner(interner: &mut intern::IdentInterner) -> Idents {
let mut idents : Idents = unsafe { std::unstable::intrinsics::uninit() };
$(idents.$name = interner.intern($token);)*
idents
}
)
}
prefill_idents! {
Void: ~"void",
Bool: ~"bool",
Uint8: ~"u8",
Uint16: ~"u16",
Uint32: ~"u32",
Int8: ~"i8",
Int16: ~"i16",
Int32: ~"i32"
}
pub struct Parser<'t, 's, S> {
tokenizer: Tokenizer<'t, 's, S>,
lookahead: ~[Token],
priv idents: Idents
}
impl<'t, 's, S:Stream+Buffer> Parser<'t, 's, S> {
pub fn new<'r, 's>(tokenizer: Tokenizer<'r, 's, S>) -> Parser<'r, 's, S> {
let idents = prefill_interner(tokenizer.interner);
Parser {
tokenizer: tokenizer,
lookahead: ~[],
idents: idents
}
}
pub fn parse_item(&mut self) -> Option<ast::Item> {
let token = self.read_token();
let start = self.tokenizer.get_pos();
match token {
Eof => None,
Xid => {
let ident = self.expect_ident();
self.expect(Semicolon);
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::Xid, span))
}
XidUnion => {
let ident = self.expect_ident();
self.expect(LBrace);
let mut members = ~[];
loop {
match self.read_token() {
RBrace => break,
Identifier(s) => {
members.push(s);
self.expect(Semicolon);
}
t => self.fail(format!("Unexpected token `{}` expected '\\{' or identifier", t.to_str()))
}
}
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::XidUnion(members), span))
}
Struct => {
let ident = self.expect_ident();
self.expect(LBrace);
let mut fields = ~[];
loop {
if self.eat_token(RBrace) {
break;
} else {
fields.push(self.parse_field());
}
}
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::Struct(fields), span))
}
Request => {
let ident = self.expect_ident();
self.expect(LBrace);
let mut opcode = None;
let mut reply = None;
let mut fields = ~[];
loop {
if self.eat_token(RBrace) {
break;
} else if self.eat_token(Opcode) {
self.expect(Equal);
let code = self.expect_number();
self.expect(Semicolon);
opcode = Some(code);
} else if reply.is_none() && self.eat_token(Reply) {
let mut name = None;
match self.read_token() {
Identifier(nm) => {
name = Some(nm);
self.expect(LBrace);
}
LBrace => { }
t => self.fail(format!("Unexpected token `{}` expected '\\{' or identifier", t.to_str()))
}
let mut fields = ~[];
loop {
if self.eat_token(RBrace) {
break;
} else {
fields.push(self.parse_field());
}
}
reply = Some(~ast::Reply { name: name, fields: fields });
} else {
fields.push(self.parse_field());
}
}
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::Request(ast::RequestInfo {
opcode: opcode.expect("Requests should have an opcode"),
fields: fields,
errors: ~[],
reply: reply
}), span))
}
Typedef => {
let ident = self.expect_ident();
self.expect(Colon);
let ty = self.parse_type();
self.expect(Semicolon);
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::Typedef(ty), span))
}
Enum => {
let ident = self.expect_ident();
self.expect(LBrace);
let mut members = ~[];
loop {
match self.read_token() {
RBrace => break,
Identifier(s) => {
let mut expr = None;
if self.eat_token(Equal) {
expr = Some(~self.parse_expr());
}
self.expect(Semicolon);
members.push((s, expr));
}
t => self.unexpected(t,[RBrace])
}
}
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::Enum(members), span))
}
SetOf => {
let ident = self.expect_ident();
let mut parent = None;
if self.eat_token(Colon) {
parent = Some(self.expect_ident());
}
self.expect(LBrace);
let mut members = ~[];
loop {
match self.read_token() {
RBrace => break,
Identifier(s) => {
self.expect(Semicolon);
members.push(s);
}
t => self.unexpected(t, [RBrace])
}
}
let end = self.tokenizer.get_pos();
let span = ast::Span::from_pos(start, end);
Some(self.mk_item(ident, ast::SetOf(parent, members), span))
}
_ => self.unexpected(token, [Xid, XidUnion, Struct, Request, Typedef])
}
}
fn parse_type(&mut self) -> ast::Type {
match self.read_token() {
Identifier(s) => {
if s == self.idents.Void {
ast::Void
} else if s == self.idents.Bool {
ast::Bool
} else if s == self.idents.Uint8 {
ast::Uint8
} else if s == self.idents.Uint16 {
ast::Uint16
} else if s == self.idents.Uint32 {
ast::Uint32
} else if s == self.idents.Int8 {
ast::Int8
} else if s == self.idents.Int16 {
ast::Int16
} else if s == self.idents.Int32 {
ast::Int32
} else {
ast::Type(s)
}
}
List => {
self.expect(LAngle);
let ty = ~self.parse_type();
self.expect(RAngle);
ast::List(ty)
}
LParen => {
let mut tys = ~[];
tys.push(self.parse_type());
loop {
match self.read_token() {
RParen => break,
Comma => {
tys.push(self.parse_type());
}
t => self.fail(format!("Unexpected token {}, expected type", t.to_str()))
}
}
if tys.len() == 0 {
tys[0]
} else {
ast::Tuple(tys)
}
}
s => self.fail(format!("Unexpected start of type {}", s.to_str()))
}
}
fn parse_field(&mut self) -> ast::Field {
match self.read_token() {
Field => {
let ident = self.expect_ident();
self.expect(Colon);
let ty = self.parse_type();
let mut info = HashMap::new();
match self.read_token() {
Semicolon => {}
LBrace => {
loop {
match self.read_token() {
RBrace => break,
Identifier(key) => {
self.expect(Equal);
let val = self.parse_expr();
self.expect(Semicolon);
info.insert(key,val);
}
t => self.fail(format!("Unexpected token `{}`, expected '\\}', number or identifier",
t.to_str()))
}
}
}
t => self.unexpected(t, [Semicolon, LBrace])
}
ast::Data(ast::DataField {
name: ident,
ty: ty,
info: info
})
}
Pad => {
self.expect(Equal);
let f = ast::Padding(self.parse_expr());
self.expect(Semicolon);
f
}
Switch => {
let ident = self.expect_ident();
self.expect(Equal);
let expr = self.parse_expr();
self.expect(LBrace);
let mut arms = ~[];
loop {
match self.read_token() {
RBrace => break,
Identifier(s) => {
self.expect(Colon);
let ty = self.parse_type();
self.expect(Semicolon);
arms.push((s, ty));
}
t => self.fail(format!("Unexpected token `{}`, expected '\\}' or identifier", t.to_str()))
}
}
ast::Switch(ident, expr, arms)
}
t => self.unexpected(t, [Field,Switch,Pad])
}
}
fn parse_expr(&mut self) -> ast::Expr {
let expr = self.parse_primary_expr();
self.parse_op_expr(expr, 0)
}
fn parse_primary_expr(&mut self) -> ast::Expr {
match self.read_token() {
Identifier(ident) => ast::IdentExpr(ident),
Number(num) => ast::NumExpr(num),
Pad => {
self.expect(LParen);
let expr = self.parse_expr();
self.expect(RParen);
expr
}
LParen => {
let expr = ~self.parse_expr();
self.expect(RParen);
ast::ParenExpr(expr)
}
Minus => {
ast::NegExpr(~self.parse_primary_expr())
}
True => ast::BoolExpr(true),
False => ast::BoolExpr(false),
t => self.fail(format!("Unexpected start of expression `{}`", t.to_str()))
}
}
fn parse_op_expr(&mut self, mut lhs: ast::Expr, prec: uint) -> ast::Expr {
let mut rhs;
loop {
match self.peek_op() {
Some(op) if op.prec() >= prec => {
self.bump();
rhs = self.parse_primary_expr();
loop {
match self.peek_op() {
Some(next_op) if next_op.prec() > op.prec() => {
rhs = self.parse_op_expr(rhs, next_op.prec());
}
_ => break
}
}
lhs = ast::BinopExpr(op, ~lhs, ~rhs);
}
_ => break
}
}
return lhs;
}
fn peek_op(&mut self) -> Option<ast::Op> {
Some(match self.peek_token(0) {
Plus => ast::Add,
Minus => ast::Sub,
Divide => ast::Div,
Star => ast::Mul,
_ => return None
})
}
fn mk_item(&self, ident: ast::Ident, info: ast::ItemInfo, span: ast::Span) -> ast::Item {
ast::Item {
name: ident,
span: span,
info: info,
}
}
fn unexpected(&self, token: Token, expected: &[Token]) -> ! {
self.fail(format!("Unexpected token `{}` expected `{}`", token.to_str(), expected.to_str()))
}
fn fail(&self, msg: ~str) -> ! {
fail!("Failure {:s} on line {:u}", msg, self.tokenizer.cur_line)
}
fn read_token(&mut self) -> Token {
if self.lookahead.len() > 0 {
self.lookahead.shift()
} else {
self.tokenizer.read_token()
}
}
fn peek_token(&mut self, n: uint) -> Token {
if self.lookahead.len() <= n {
let mut to_read = (n - self.lookahead.len()) + 1;
while to_read > 0 {
self.lookahead.push(self.tokenizer.read_token());
to_read -= 1;
}
}
self.lookahead[n].clone()
}
fn bump(&mut self) { self.read_token(); }
fn expect(&mut self, token: Token) {
let t = self.read_token();
if t != token {
self.unexpected(t, [token])
}
}
fn expect_ident(&mut self) -> ast::Ident {
match self.read_token() {
Identifier(s) => s,
t => self.fail(format!("Unexpected token `{}` expected identifier", t.to_str()))
}
}
fn expect_number(&mut self) -> uint {
match self.read_token() {
Number(n) => n,
t => self.fail(format!("Unexpected token `{}` expected number", t.to_str()))
}
}
fn eat_token(&mut self, token: Token) -> bool {
let t = self.read_token();
if t == token {
true
} else {
self.lookahead.unshift(t);
false
}
}
}
use std;
use std::io::{Stream,Buffer};
use ast;
use ast::Ident;
use intern::IdentInterner;
#[deriving(Eq, ToStr, Clone)]
pub enum Token {
Eof,
Xid,
XidUnion,
Struct,
Request,
Opcode,
Field,
Pad,
Switch,
Typedef,
Enum,
SetOf,
List,
Reply,
True,
False,
Colon,
Semicolon,
Comma,
Pipe,
LBrace,
RBrace,
Equal,
Minus,
Plus,
Divide,
Star,
LParen,
RParen,
LAngle,
RAngle,
FatArrow,
Identifier(Ident),
Number(uint)
}
pub struct Tokenizer<'stream, 'intern, S> {
stream: &'stream mut S,
lookahead: char,
interner: &'intern mut IdentInterner,
cur_col: uint,
cur_line: uint
}
impl<'stream, 'intern, S:Stream+Buffer> Tokenizer<'stream, 'intern, S> {
pub fn new<'r, 's>(stream: &'r mut S, interner: &'s mut IdentInterner) -> Tokenizer<'r, 's, S> {
Tokenizer {
stream: stream,
lookahead: '\0',
interner: interner,
cur_col: 1,
cur_line: 1
}
}
pub fn read_token(&mut self) -> Token {
self.skip_whitespace();
let c = self.read_char();
match c {
'\0' => Eof,
',' => Comma,
';' => Semicolon,
':' => Colon,
'{' => LBrace,
'}' => RBrace,
'|' => Pipe,
'=' => {
if self.eat_char('>') {
FatArrow
} else {
Equal
}
},
'-' => Minus,
'+' => Plus,
'/' => {
if self.eat_char('/') {
loop {
match self.read_char() {
'\n' | '\r' => return self.read_token(),
_ => {}
}
}
} else {
Divide
}
}
'*' => Star,
'(' => LParen,
')' => RParen,
'<' => LAngle,
'>' => RAngle,
'0'..'9' => {
let mut num = (c as u32) - ('0' as u32);
loop {
let c = self.read_char();
match c {
'0'..'9' => {
num = num << 1;
num += (c as u32) - ('0' as u32);
}
_ => {
self.lookahead = c;
break;
}
}
}
Number(num as uint)
}
_ => {
let mut chars = ~[c];
loop {
let c = self.read_char();
match c {
'a'..'z' | 'A'..'Z'
| '-' | '0'..'9' => chars.push(c),
_ => {
self.lookahead = c;
break;
}
}
}
let s = std::str::from_chars(chars);
match s.as_slice() {
"xid" => Xid,
"xidunion" => XidUnion,
"struct" => Struct,
"request" => Request,
"opcode" => Opcode,
"field" => Field,
"pad" => Pad,
"switch" => Switch,
"typedef" => Typedef,
"enum" => Enum,
"setof" => SetOf,
"list" => List,
"reply" => Reply,
"true" => True,
"false" => False,
_ => Identifier(self.interner.intern(s))
}
}
}
}
pub fn get_pos(&self) -> ast::Pos {
ast::Pos {
row: self.cur_line as u16,
col: self.cur_col as u16
}
}
fn read_char(&mut self) -> char {
if self.lookahead != '\0' {
let c = self.lookahead;
self.lookahead = '\0';
return c;
} else {
match self.stream.read_char() {
Some(c) => {
if c == '\n' {
self.cur_col = 0;
self.cur_line += 1;
}
self.cur_col += 1;
c
},
None => '\0'
}
}
}
fn eat_char(&mut self, c: char) -> bool {
let r = self.read_char();
if r == c {
true
} else {
self.lookahead = r;
false
}
}
fn skip_whitespace(&mut self) {
loop {
match self.read_char() {
' ' | '\n' | '\t' | '\r' => {}
'\0' => break,
c => {
self.lookahead = c;
break;
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment