Created
December 22, 2013 23:58
-
-
Save Aatch/8089884 to your computer and use it in GitHub Desktop.
Rust-based Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::hashmap::HashMap; | |
#[deriving(Eq,Clone,IterBytes)] | |
pub struct Ident(u32); | |
#[deriving(Clone)] | |
pub struct Pos { | |
col: u16, | |
row: u16 | |
} | |
#[deriving(Clone)] | |
pub struct Span { | |
start: Pos, | |
end: Pos | |
} | |
pub struct Item { | |
name: Ident, | |
span: Span, | |
info: ItemInfo | |
} | |
pub enum ItemInfo { | |
Xid, | |
XidUnion(~[Ident]), | |
Typedef(Type), | |
Struct(~[Field]), | |
Request(RequestInfo), | |
Enum(~[(Ident, Option<~Expr>)]), | |
SetOf(Option<Ident>, ~[Ident]), | |
} | |
pub struct RequestInfo { | |
opcode: uint, | |
fields: ~[Field], | |
errors: ~[Ident], | |
reply: Option<~Reply> | |
} | |
pub struct Reply { | |
name: Option<Ident>, | |
fields: ~[Field] | |
} | |
pub enum Field { | |
Padding(Expr), | |
Data(DataField), | |
Switch(Ident, Expr, ~[(Ident,Type)]) | |
} | |
pub struct DataField { | |
name: Ident, | |
ty: Type, | |
info: HashMap<Ident, Expr> | |
} | |
#[deriving(Clone)] | |
pub enum Type { | |
Void, | |
Bool, | |
Uint8, | |
Uint16, | |
Uint32, | |
Int8, | |
Int16, | |
Int32, | |
Type(Ident), | |
List(~Type), | |
Tuple(~[Type]) | |
} | |
impl Type { | |
pub fn is_list(&self) -> bool { | |
match self { | |
&List(_) => true, | |
_ => false | |
} | |
} | |
} | |
#[deriving(Clone)] | |
pub enum Expr { | |
IdentExpr(Ident), | |
NumExpr(uint), | |
BinopExpr(Op, ~Expr, ~Expr), | |
PadExpr(~Expr), | |
ParenExpr(~Expr), | |
NegExpr(~Expr), | |
BoolExpr(bool) | |
} | |
#[deriving(Clone, Ord)] | |
pub enum Op { | |
Div, | |
Mul, | |
Add, | |
Sub, | |
} | |
impl Op { | |
pub fn prec(self) -> uint { | |
match self { | |
Sub => 10, | |
Add => 10, | |
Mul => 20, | |
Div => 20, | |
} | |
} | |
} | |
impl Span { | |
pub fn from_pos(start: Pos, end: Pos) -> Span { | |
Span { | |
start: start, | |
end: end | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xid Window; | |
xid Pixmap; | |
xid Cursor; | |
xid Font; | |
xid GContext; | |
xid Colormap; | |
xid Atom; | |
xidunion Drawable { | |
Window; | |
Pixmap; | |
} | |
xidunion Fontable { | |
Font; | |
GContext; | |
} | |
struct Char2B { | |
field byte1 : u8; | |
field byte2 : u8; | |
} | |
typedef VisualID : u32; | |
typedef Timestamp : u32; | |
typedef Keysym : u32; | |
typedef Keycode : u8; | |
typedef Button : u8; | |
typedef String8 : list<u8>; | |
struct Point { | |
field x : i16; | |
field y : i16; | |
} | |
struct Rectangle { | |
field x: i16; | |
field y: i16; | |
field width : u16; | |
field height: u16; | |
} | |
struct Arc { | |
field x : i16; | |
field y : i16; | |
field width : u16; | |
field height : u16; | |
field angle1 : i16; | |
field angle2 : i16; | |
} | |
struct Format { | |
field depth : u8; | |
field bits-per-pixel : u8; | |
field scanline-pad : u8; | |
pad = 5; | |
} | |
enum VisualClass { | |
StaticGray; | |
GrayScale; | |
StaticColor; | |
PseudoColor; | |
TrueColor; | |
DirectColor; | |
} | |
struct VisualType { | |
field visual-id : VisualID; | |
field class : VisualClass; | |
field bits-per-rgb-value : u8; | |
field colormap-entries : u16; | |
field red-mask : u32; | |
field green-mask : u32; | |
field blue-mask : u32; | |
pad = 4; | |
} | |
struct Depth { | |
field depth : u8; | |
pad = 1; | |
field visuals-len : u16; | |
pad = 4; | |
field visuals : list<VisualType> { | |
length = visuals-len; | |
} | |
} | |
setof Event { | |
KeyPress; | |
KeyRelease; | |
ButtonPress; | |
ButtonRelease; | |
EnterWindow; | |
LeaveWindow; | |
PointerMotion; | |
PointerMotionHint; | |
Button1Motion; | |
Button2Motion; | |
Button3Motion; | |
Button4Motion; | |
Button5Motion; | |
ButtonMotion; | |
KeymapState; | |
Exposure; | |
VisibilityChange; | |
StructureNotify; | |
ResizeRedirect; | |
SubstructureNotify; | |
SubstructureRedirect; | |
FocusChange; | |
PropertyChange; | |
ColorMapChange; | |
OwnerGrabButton; | |
} | |
setof PointerEvent : Event { | |
ButtonPress; | |
ButtonRelease; | |
EnterWindow; | |
LeaveWindow; | |
PointerMotion; | |
PointerMotionHint; | |
Button1Motion; | |
Button2Motion; | |
Button3Motion; | |
Button4Motion; | |
Button5Motion; | |
ButtonMotion; | |
KeymapState; | |
} | |
setof DeviceEvent : Event { | |
KeyPress; | |
KeyRelease; | |
ButtonPress; | |
ButtonRelease; | |
PointerMotion; | |
Button1Motion; | |
Button2Motion; | |
Button3Motion; | |
Button4Motion; | |
Button5Motion; | |
ButtonMotion; | |
} | |
setof KeyButtonMask { | |
Shift; | |
Lock; | |
Control; | |
Mod1; | |
Mod2; | |
Mod3; | |
Mod4; | |
Mod5; | |
Button1; | |
Button2; | |
Button3; | |
Button4; | |
Button5; | |
} | |
setof KeyMask : KeyButtonMask { | |
Shift; | |
Lock; | |
Control; | |
Mod1; | |
Mod2; | |
Mod3; | |
Mod4; | |
Mod5; | |
} | |
enum BackingStore { | |
NotUseful; | |
WhenMapped; | |
Always; | |
} | |
enum WindowClass { | |
CopyFromParent; | |
InputOutput; | |
InputOnly; | |
} | |
enum BitGravity { | |
Forget; | |
NorthWest; | |
North; | |
NorthEast; | |
West; | |
Center; | |
East; | |
SouthWest; | |
South; | |
SouthEast; | |
Static; | |
} | |
enum WinGravity { | |
Unmap; | |
NorthWest; | |
North; | |
NorthEast; | |
West; | |
Center; | |
East; | |
SouthWest; | |
South; | |
SouthEast; | |
Static; | |
} | |
enum MapState { | |
Unmapped; | |
Unviewable; | |
Viewable; | |
} | |
enum WindowAttr { | |
BackgroundPixmap; | |
BackgroundPixel; | |
BorderPixmap; | |
BorderPixel; | |
BitGravity; | |
WinGravity; | |
BackingStore; | |
BackingPlanes; | |
BackingPixel; | |
SaveUnder; | |
EventMask; | |
DoNotPropagateMask; | |
OverrideRedirect; | |
Colormap; | |
Cursor; | |
} | |
struct Screen { | |
field root : Window; | |
field default-colormap : Colormap; | |
field white-pixel : u32; | |
field black-pixel : u32; | |
field current-input-masks : Event; | |
field width-in-pixels : u16; | |
field height-in-pixels : u16; | |
field width-in-millimeters : u16; | |
field height-in-millimeters : u16; | |
field min-installed-maps : u16; | |
field max-installed-maps : u16; | |
field root-visual : VisualID; | |
field backing-stores : BackingStore; | |
field save-unders : bool; | |
field root-depth : u8; | |
field allowed-depths-len : u8; | |
field allowed-depths : list<Depth> { | |
length = allowed-depths-len; | |
} | |
} | |
struct Setup { | |
field status : u8; | |
pad = 1; | |
field protocol-major-version : u16; | |
field protocol-minor-version : u16; | |
field addition-data-length : u16; | |
field release-number : u32; | |
field resource-id-base : u32; | |
field resource-id-mask : u32; | |
field motion-buffer-size : u32; | |
field vendor-length : u16; | |
field maximum-request-length : u16; | |
field num-screens : u8; | |
field num-formats : u8; | |
field image-byte-order : ImageByteOrder; | |
field bitmap-format-bit-order : BitmapFormatBitOrder; | |
field bitmap-format-scanline-unit : u8; | |
field bitmap-format-scanline-pad : u8; | |
field min-keycode : Keycode; | |
field max-keycode : Keycode; | |
pad = 4; | |
field vendor : String8 { | |
length = vendor-length; | |
} | |
field pixmap-formats : list<Format> { | |
length = num-formats; | |
} | |
field roots : list<Screen> { | |
length = num-screens; | |
} | |
} | |
// Requests | |
request CreateWindow { | |
opcode = 1; | |
field depth : u8; | |
field wid : Window; | |
field parent : Window; | |
field x : i16; | |
field y : i16; | |
field width : u16; | |
field height : u16; | |
field border-width : u16; | |
field class : WindowClass { size=2; } | |
field visual : VisualID; | |
field value-mask : WindowAttr; | |
switch value-list = value-mask { | |
BackgroundPixmap : Pixmap; | |
BackgroundPixel : u32; | |
BorderPixmap : Pixmap; | |
BorderPixel : u32; | |
BitGravity : BitGravity; | |
WinGravity : WinGravity; | |
BackingStore : BackingStore; | |
BackingPlanes : u32; | |
BackingPixel : u32; | |
SaveUnder : bool; | |
EventMask : Event; | |
DoNotPropagateMask : DeviceEvent; | |
OverrideRedirect : bool; | |
Colormap : Colormap; | |
Cursor : Cursor; | |
} | |
} | |
request ChangeWindowAttributes { | |
opcode = 2; | |
field window : Window; | |
field value-mask : WindowAttr; | |
switch value-list = value-mask { | |
BackgroundPixmap : Pixmap; | |
BackgroundPixel : u32; | |
BorderPixmap : Pixmap; | |
BorderPixel : u32; | |
BitGravity : BitGravity; | |
WinGravity : WinGravity; | |
BackingStore : BackingStore; | |
BackingPlanes : u32; | |
BackingPixel : u32; | |
SaveUnder : bool; | |
EventMask : Event; | |
DoNotPropagateMask : DeviceEvent; | |
OverrideRedirect : bool; | |
Colormap : Colormap; | |
Cursor : Cursor; | |
} | |
} | |
request GetWindowAttributes { | |
opcode = 3; | |
field window : Window; | |
reply { | |
field backing-store : BackingStore; | |
field visual : VisualID; | |
field class : WindowClass; | |
field bit-gravity : BitGravity; | |
field win-gravity : WinGravity; | |
field backing-planes : u32; | |
field backing-pixel : u32; | |
field save-under : bool; | |
field map-is-installed : bool; | |
field map-state : MapState; | |
field override-redirect : bool; | |
field colormap : Colormap; | |
field all-event-masks : Event; | |
field your-event-masks : Event; | |
field do-not-propagate-mask : DeviceEvent { | |
size = 2; | |
} | |
pad = 2; | |
} | |
} | |
request DestroyWindow { | |
opcode = 4; | |
field window : Window; | |
} | |
request DestroySubwindows { | |
opcode = 5; | |
field window : Window; | |
} | |
enum SetMode { | |
Insert; Delete; | |
} | |
request ChangeSaveSet { | |
opcode = 6; | |
field mode : SetMode; | |
field window : Window; | |
} | |
request ReparentWindow { | |
opcode = 7; | |
field window : Window; | |
field parent : Window; | |
field x : i16; | |
field y : i16; | |
} | |
request MapWindow { | |
opcode = 8; | |
field window : Window; | |
} | |
request MapSubwindows { | |
opcode = 9; | |
field window : Window; | |
} | |
request UnmapWindow { | |
opcode = 10; | |
field window : Window; | |
} | |
request UnmapSubwindows { | |
opcode = 11; | |
field window : Window; | |
} | |
enum StackMode { | |
Above; | |
Below; | |
TopIf; | |
BottomIf; | |
Opposite; | |
} | |
enum ConfigWindow { | |
X; | |
Y; | |
Width; | |
Height; | |
BorderWidth; | |
Window; | |
StackMode; | |
} | |
request ConfigureWindow { | |
opcode = 12; | |
field window : Window; | |
field value-mask : ConfigWindow { | |
size = 2; | |
} | |
pad = 2; | |
switch value-list = value-mask { | |
X : i16; | |
Y : i16; | |
Width : u16; | |
Height : u16; | |
BorderWidth : u16; | |
Window : Window; | |
StackMode : StackMode; | |
} | |
} | |
enum CirculateDirection { | |
RaiseLowest; | |
LowerHighest; | |
} | |
request CirculateWindow { | |
opcode = 13; | |
field direction : CirculateDirection; | |
field window : Window; | |
} | |
request GetGeometry { | |
opcode = 14; | |
field drawable : Drawable; | |
reply { | |
field depth : u8; | |
field root : Window; | |
field x : i16; | |
field y : i16; | |
field width : u16; | |
field height : u16; | |
field border-width : u16; | |
} | |
} | |
request QueryTree { | |
opcode = 15; | |
field window : Window; | |
reply { | |
field root : Window; | |
field parent : Window; | |
field num-windows : u16; | |
pad = 14; | |
field children : list<Window> { | |
length = num-windows; | |
} | |
} | |
} | |
request InternAtom { | |
opcode = 16; | |
field only-if-exists : bool; | |
field name-length : u16; | |
pad = 2; | |
field name : String8 { length = name-length; } | |
reply { | |
field atom : Atom; | |
} | |
} | |
request GetAtomName { | |
opcode = 17; | |
field atom : Atom; | |
reply { | |
field name-length : u16; | |
field name : String8 { length = name-length; } | |
} | |
} | |
enum ChangeMode { | |
Replace; Prepend; Append; | |
} | |
request ChangeProperty { | |
opcode = 18; | |
field mode : ChangeMode; | |
field window : Window; | |
field property : Atom; | |
field type : Atom; | |
field format : u8; | |
pad = 3; | |
// Needs fixing, somehow | |
field data-length : u32; | |
field data : list<u8> { | |
length = data-length; | |
} | |
} | |
request DeleteProperty { | |
opcode = 19; | |
field window : Window; | |
field property : Atom; | |
} | |
request GetProperty { | |
opcode = 20; | |
field delete : bool; | |
field window : Window; | |
field property : Atom; | |
field type : Atom; | |
field long-offset : u32; | |
field long-length : u32; | |
reply { | |
field type : Atom; | |
field bytes-after : u32; | |
field value-length : u32; | |
pad = 12; | |
field prop-value : list<u8> { | |
length = value-length; | |
} | |
} | |
} | |
request ListProperties { | |
opcode = 21; | |
field window : Window; | |
reply { | |
field num-atoms : u16; | |
pad = 22; | |
field atoms : list<Atom> { | |
length = num-atoms; | |
} | |
} | |
} | |
request SetSelectionOwner { | |
opcode = 22; | |
field owner : Window; | |
field selection : Atom; | |
field time : Timestamp; | |
} | |
request GetSelectionOwner { | |
opcode = 23; | |
field selection : Atom; | |
reply { | |
field owner : Window; | |
} | |
} | |
request ConvertSelection { | |
opcode = 24; | |
field requestor : Window; | |
field selection : Atom; | |
field target : Atom; | |
field property : Atom; | |
field time : Timestamp; | |
} | |
request SendEvent { | |
opcode = 25; | |
field propagate : bool; | |
field destination : Window; | |
field event-mask : Event; | |
field event : EventStructure; | |
} | |
enum PointerMode { Synchronous; Asynchronous; } | |
enum KeyboardMode { Synchronous; Asynchronous; } | |
enum GrabStatus { | |
Success; | |
AlreadyGrabbed; | |
InvalidTime; | |
NotViewable; | |
Frozen; | |
} | |
request GrabPointer { | |
opcode = 26; | |
field owner-events : bool; | |
field grab-window : Window; | |
field event-mask : PointerEvent { | |
size = 2; | |
} | |
field pointer-mode : PointerMode; | |
field keyboard-mode : KeyboardMode; | |
field confine-to : Window; | |
field cursor : Cursor; | |
field time : Timestamp; | |
reply { | |
field status : GrabStatus; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::vec; | |
use std::util::replace; | |
use ast::Ident; | |
static INITIAL_SIZE : uint = 64; | |
pub struct IdentInterner { | |
priv idents: ~[Option<IdentInfo>], | |
priv size: uint | |
} | |
pub struct IdentInfo { | |
hash: u32, | |
name: ~str, | |
} | |
impl IdentInterner { | |
pub fn new() -> IdentInterner { | |
IdentInterner { | |
idents: vec::from_fn(INITIAL_SIZE, |_| None), | |
size: 0 | |
} | |
} | |
pub fn intern(&mut self, ident: ~str) -> Ident { | |
let hash = murmur_hash(ident); | |
if !self.has_hash(hash) { | |
self.insert_ident(hash, ident); | |
} | |
Ident(hash) | |
} | |
pub fn get_str<'r>(&'r self, ident: Ident) -> &'r str { | |
let Ident(hash) = ident; | |
let mut idx = (hash as uint) % self.idents.len(); | |
let start = idx; | |
loop { | |
match &self.idents[idx] { | |
&None => fail!("Ident isn't interned!?"), | |
&Some(IdentInfo { hash: ident_hash, name: ref name}) if ident_hash == hash => { | |
return name.as_slice(); | |
} | |
_ => idx = (idx + 1) % self.idents.len() | |
} | |
if start == idx { | |
fail!("Ident isn't interned!?") | |
} | |
} | |
} | |
fn has_hash(&self, hash: u32) -> bool { | |
let mut idx = (hash as uint) % self.idents.len(); | |
let start = idx; | |
loop { | |
match &self.idents[idx] { | |
&None => return false, | |
&Some(IdentInfo { hash: ident_hash, ..}) if ident_hash == hash => return true, | |
_ => idx = (idx + 1) % self.idents.len() | |
} | |
if start == idx { return false; } | |
} | |
} | |
fn insert_ident(&mut self, hash: u32, ident: ~str) { | |
self.expand(); | |
self.insert_ident_info(IdentInfo { | |
hash: hash, | |
name: ident, | |
}); | |
} | |
fn insert_ident_info(&mut self, info: IdentInfo) { | |
if (self.size >= self.idents.len()) { | |
fail!("HashMap is full, trying to insert {:?} (size: {:?}, len: {:?}", | |
info, self.size, self.idents.len()); | |
} | |
let mut idx = (info.hash as uint) % self.idents.len(); | |
loop { | |
match self.idents[idx] { | |
None => {} | |
Some(_) => { | |
idx = (idx + 1) % self.idents.len(); | |
continue; | |
} | |
} | |
self.idents[idx] = Some(info); | |
break; | |
} | |
self.size += 1; | |
} | |
fn expand(&mut self) { | |
if ((self.idents.len() / 4) * 3) <= self.size { | |
let new_len = self.idents.len()*2; | |
let old_items = replace(&mut self.idents, | |
vec::from_fn(new_len, |_| None)); | |
self.size = 0; | |
for i in old_items.move_iter() { | |
match i { | |
Some(ident) => self.insert_ident_info(ident), | |
None => {} | |
} | |
} | |
} | |
} | |
} | |
#[inline(always)] | |
fn rotl32(x: u32, r: i8) -> u32 { | |
(x << r) | (x >> (32 - r)) | |
} | |
#[inline] | |
fn murmur_hash(s:&str) -> u32 { | |
static C1 : u32 = 0xcc9e2d51; | |
static C2 : u32 = 0x1b873593; | |
let len = s.len(); | |
let ptr = s.as_ptr(); | |
let mut hash = 0x34567890; | |
unsafe { | |
let num_blocks = len / 4; | |
let blocks = ptr.offset((num_blocks*4) as int) as *u32; | |
let mut i = -(num_blocks as int); | |
while (i < 0) { | |
let mut k1 = *(blocks.offset(i)); | |
k1 *= C1; | |
k1 = rotl32(k1, 15); | |
k1 *= C2; | |
hash ^= k1; | |
hash = rotl32(hash, 13); | |
hash = hash*5+0xe6546b64; | |
i += 1; | |
} | |
let tail = ptr.offset((num_blocks*4) as int); | |
let mut k1 : u32 = 0; | |
let mut leftover = len & 3; | |
if leftover == 3 { | |
k1 ^= (*tail.offset(2) as u32) << 16; | |
leftover -= 1; | |
} | |
if leftover == 2 { | |
k1 ^= (*tail.offset(1) as u32) << 8; | |
leftover -= 1; | |
} | |
if leftover == 1 { | |
k1 ^= *tail as u32; | |
k1 *= C1; | |
k1 = rotl32(k1, 15); | |
k1 *= C2; | |
hash ^= k1; | |
} | |
hash ^= (len as u32); | |
hash ^= (hash >> 16); | |
hash *= 0x85ebca6b; | |
hash ^= hash >> 13; | |
hash *= 0xc2b2ae35; | |
hash ^= hash >> 16; | |
} | |
hash | |
} | |
#[cfg(test)] | |
mod test { | |
use super::IdentInterner; | |
use extra::test::BenchHarness; | |
use std::rand::{Rand,Rng,weak_rng}; | |
#[test] | |
fn test_intern_match1() { | |
let mut interner = IdentInterner::new(); | |
for i in range(1,1024) { | |
let s = format!("s{:02}", i); | |
let ident = interner.intern(s.to_owned()); | |
let returned = interner.get_str(ident); | |
if returned != s.as_slice() { | |
fail!("Ident {:?} not interned properly: {:s} != {:s}", ident, returned, s); | |
} | |
} | |
} | |
#[test] | |
fn test_intern_match2() { | |
let mut interner = IdentInterner::new(); | |
let s = "TestStr"; | |
let the_ident = interner.intern(s.to_owned()); | |
assert_eq!(the_ident, interner.intern(s.to_owned())); | |
assert_eq!(the_ident, interner.intern(s.to_owned())); | |
assert_eq!(the_ident, interner.intern(s.to_owned())); | |
assert_eq!(the_ident, interner.intern(s.to_owned())); | |
} | |
#[bench] | |
fn bench_intern(bh: &mut BenchHarness) { | |
let mut rng = weak_rng(); | |
let mut interner = IdentInterner::new(); | |
let mut n = 0; | |
bh.iter(|| { | |
let s = format!("s{:08x}", n); | |
let ident = interner.intern(s.to_owned()); | |
n += 1; | |
}) | |
} | |
#[bench] | |
fn bench_intern_same_str(bh: &mut BenchHarness) { | |
let mut interner = IdentInterner::new(); | |
let s = "TestStr"; | |
interner.intern(s.to_owned()); | |
bh.iter(|| { | |
interner.intern(s.to_owned()); | |
}) | |
} | |
#[bench] | |
fn bench_intern_same_strs(bh: &mut BenchHarness) { | |
let mut interner = IdentInterner::new(); | |
let s1 = "TestStr_1"; | |
for i in range(1,64) { | |
let s = format!("s{:02}", i); | |
let mut interner = IdentInterner::new(); | |
let ident = interner.intern(s.to_owned()); | |
let returned = interner.get_str(ident); | |
if returned != s.as_slice() { | |
fail!("Ident {:?} not interned properly: {:s} != {:s}", ident, returned, s); | |
} | |
} | |
interner.intern(s1.to_owned()); | |
bh.iter(|| { | |
interner.intern(s1.to_owned()); | |
}); | |
} | |
#[bench] | |
fn bench_get_str(bh: &mut BenchHarness) { | |
let mut interner = IdentInterner::new(); | |
let s = "TestStr"; | |
let the_ident = interner.intern(s.to_owned()); | |
bh.iter(|| { | |
let _a = interner.get_str(the_ident); | |
}) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[feature(globs)]; | |
#[feature(macro_rules)]; | |
#[cfg(test)] | |
extern mod extra; | |
use std::path::Path; | |
use std::io; | |
use std::io::{File,Buffer}; | |
use std::io::buffered::BufferedStream; | |
use std::hashmap::HashMap; | |
mod token; | |
mod ast; | |
mod parse; | |
mod intern; | |
fn open_file(path: &Path) -> BufferedStream<File> { | |
let file = File::open(path).expect("Error opening file"); | |
BufferedStream::new(file) | |
} | |
fn main() { | |
let args = std::os::args(); | |
let filename = args[1]; | |
let mut file = open_file(&Path::new(filename)); | |
let mut items = ~[]; | |
let mut interner = intern::IdentInterner::new(); | |
{ | |
let tokenizer = token::Tokenizer::new(&mut file, &mut interner); | |
let mut parser = parse::Parser::new(tokenizer); | |
loop { | |
match parser.parse_item() { | |
Some(item) => { | |
items.push(item); | |
} | |
None => break | |
} | |
} | |
} | |
println!("Parsed {} Items\n", items.len()); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std; | |
use std::io::{Stream,Buffer}; | |
use std::hashmap::HashMap; | |
use token::*; | |
use intern; | |
use ast; | |
macro_rules! prefill_idents { | |
($($name:ident: $token:expr),*) => ( | |
struct Idents { | |
$($name : ast::Ident),* | |
} | |
fn prefill_interner(interner: &mut intern::IdentInterner) -> Idents { | |
let mut idents : Idents = unsafe { std::unstable::intrinsics::uninit() }; | |
$(idents.$name = interner.intern($token);)* | |
idents | |
} | |
) | |
} | |
prefill_idents! { | |
Void: ~"void", | |
Bool: ~"bool", | |
Uint8: ~"u8", | |
Uint16: ~"u16", | |
Uint32: ~"u32", | |
Int8: ~"i8", | |
Int16: ~"i16", | |
Int32: ~"i32" | |
} | |
pub struct Parser<'t, 's, S> { | |
tokenizer: Tokenizer<'t, 's, S>, | |
lookahead: ~[Token], | |
priv idents: Idents | |
} | |
impl<'t, 's, S:Stream+Buffer> Parser<'t, 's, S> { | |
pub fn new<'r, 's>(tokenizer: Tokenizer<'r, 's, S>) -> Parser<'r, 's, S> { | |
let idents = prefill_interner(tokenizer.interner); | |
Parser { | |
tokenizer: tokenizer, | |
lookahead: ~[], | |
idents: idents | |
} | |
} | |
pub fn parse_item(&mut self) -> Option<ast::Item> { | |
let token = self.read_token(); | |
let start = self.tokenizer.get_pos(); | |
match token { | |
Eof => None, | |
Xid => { | |
let ident = self.expect_ident(); | |
self.expect(Semicolon); | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::Xid, span)) | |
} | |
XidUnion => { | |
let ident = self.expect_ident(); | |
self.expect(LBrace); | |
let mut members = ~[]; | |
loop { | |
match self.read_token() { | |
RBrace => break, | |
Identifier(s) => { | |
members.push(s); | |
self.expect(Semicolon); | |
} | |
t => self.fail(format!("Unexpected token `{}` expected '\\{' or identifier", t.to_str())) | |
} | |
} | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::XidUnion(members), span)) | |
} | |
Struct => { | |
let ident = self.expect_ident(); | |
self.expect(LBrace); | |
let mut fields = ~[]; | |
loop { | |
if self.eat_token(RBrace) { | |
break; | |
} else { | |
fields.push(self.parse_field()); | |
} | |
} | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::Struct(fields), span)) | |
} | |
Request => { | |
let ident = self.expect_ident(); | |
self.expect(LBrace); | |
let mut opcode = None; | |
let mut reply = None; | |
let mut fields = ~[]; | |
loop { | |
if self.eat_token(RBrace) { | |
break; | |
} else if self.eat_token(Opcode) { | |
self.expect(Equal); | |
let code = self.expect_number(); | |
self.expect(Semicolon); | |
opcode = Some(code); | |
} else if reply.is_none() && self.eat_token(Reply) { | |
let mut name = None; | |
match self.read_token() { | |
Identifier(nm) => { | |
name = Some(nm); | |
self.expect(LBrace); | |
} | |
LBrace => { } | |
t => self.fail(format!("Unexpected token `{}` expected '\\{' or identifier", t.to_str())) | |
} | |
let mut fields = ~[]; | |
loop { | |
if self.eat_token(RBrace) { | |
break; | |
} else { | |
fields.push(self.parse_field()); | |
} | |
} | |
reply = Some(~ast::Reply { name: name, fields: fields }); | |
} else { | |
fields.push(self.parse_field()); | |
} | |
} | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::Request(ast::RequestInfo { | |
opcode: opcode.expect("Requests should have an opcode"), | |
fields: fields, | |
errors: ~[], | |
reply: reply | |
}), span)) | |
} | |
Typedef => { | |
let ident = self.expect_ident(); | |
self.expect(Colon); | |
let ty = self.parse_type(); | |
self.expect(Semicolon); | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::Typedef(ty), span)) | |
} | |
Enum => { | |
let ident = self.expect_ident(); | |
self.expect(LBrace); | |
let mut members = ~[]; | |
loop { | |
match self.read_token() { | |
RBrace => break, | |
Identifier(s) => { | |
let mut expr = None; | |
if self.eat_token(Equal) { | |
expr = Some(~self.parse_expr()); | |
} | |
self.expect(Semicolon); | |
members.push((s, expr)); | |
} | |
t => self.unexpected(t,[RBrace]) | |
} | |
} | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::Enum(members), span)) | |
} | |
SetOf => { | |
let ident = self.expect_ident(); | |
let mut parent = None; | |
if self.eat_token(Colon) { | |
parent = Some(self.expect_ident()); | |
} | |
self.expect(LBrace); | |
let mut members = ~[]; | |
loop { | |
match self.read_token() { | |
RBrace => break, | |
Identifier(s) => { | |
self.expect(Semicolon); | |
members.push(s); | |
} | |
t => self.unexpected(t, [RBrace]) | |
} | |
} | |
let end = self.tokenizer.get_pos(); | |
let span = ast::Span::from_pos(start, end); | |
Some(self.mk_item(ident, ast::SetOf(parent, members), span)) | |
} | |
_ => self.unexpected(token, [Xid, XidUnion, Struct, Request, Typedef]) | |
} | |
} | |
fn parse_type(&mut self) -> ast::Type { | |
match self.read_token() { | |
Identifier(s) => { | |
if s == self.idents.Void { | |
ast::Void | |
} else if s == self.idents.Bool { | |
ast::Bool | |
} else if s == self.idents.Uint8 { | |
ast::Uint8 | |
} else if s == self.idents.Uint16 { | |
ast::Uint16 | |
} else if s == self.idents.Uint32 { | |
ast::Uint32 | |
} else if s == self.idents.Int8 { | |
ast::Int8 | |
} else if s == self.idents.Int16 { | |
ast::Int16 | |
} else if s == self.idents.Int32 { | |
ast::Int32 | |
} else { | |
ast::Type(s) | |
} | |
} | |
List => { | |
self.expect(LAngle); | |
let ty = ~self.parse_type(); | |
self.expect(RAngle); | |
ast::List(ty) | |
} | |
LParen => { | |
let mut tys = ~[]; | |
tys.push(self.parse_type()); | |
loop { | |
match self.read_token() { | |
RParen => break, | |
Comma => { | |
tys.push(self.parse_type()); | |
} | |
t => self.fail(format!("Unexpected token {}, expected type", t.to_str())) | |
} | |
} | |
if tys.len() == 0 { | |
tys[0] | |
} else { | |
ast::Tuple(tys) | |
} | |
} | |
s => self.fail(format!("Unexpected start of type {}", s.to_str())) | |
} | |
} | |
fn parse_field(&mut self) -> ast::Field { | |
match self.read_token() { | |
Field => { | |
let ident = self.expect_ident(); | |
self.expect(Colon); | |
let ty = self.parse_type(); | |
let mut info = HashMap::new(); | |
match self.read_token() { | |
Semicolon => {} | |
LBrace => { | |
loop { | |
match self.read_token() { | |
RBrace => break, | |
Identifier(key) => { | |
self.expect(Equal); | |
let val = self.parse_expr(); | |
self.expect(Semicolon); | |
info.insert(key,val); | |
} | |
t => self.fail(format!("Unexpected token `{}`, expected '\\}', number or identifier", | |
t.to_str())) | |
} | |
} | |
} | |
t => self.unexpected(t, [Semicolon, LBrace]) | |
} | |
ast::Data(ast::DataField { | |
name: ident, | |
ty: ty, | |
info: info | |
}) | |
} | |
Pad => { | |
self.expect(Equal); | |
let f = ast::Padding(self.parse_expr()); | |
self.expect(Semicolon); | |
f | |
} | |
Switch => { | |
let ident = self.expect_ident(); | |
self.expect(Equal); | |
let expr = self.parse_expr(); | |
self.expect(LBrace); | |
let mut arms = ~[]; | |
loop { | |
match self.read_token() { | |
RBrace => break, | |
Identifier(s) => { | |
self.expect(Colon); | |
let ty = self.parse_type(); | |
self.expect(Semicolon); | |
arms.push((s, ty)); | |
} | |
t => self.fail(format!("Unexpected token `{}`, expected '\\}' or identifier", t.to_str())) | |
} | |
} | |
ast::Switch(ident, expr, arms) | |
} | |
t => self.unexpected(t, [Field,Switch,Pad]) | |
} | |
} | |
fn parse_expr(&mut self) -> ast::Expr { | |
let expr = self.parse_primary_expr(); | |
self.parse_op_expr(expr, 0) | |
} | |
fn parse_primary_expr(&mut self) -> ast::Expr { | |
match self.read_token() { | |
Identifier(ident) => ast::IdentExpr(ident), | |
Number(num) => ast::NumExpr(num), | |
Pad => { | |
self.expect(LParen); | |
let expr = self.parse_expr(); | |
self.expect(RParen); | |
expr | |
} | |
LParen => { | |
let expr = ~self.parse_expr(); | |
self.expect(RParen); | |
ast::ParenExpr(expr) | |
} | |
Minus => { | |
ast::NegExpr(~self.parse_primary_expr()) | |
} | |
True => ast::BoolExpr(true), | |
False => ast::BoolExpr(false), | |
t => self.fail(format!("Unexpected start of expression `{}`", t.to_str())) | |
} | |
} | |
fn parse_op_expr(&mut self, mut lhs: ast::Expr, prec: uint) -> ast::Expr { | |
let mut rhs; | |
loop { | |
match self.peek_op() { | |
Some(op) if op.prec() >= prec => { | |
self.bump(); | |
rhs = self.parse_primary_expr(); | |
loop { | |
match self.peek_op() { | |
Some(next_op) if next_op.prec() > op.prec() => { | |
rhs = self.parse_op_expr(rhs, next_op.prec()); | |
} | |
_ => break | |
} | |
} | |
lhs = ast::BinopExpr(op, ~lhs, ~rhs); | |
} | |
_ => break | |
} | |
} | |
return lhs; | |
} | |
fn peek_op(&mut self) -> Option<ast::Op> { | |
Some(match self.peek_token(0) { | |
Plus => ast::Add, | |
Minus => ast::Sub, | |
Divide => ast::Div, | |
Star => ast::Mul, | |
_ => return None | |
}) | |
} | |
fn mk_item(&self, ident: ast::Ident, info: ast::ItemInfo, span: ast::Span) -> ast::Item { | |
ast::Item { | |
name: ident, | |
span: span, | |
info: info, | |
} | |
} | |
fn unexpected(&self, token: Token, expected: &[Token]) -> ! { | |
self.fail(format!("Unexpected token `{}` expected `{}`", token.to_str(), expected.to_str())) | |
} | |
fn fail(&self, msg: ~str) -> ! { | |
fail!("Failure {:s} on line {:u}", msg, self.tokenizer.cur_line) | |
} | |
fn read_token(&mut self) -> Token { | |
if self.lookahead.len() > 0 { | |
self.lookahead.shift() | |
} else { | |
self.tokenizer.read_token() | |
} | |
} | |
fn peek_token(&mut self, n: uint) -> Token { | |
if self.lookahead.len() <= n { | |
let mut to_read = (n - self.lookahead.len()) + 1; | |
while to_read > 0 { | |
self.lookahead.push(self.tokenizer.read_token()); | |
to_read -= 1; | |
} | |
} | |
self.lookahead[n].clone() | |
} | |
fn bump(&mut self) { self.read_token(); } | |
fn expect(&mut self, token: Token) { | |
let t = self.read_token(); | |
if t != token { | |
self.unexpected(t, [token]) | |
} | |
} | |
fn expect_ident(&mut self) -> ast::Ident { | |
match self.read_token() { | |
Identifier(s) => s, | |
t => self.fail(format!("Unexpected token `{}` expected identifier", t.to_str())) | |
} | |
} | |
fn expect_number(&mut self) -> uint { | |
match self.read_token() { | |
Number(n) => n, | |
t => self.fail(format!("Unexpected token `{}` expected number", t.to_str())) | |
} | |
} | |
fn eat_token(&mut self, token: Token) -> bool { | |
let t = self.read_token(); | |
if t == token { | |
true | |
} else { | |
self.lookahead.unshift(t); | |
false | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std; | |
use std::io::{Stream,Buffer}; | |
use ast; | |
use ast::Ident; | |
use intern::IdentInterner; | |
#[deriving(Eq, ToStr, Clone)] | |
pub enum Token { | |
Eof, | |
Xid, | |
XidUnion, | |
Struct, | |
Request, | |
Opcode, | |
Field, | |
Pad, | |
Switch, | |
Typedef, | |
Enum, | |
SetOf, | |
List, | |
Reply, | |
True, | |
False, | |
Colon, | |
Semicolon, | |
Comma, | |
Pipe, | |
LBrace, | |
RBrace, | |
Equal, | |
Minus, | |
Plus, | |
Divide, | |
Star, | |
LParen, | |
RParen, | |
LAngle, | |
RAngle, | |
FatArrow, | |
Identifier(Ident), | |
Number(uint) | |
} | |
pub struct Tokenizer<'stream, 'intern, S> { | |
stream: &'stream mut S, | |
lookahead: char, | |
interner: &'intern mut IdentInterner, | |
cur_col: uint, | |
cur_line: uint | |
} | |
impl<'stream, 'intern, S:Stream+Buffer> Tokenizer<'stream, 'intern, S> { | |
pub fn new<'r, 's>(stream: &'r mut S, interner: &'s mut IdentInterner) -> Tokenizer<'r, 's, S> { | |
Tokenizer { | |
stream: stream, | |
lookahead: '\0', | |
interner: interner, | |
cur_col: 1, | |
cur_line: 1 | |
} | |
} | |
pub fn read_token(&mut self) -> Token { | |
self.skip_whitespace(); | |
let c = self.read_char(); | |
match c { | |
'\0' => Eof, | |
',' => Comma, | |
';' => Semicolon, | |
':' => Colon, | |
'{' => LBrace, | |
'}' => RBrace, | |
'|' => Pipe, | |
'=' => { | |
if self.eat_char('>') { | |
FatArrow | |
} else { | |
Equal | |
} | |
}, | |
'-' => Minus, | |
'+' => Plus, | |
'/' => { | |
if self.eat_char('/') { | |
loop { | |
match self.read_char() { | |
'\n' | '\r' => return self.read_token(), | |
_ => {} | |
} | |
} | |
} else { | |
Divide | |
} | |
} | |
'*' => Star, | |
'(' => LParen, | |
')' => RParen, | |
'<' => LAngle, | |
'>' => RAngle, | |
'0'..'9' => { | |
let mut num = (c as u32) - ('0' as u32); | |
loop { | |
let c = self.read_char(); | |
match c { | |
'0'..'9' => { | |
num = num << 1; | |
num += (c as u32) - ('0' as u32); | |
} | |
_ => { | |
self.lookahead = c; | |
break; | |
} | |
} | |
} | |
Number(num as uint) | |
} | |
_ => { | |
let mut chars = ~[c]; | |
loop { | |
let c = self.read_char(); | |
match c { | |
'a'..'z' | 'A'..'Z' | |
| '-' | '0'..'9' => chars.push(c), | |
_ => { | |
self.lookahead = c; | |
break; | |
} | |
} | |
} | |
let s = std::str::from_chars(chars); | |
match s.as_slice() { | |
"xid" => Xid, | |
"xidunion" => XidUnion, | |
"struct" => Struct, | |
"request" => Request, | |
"opcode" => Opcode, | |
"field" => Field, | |
"pad" => Pad, | |
"switch" => Switch, | |
"typedef" => Typedef, | |
"enum" => Enum, | |
"setof" => SetOf, | |
"list" => List, | |
"reply" => Reply, | |
"true" => True, | |
"false" => False, | |
_ => Identifier(self.interner.intern(s)) | |
} | |
} | |
} | |
} | |
pub fn get_pos(&self) -> ast::Pos { | |
ast::Pos { | |
row: self.cur_line as u16, | |
col: self.cur_col as u16 | |
} | |
} | |
fn read_char(&mut self) -> char { | |
if self.lookahead != '\0' { | |
let c = self.lookahead; | |
self.lookahead = '\0'; | |
return c; | |
} else { | |
match self.stream.read_char() { | |
Some(c) => { | |
if c == '\n' { | |
self.cur_col = 0; | |
self.cur_line += 1; | |
} | |
self.cur_col += 1; | |
c | |
}, | |
None => '\0' | |
} | |
} | |
} | |
fn eat_char(&mut self, c: char) -> bool { | |
let r = self.read_char(); | |
if r == c { | |
true | |
} else { | |
self.lookahead = r; | |
false | |
} | |
} | |
fn skip_whitespace(&mut self) { | |
loop { | |
match self.read_char() { | |
' ' | '\n' | '\t' | '\r' => {} | |
'\0' => break, | |
c => { | |
self.lookahead = c; | |
break; | |
} | |
} | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment