@gingerBill · Last active November 20, 2017
CEL 2017-11-20
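A hand-written parser and tokenizer for CEL, in 2017-era Odin (note the old `#alias` and `.odin` import syntax). Expressions are evaluated as they are parsed, producing a tree of dictionaries, arrays, and scalar values. Two files follow: the parser, and token.odin, the tokenizer it imports.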
import "core:fmt.odin"
import "core:strconv.odin"
import "core:os.odin"
import "token.odin"
Token :: #alias token.Token;
Tokenizer :: #alias token.Tokenizer;
Array :: []Value;
Dict :: struct {
	parent: ^Dict,
	fields: map[string]Value,
}
Nil_Value :: struct{};
Value :: union {
	Nil_Value,
	bool, i64, f64, string,
	Array, ^Dict,
}
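// Parser holds the token stream, a cursor into it, and the dictionary
// currently being filled in (curr_node), which chains to its parent for
// scoped name lookup.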
Parser :: struct {
	tokens: [dynamic]Token,
	prev_token: Token,
	curr_token: Token,
	curr_token_index: int,
	allocated_strings: [dynamic]string,
	error_count: int,
	root: ^Dict,
	curr_node: ^Dict,
}
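// parser_init tokenizes the entire source up front and prepares the root
// dictionary. It returns false if the tokenizer produced an illegal token or
// reported any errors.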
parser_init :: proc(p: ^Parser, t: ^Tokenizer) -> bool {
	// The tokenizer is expected to be initialized by the caller (see main).
	for {
		tok := token.scan(t);
		if tok.kind == token.Kind.Illegal {
			return false;
		}
		append(&p.tokens, tok);
		if tok.kind == token.Kind.EOF {
			break;
		}
	}
	if t.error_count > 0 {
		return false;
	}

	p.root = new(Dict);
	p.curr_node = p.root;

	if len(p.tokens) == 0 {
		tok := Token{kind = token.Kind.EOF};
		tok.line, tok.column = 1, 1;
		append(&p.tokens, tok);
		return true;
	}

	p.curr_token_index = 0;
	p.prev_token = p.tokens[p.curr_token_index];
	p.curr_token = p.tokens[p.curr_token_index];
	return true;
}
error :: proc(p: ^Parser, msg: string, args: ...any) {
	fmt.print_err("Error: ");
	fmt.printf_err(msg, ...args);
	fmt.println_err();
	p.error_count += 1;
}
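// next_token advances the cursor and returns the token that was current
// before the advance.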
next_token :: proc(p: ^Parser) -> Token {
	p.prev_token = p.curr_token;
	prev := p.prev_token;
	if p.curr_token_index+1 < len(p.tokens) {
		p.curr_token_index += 1;
		p.curr_token = p.tokens[p.curr_token_index];
		return prev;
	}
	p.curr_token_index = len(p.tokens);
	p.curr_token = p.tokens[p.curr_token_index-1];
	error(p, "Token is EOF");
	return prev;
}
allow_token :: proc(p: ^Parser, kind: token.Kind) -> bool {
	if p.curr_token.kind == kind {
		next_token(p);
		return true;
	}
	return false;
}
expect_token :: proc(p: ^Parser, kind: token.Kind) -> Token {
	prev := p.curr_token;
	if prev.kind != kind {
		error(p, "Expected %s, got %s", kind, prev.lit);
		if prev.kind == token.Kind.EOF {
			os.exit(1);
		}
	}
	next_token(p);
	return prev;
}
expect_operator :: proc(p: ^Parser) -> Token {
	prev := p.curr_token;
	switch prev.kind {
	case token.Kind._operator_start+1 .. token.Kind._operator_end:
		// ok
	case:
		error(p, "Expected an operator, got %s", prev.lit);
	}
	next_token(p);
	return prev;
}
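// fix_advance skips tokens until the next statement boundary (';' or EOF) so
// parsing can resume after an error.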
fix_advance :: proc(p: ^Parser) {
	for {
		using token.Kind;
		switch t := p.curr_token; t.kind {
		case EOF, Semicolon:
			return;
		}
		next_token(p);
	}
}
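// lookup_value resolves an identifier by walking outwards through the chain
// of enclosing dictionaries.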
lookup_value :: proc(p: ^Parser, name: string) -> Value {
	for node := p.curr_node; node != nil; node = node.parent {
		if val, ok := node.fields[name]; ok {
			return val;
		}
	}
	error(p, "Undeclared identifier %s", name);
	return nil;
}
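// parse_operand parses a primary expression: a literal, an identifier
// reference, a parenthesized expression, an array literal, or a dictionary
// literal.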
parse_operand :: proc(p: ^Parser) -> Value {
	using token.Kind;
	tok := p.curr_token;
	switch p.curr_token.kind {
	case Ident:
		next_token(p);
		return lookup_value(p, tok.lit);
	case True:
		next_token(p);
		return true;
	case False:
		next_token(p);
		return false;
	case Nil:
		next_token(p);
		return Nil_Value{};
	case Integer:
		next_token(p);
		return i64(strconv.parse_i128(tok.lit));
	case Float:
		next_token(p);
		return strconv.parse_f64(tok.lit);
	case String:
		next_token(p);
		return string(tok.lit);
	case Open_Paren:
		expect_token(p, Open_Paren);
		expr := parse_expr(p);
		expect_token(p, Close_Paren);
		return expr;
	case Open_Bracket:
		expect_token(p, Open_Bracket);
		elems := make([dynamic]Value, 0, 4);
		for p.curr_token.kind != Close_Bracket &&
		    p.curr_token.kind != EOF {
			elem := parse_expr(p);
			append(&elems, elem);
			if !allow_token(p, Comma) {
				break;
			}
		}
		expect_token(p, Close_Bracket);
		return Array(elems[..]);
	case Open_Brace:
		expect_token(p, Open_Brace);
		// A dictionary literal opens a new scope chained to the current one.
		dict := new_clone(Dict{parent = p.curr_node});
		p.curr_node = dict;
		defer p.curr_node = dict.parent;
		for p.curr_token.kind != Close_Brace &&
		    p.curr_token.kind != EOF {
			name_tok := expect_token(p, Ident);
			name := name_tok.lit;
			expect_token(p, Assign);
			elem := parse_expr(p);
			if _, ok := dict.fields[name]; ok {
				error(p, "Previous declaration of %s in this scope", name);
			} else {
				dict.fields[name] = elem;
			}
			if !allow_token(p, Comma) {
				break;
			}
		}
		expect_token(p, Close_Brace);
		return dict;
	}
	return nil;
}
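// parse_atom_expr applies postfix operations to an operand: selector access
// (dict.name) and indexing (array[i] or dict["name"]), evaluating them
// immediately.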
parse_atom_expr :: proc(p: ^Parser, operand: Value) -> Value {
	using token.Kind;
	loop := true;
	for loop {
		switch p.curr_token.kind {
		case Period:
			next_token(p);
			tok := next_token(p);
			switch tok.kind {
			case Ident:
				d, ok := operand.(^Dict);
				if !ok || d == nil {
					error(p, "Expected a dictionary");
					operand = nil;
					continue;
				}
				name := tok.lit;
				val, found := d.fields[name];
				if !found {
					error(p, "Field %s not found in dictionary", name);
					operand = nil;
					continue;
				}
				operand = val;
			case:
				error(p, "Expected a selector, got %s", tok.lit);
				operand = nil;
			}
		case Open_Bracket:
			expect_token(p, Open_Bracket);
			index := parse_expr(p);
			expect_token(p, Close_Bracket);
			if a, ok := operand.(Array); ok {
				i, ok := index.(i64);
				if !ok {
					error(p, "Index must be an integer for an array");
					operand = nil;
					continue;
				}
				if 0 <= i && i < i64(len(a)) {
					operand = a[i];
				} else {
					error(p, "Index %d out of bounds range 0..%d", i, len(a));
					operand = nil;
					continue;
				}
			} else if d, ok := operand.(^Dict); ok {
				name, ok := index.(string);
				if !ok {
					error(p, "Index must be a string for a dictionary");
					operand = nil;
					continue;
				}
				val, found := d.fields[name];
				if !found {
					error(p, "Field %s not found in dictionary", name);
					operand = nil;
					continue;
				}
				operand = val;
			}
		case:
			loop = false;
		}
	}
	return operand;
}
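// parse_unary_expr handles prefix +, -, and !, folding them into the operand
// value.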
parse_unary_expr :: proc(p: ^Parser) -> Value {
	using token.Kind;
	op := p.curr_token;
	switch p.curr_token.kind {
	case Add, Sub:
		next_token(p);
		// TODO(bill): Calculate values as you go!
		expr := parse_unary_expr(p);
		switch e in expr {
		case i64: if op.kind == Sub do return -e;
		case f64: if op.kind == Sub do return -e;
		case:
			error(p, "Unary operator %s can only be used on integers or floats", op.lit);
			return nil;
		}
		return expr;
	case Not:
		next_token(p);
		expr := parse_unary_expr(p);
		if v, ok := expr.(bool); ok {
			return !v;
		}
		error(p, "Unary operator %s can only be used on booleans", op.lit);
		return nil;
	}
	operand := parse_operand(p);
	return parse_atom_expr(p, operand);
}
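// value_order ranks value types so match_values can normalize operand pairs:
// bool and string rank 1, i64 ranks 2, f64 ranks 3, everything else 0.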
value_order :: proc(v: Value) -> int {
	switch _ in v {
	case bool, string:
		return 1;
	case i64:
		return 2;
	case f64:
		return 3;
	}
	return 0;
}
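// match_values coerces a pair of values to a common type where possible
// (i64 widens to f64), recursing with the arguments swapped so the
// lower-ordered value is always on the left.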
match_values :: proc(left, right: ^Value) -> bool {
	if value_order(right^) < value_order(left^) {
		return match_values(right, left);
	}
	switch x in left^ {
	case:
		right^ = left^;
	case bool, string:
		return true;
	case i64:
		switch y in right^ {
		case i64:
			return true;
		case f64:
			left^ = f64(x);
			return true;
		}
	case f64:
		switch y in right^ {
		case f64:
			return true;
		}
	}
	return false;
}
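// calculate_binary_value applies a binary operator to two values after
// coercing them to a common type. String + string concatenates; the new
// string is recorded in p.allocated_strings.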
calculate_binary_value :: proc(p: ^Parser, op: token.Kind, x, y: Value) -> (Value, bool) {
	// TODO(bill): Calculate value as you go!
	match_values(&x, &y);
	using token.Kind;
	switch a in x {
	case: return x, true;
	case i64:
		b, ok := y.(i64);
		if !ok do return nil, false;
		switch op {
		case Add: return a + b, true;
		case Sub: return a - b, true;
		case Mul: return a * b, true;
		case Quo: return a / b, true;
		case Rem: return a % b, true;
		}
	case f64:
		b, ok := y.(f64);
		if !ok do return nil, false;
		switch op {
		case Add: return a + b, true;
		case Sub: return a - b, true;
		case Mul: return a * b, true;
		case Quo: return a / b, true;
		}
	case string:
		b, ok := y.(string);
		if !ok do return nil, false;
		switch op {
		case Add:
			n := len(a) + len(b);
			data := make([]u8, n);
			copy(data[..], cast([]u8)a);
			copy(data[len(a)..], cast([]u8)b);
			s := string(data);
			append(&p.allocated_strings, s);
			return s, true;
		}
	}
	return nil, false;
}
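// parse_binary_expr parses and evaluates binary operators using precedence
// climbing; results are folded as soon as both sides are known.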
parse_binary_expr :: proc(p: ^Parser, prec_in: int) -> Value {
	expr := parse_unary_expr(p);
	for prec := token.precedence(p.curr_token.kind); prec >= prec_in; prec -= 1 {
		for {
			op := p.curr_token;
			op_prec := token.precedence(op.kind);
			if op_prec != prec {
				break;
			}
			expect_operator(p);
			right := parse_binary_expr(p, prec+1);
			if right == nil {
				error(p, "Expected expression on the right-hand side of the binary operator %s", op.lit);
			}
			left := expr;
			ok: bool;
			expr, ok = calculate_binary_value(p, op.kind, left, right);
			if !ok {
				error(p, "Invalid binary operation");
			}
		}
	}
	return expr;
}
parse_expr :: proc(p: ^Parser) -> Value {
	return parse_binary_expr(p, 1);
}
expect_semicolon :: proc(p: ^Parser) {
	using token.Kind;
	kind := p.curr_token.kind;
	switch kind {
	case Comma:
		error(p, "Expected ';', got ','");
		next_token(p);
	case Semicolon:
		next_token(p);
	case EOF:
		// okay
	case:
		error(p, "Expected ';', got %s", p.curr_token.lit);
		fix_advance(p);
	}
}
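// parse_assignment parses one top-level statement of the form `name = expr;`,
// storing the evaluated value in the current dictionary. It returns false at
// EOF or when no assignment can be parsed.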
parse_assignment :: proc(p: ^Parser) -> bool {
	using token.Kind;
	if p.curr_token.kind == Semicolon {
		next_token(p);
		return true;
	}
	if p.curr_token.kind == EOF {
		return false;
	}
	tok := p.curr_token;
	if allow_token(p, Ident) {
		expect_token(p, Assign);
		name := tok.lit;
		expr := parse_expr(p);
		if _, ok := p.curr_node.fields[name]; ok {
			error(p, "Previous declaration of %s", name);
		} else {
			p.curr_node.fields[name] = expr;
		}
		expect_semicolon(p);
		return true;
	}
	error(p, "Expected an assignment, got %s", tok.kind);
	fix_advance(p);
	return false;
}
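// sample is the CEL source parsed by main. Comments start with '#', and
// newlines terminate statements via automatic semicolon insertion.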
sample := `
x = 123
y = 321
z = x * (y - 1) / 2
w = "foo" + "bar"
# Foo
# Bar
a = {id = {b = 123}} # Dict
b = a.id.b
c = a["id"]
f = [1, 4, 9]
g = f[2]
`;
main :: proc() {
	t: Tokenizer;
	token.init(&t, cast([]u8)sample);

	p: Parser;
	if ok := parser_init(&p, &t); !ok {
		return;
	}

	for p.curr_token.kind != token.Kind.EOF &&
	    p.curr_token.kind != token.Kind.Illegal &&
	    p.curr_token_index < len(p.tokens) {
		if !parse_assignment(&p) {
			break;
		}
	}

	fmt.println(p.root.fields);
}
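token.odin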
import "core:fmt.odin"
import "core:utf8.odin"
Kind :: enum {
	Illegal,
	EOF,
	Comment,

	_literal_start,
	Ident,
	Integer,
	Float,
	Char,
	String,
	_literal_end,

	_keyword_start,
	True,
	False,
	Nil,
	_keyword_end,

	_operator_start,
	Add,
	Sub,
	Mul,
	Quo,
	Rem,
	Not,
	Assign,
	_operator_end,

	_punc_start,
	Open_Paren,
	Close_Paren,
	Open_Bracket,
	Close_Bracket,
	Open_Brace,
	Close_Brace,
	Semicolon,
	Comma,
	Period,
	_punc_end,
}
Pos :: struct {
	line: int,
	column: int,
}
Token :: struct {
	kind: Kind,
	using pos: Pos,
	lit: string,
}
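// Tokenizer scans src one rune at a time. offset is the start of curr_rune
// and read_offset the position just past it; insert_semi records whether a
// newline should become an implicit semicolon.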
Tokenizer :: struct {
	src: []u8,
	curr_rune: rune,
	offset: int,
	read_offset: int,
	line_offset: int,
	line_count: int,
	insert_semi: bool,
	error_count: int,
}
keywords := map[string]Kind{
	"true"  = Kind.True,
	"false" = Kind.False,
	"nil"   = Kind.Nil,
};
precedence :: proc(op: Kind) -> int {
	switch op {
	case Kind.Add, Kind.Sub:
		return 1;
	case Kind.Mul, Kind.Quo, Kind.Rem:
		return 2;
	}
	return 0;
}
token_lookup :: proc(ident: string) -> Kind {
	if tok, is_keyword := keywords[ident]; is_keyword {
		return tok;
	}
	return Kind.Ident;
}
is_literal :: proc(tok: Kind) -> bool do return Kind._literal_start < tok && tok < Kind._literal_end;
is_operator :: proc(tok: Kind) -> bool do return Kind._operator_start < tok && tok < Kind._operator_end;
is_keyword :: proc(tok: Kind) -> bool do return Kind._keyword_start < tok && tok < Kind._keyword_end;
init :: proc(t: ^Tokenizer, src: []u8) {
	t.src = src;
	t.curr_rune = ' ';
	t.offset = 0;
	t.read_offset = 0;
	t.line_offset = 0;
	t.line_count = 1;
	advance_to_next_rune(t);
	if t.curr_rune == utf8.RUNE_BOM {
		advance_to_next_rune(t);
	}
}
error :: proc(t: ^Tokenizer, msg: string, args: ...any) {
	fmt.print_err("Error: ");
	fmt.printf_err(msg, ...args);
	fmt.println_err();
	t.error_count += 1;
}
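// advance_to_next_rune decodes the next UTF-8 rune from src into curr_rune,
// tracking line starts for position reporting. At the end of input it sets
// curr_rune to utf8.RUNE_EOF.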
advance_to_next_rune :: proc(t: ^Tokenizer) {
	if t.read_offset < len(t.src) {
		t.offset = t.read_offset;
		if t.curr_rune == '\n' {
			t.line_offset = t.offset;
			t.line_count += 1;
		}
		r, w := rune(t.src[t.read_offset]), 1;
		switch {
		case r == 0:
			error(t, "Illegal character NUL");
		case r >= utf8.RUNE_SELF:
			r, w = utf8.decode_rune(t.src[t.read_offset..]);
			if r == utf8.RUNE_ERROR && w == 1 {
				error(t, "Illegal utf-8 encoding");
			} else if r == utf8.RUNE_BOM && t.offset > 0 {
				error(t, "Illegal byte order mark");
			}
		}
		t.read_offset += w;
		t.curr_rune = r;
	} else {
		t.offset = len(t.src);
		if t.curr_rune == '\n' {
			t.line_offset = t.offset;
			t.line_count += 1;
		}
		t.curr_rune = utf8.RUNE_EOF;
	}
}
get_pos :: proc(t: ^Tokenizer) -> Pos {
	return Pos{
		line = t.line_count,
		column = t.offset - t.line_offset + 1,
	};
}
is_letter :: proc(r: rune) -> bool {
	switch r {
	case 'a'...'z', 'A'...'Z', '_':
		return true;
	}
	return false;
}
is_digit :: proc(r: rune) -> bool {
	switch r {
	case '0'...'9':
		return true;
	}
	return false;
}
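// skip_whitespace consumes a run of spaces, tabs, carriage returns, and
// newlines; a newline stops the scan when insert_semi is set so it can be
// turned into an implicit semicolon.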
skip_whitespace :: proc(t: ^Tokenizer) {
	loop: for {
		switch t.curr_rune {
		case '\n':
			if t.insert_semi {
				break loop;
			}
			fallthrough;
		case ' ', '\t', '\r':
			advance_to_next_rune(t);
		case:
			break loop;
		}
	}
}
scan_identifier :: proc(t: ^Tokenizer) -> string {
	offset := t.offset;
	for is_letter(t.curr_rune) || is_digit(t.curr_rune) {
		advance_to_next_rune(t);
	}
	return string(t.src[offset .. t.offset]);
}
digit_value :: proc(r: rune) -> int {
	switch r {
	case '0'...'9': return int(r - '0');
	case 'a'...'f': return int(r - 'a' + 10);
	case 'A'...'F': return int(r - 'A' + 10);
	}
	return 16; // larger than any valid digit
}
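// scan_number scans integer and float literals, including 0b/0o/0x prefixes,
// fractions, and exponents. seen_decimal_point is true when the caller has
// already consumed a leading '.'.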
scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) {
	scan_mantissa :: proc(t: ^Tokenizer, base: int) {
		for digit_value(t.curr_rune) < base {
			advance_to_next_rune(t);
		}
	}
	scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) {
		if t.curr_rune == 'e' || t.curr_rune == 'E' {
			tok = Kind.Float;
			advance_to_next_rune(t);
			if t.curr_rune == '-' || t.curr_rune == '+' {
				advance_to_next_rune(t);
			}
			if digit_value(t.curr_rune) < 10 {
				scan_mantissa(t, 10);
			} else {
				error(t, "Illegal floating point exponent");
			}
		}
		return tok, string(t.src[offset .. t.offset]);
	}
	scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) {
		if t.curr_rune == '.' {
			tok = Kind.Float;
			advance_to_next_rune(t);
			scan_mantissa(t, 10);
		}
		return scan_exponent(t, tok, offset);
	}

	offset := t.offset;
	tok := Kind.Integer;

	if seen_decimal_point {
		offset -= 1; // include the '.' already consumed by the caller
		tok = Kind.Float;
		scan_mantissa(t, 10);
		return scan_exponent(t, tok, offset);
	}

	if t.curr_rune == '0' {
		offset := t.offset;
		advance_to_next_rune(t);
		switch t.curr_rune {
		case 'b', 'B':
			advance_to_next_rune(t);
			scan_mantissa(t, 2);
			if t.offset - offset <= 2 {
				error(t, "Illegal binary number");
			}
		case 'o', 'O':
			advance_to_next_rune(t);
			scan_mantissa(t, 8);
			if t.offset - offset <= 2 {
				error(t, "Illegal octal number");
			}
		case 'x', 'X':
			advance_to_next_rune(t);
			scan_mantissa(t, 16);
			if t.offset - offset <= 2 {
				error(t, "Illegal hexadecimal number");
			}
		case:
			scan_mantissa(t, 10);
			switch t.curr_rune {
			case '.', 'e', 'E':
				return scan_fraction(t, tok, offset);
			}
		}
		return tok, string(t.src[offset .. t.offset]);
	}

	scan_mantissa(t, 10);
	return scan_fraction(t, tok, offset);
}
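// scan skips whitespace and returns the next token. Newlines become implicit
// semicolons after identifiers, literals, and closing brackets (Go-style
// automatic semicolon insertion); '#' comments run to end of line and also
// yield a semicolon.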
scan :: proc(t: ^Tokenizer) -> Token {
	skip_whitespace(t);

	offset := t.offset;
	tok: Kind;
	pos := get_pos(t);
	lit: string;
	insert_semi := false;

	switch r := t.curr_rune; {
	case is_letter(r):
		lit = scan_identifier(t);
		tok = Kind.Ident;
		if len(lit) > 1 {
			tok = token_lookup(lit);
		}
		if tok == Kind.Ident {
			insert_semi = true;
		}
	case '0' <= r && r <= '9':
		insert_semi = true;
		tok, lit = scan_number(t, false);
	case:
		advance_to_next_rune(t);
		switch r {
		case -1:
			if t.insert_semi {
				t.insert_semi = false;
				return Token{Kind.Semicolon, pos, "\n"};
			}
			return Token{Kind.EOF, pos, "\n"};
		case '\n':
			t.insert_semi = false;
			return Token{Kind.Semicolon, pos, "\n"};
		case '"':
			quote := r;
			tok = Kind.String;
			for {
				r := t.curr_rune;
				if r == '\n' || r < 0 {
					error(t, "String literal not terminated");
					break;
				}
				advance_to_next_rune(t);
				if r == quote {
					break;
				}
				// TODO(bill): Handle properly
				if r == '\\' && t.curr_rune == quote {
					advance_to_next_rune(t);
				}
			}
			lit = string(t.src[offset+1..t.offset-1]);
		case '#':
			for t.curr_rune != '\n' && t.curr_rune >= 0 {
				advance_to_next_rune(t);
			}
			advance_to_next_rune(t);
			tok = Kind.Semicolon;
			lit = ";";
		case ',': tok = Kind.Comma;
		case ';': tok = Kind.Semicolon;
		case '(': tok = Kind.Open_Paren;
		case ')': tok = Kind.Close_Paren; insert_semi = true;
		case '[': tok = Kind.Open_Bracket;
		case ']': tok = Kind.Close_Bracket; insert_semi = true;
		case '{': tok = Kind.Open_Brace;
		case '}': tok = Kind.Close_Brace; insert_semi = true;
		case '+': tok = Kind.Add;
		case '-': tok = Kind.Sub;
		case '*': tok = Kind.Mul;
		case '/': tok = Kind.Quo;
		case '%': tok = Kind.Rem;
		case '!': tok = Kind.Not;
		case '=': tok = Kind.Assign;
		case '.':
			if '0' <= t.curr_rune && t.curr_rune <= '9' {
				insert_semi = true;
				tok, lit = scan_number(t, true);
			} else {
				tok = Kind.Period;
			}
		case:
			if r != utf8.RUNE_BOM {
				error(t, "Illegal character %r", r);
			}
			insert_semi = t.insert_semi;
			tok = Kind.Illegal;
		}
	}

	t.insert_semi = insert_semi;
	if lit == "" {
		lit = string(t.src[offset .. t.offset]);
	}
	return Token{tok, pos, lit};
}