// Source: GitHub Gist by @doccaico — a small lexer written in Zig.
// https://gist.github.com/doccaico/3fe0cd3e81074520ea11c9872be01af4
// (last active April 11, 2024; GitHub page chrome removed so the file compiles)
const std = @import("std");
const fmt = std.fmt;
const mem = std.mem;
const print = std.debug.print;
const assert = std.debug.assert;
const expect = std.testing.expect;
// https://medium.com/@tharunappu2004/writing-a-lexer-in-c-a-step-by-step-guide-a1d5c55ac04d
pub var allocator: std.mem.Allocator = undefined;
const TokenType = enum {
eof,
number,
eq,
lparen,
rparen,
binaryop,
identifier,
let,
};
const Token = struct {
typ: TokenType,
value: []const u8,
};
pub const keywords = std.ComptimeStringMap(TokenType, .{
.{ "let", .let },
});
fn tokenize(input: []const u8) !std.ArrayList(Token) {
var tokens = std.ArrayList(Token).init(allocator);
var src = std.ArrayList([]const u8).init(allocator);
src = try splitString(input);
while (src.items.len != 0) {
if (std.mem.eql(u8, src.items[0], "(")) {
try tokens.append(.{ .typ = .lparen, .value = src.orderedRemove(0) });
} else if (std.mem.eql(u8, src.items[0], ")")) {
try tokens.append(.{ .typ = .rparen, .value = src.orderedRemove(0) });
} else if (std.mem.eql(u8, src.items[0], "=")) {
try tokens.append(.{ .typ = .eq, .value = src.orderedRemove(0) });
} else if (std.mem.eql(u8, src.items[0], "+") or
std.mem.eql(u8, src.items[0], "-") or
std.mem.eql(u8, src.items[0], "*") or
std.mem.eql(u8, src.items[0], "/"))
{
try tokens.append(.{ .typ = .binaryop, .value = src.orderedRemove(0) });
} else {
if (isNumber(src.items[0])) {
try tokens.append(.{ .typ = .number, .value = src.orderedRemove(0) });
} else if (isAlpha(src.items[0])) {
const key = src.orderedRemove(0);
const typ = keywords.get(key) orelse .identifier;
try tokens.append(.{ .typ = typ, .value = key });
} else {
return error.InvalidChar;
}
}
}
return tokens;
}
fn isNumber(str: []const u8) bool {
for (str) |ch| {
if (!std.ascii.isDigit(ch))
return false;
}
return true;
}
fn isAlpha(str: []const u8) bool {
for (str) |ch| {
if (!std.ascii.isAlphabetic(ch))
return false;
}
return true;
}
fn splitString(input: []const u8) !std.ArrayList([]const u8) {
var words = std.ArrayList([]const u8).init(allocator);
var word = std.ArrayList(u8).init(allocator);
for (input) |ch| {
if (!std.ascii.isWhitespace(ch)) {
try word.append(ch);
} else if (word.items.len != 0) {
try words.append(try allocator.dupe(u8, word.items));
word.clearRetainingCapacity();
}
}
if (word.items.len != 0) {
try words.append(word.items);
}
return words;
}
test "testLexer" {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
allocator = arena.allocator();
const input =
\\ let x = 45 + ( foo * bar )
;
const tests = [_]Token{
.{ .typ = .let, .value = "let" },
.{ .typ = .identifier, .value = "x" },
.{ .typ = .eq, .value = "=" },
.{ .typ = .number, .value = "45" },
.{ .typ = .binaryop, .value = "+" },
.{ .typ = .lparen, .value = "(" },
.{ .typ = .identifier, .value = "foo" },
.{ .typ = .binaryop, .value = "*" },
.{ .typ = .identifier, .value = "bar" },
.{ .typ = .rparen, .value = ")" },
};
const got = try tokenize(input);
for (tests, 0..got.items.len) |t, i| {
try std.testing.expectEqualDeep(t, got.items[i]);
}
}
// End of gist (GitHub page footer removed).