Skip to content

Instantly share code, notes, and snippets.

@darilrt
Created December 26, 2024 20:57
Show Gist options
  • Save darilrt/7d70586516169df617bf150b53877a71 to your computer and use it in GitHub Desktop.
Save darilrt/7d70586516169df617bf150b53877a71 to your computer and use it in GitHub Desktop.
#include "lexer/lexer.hpp"
// Reserved words of the language. lex_identifier() compares every scanned
// word against this table to decide between Keyword and Identifier tokens.
const std::string keywords[] = {
    "if",
    "else",
    "while",
    "for",
    "ret",
    "break",
    "continue",
    "true",
    "false",
    "null",
    "def",
    "interface",
    "extern",
};
// Returns the character at the current read position without consuming it.
//
// When `index` equals source.size(), std::string::operator[] yields '\0';
// the scanning loops in this file rely on that value to stop at end of input.
char Lexer::peek() const
{
    const auto position = static_cast<std::string::size_type>(index);
    return source[position];
}
// Consumes one character and keeps `location` in sync with it.
// Does nothing once the read position has reached the end of `source`.
void Lexer::advance()
{
    if (index >= source.size())
    {
        return;
    }

    const bool consumed_newline = (source[index] == '\n');
    ++index;

    if (consumed_newline)
    {
        // A newline starts a fresh line; columns restart at 1.
        ++location.line;
        location.column = 1;
        return;
    }

    ++location.column;
}
// Consumes characters until the read position is at a non-whitespace
// character or at the end of `source`.
void Lexer::skip_whitespace()
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF); passing a negative plain char is undefined
    // behaviour, so the character is converted first.
    while (index < source.size() && std::isspace(static_cast<unsigned char>(peek())))
    {
        advance();
    }
}
// Scans and returns the next token from `source`.
//
// Leading whitespace is skipped first. At the end of input an EndOfFile token
// with an empty lexeme is returned. Identifiers/keywords, numbers, strings and
// character literals are delegated to the matching lex_* helper; punctuation
// and operators are handled here. Any other character produces an Unknown
// token holding just that character.
//
// NOTE(review): the Location stored in each token is the lexer position
// *after* the token has been consumed, not where it starts — confirm that
// this is what consumers of Token::location expect.
Token Lexer::next_token()
{
    skip_whitespace();
    if (index >= source.size())
    {
        return Token{Token::Type::EndOfFile, "", location};
    }

    const char p = peek();
    // <cctype> classifiers require a value representable as unsigned char.
    const auto up = static_cast<unsigned char>(p);

    if (std::isalpha(up) || p == '_')
    {
        return lex_identifier();
    }
    if (std::isdigit(up))
    {
        return lex_number();
    }
    if (p == '"')
    {
        return lex_string();
    }
    if (p == '\'')
    {
        return lex_char();
    }

    // Everything below is a one- or two-character token starting with p.
    advance();

    // Only '-' may start the arrow operator. Handling it before the switch
    // keeps '+' from sharing this check (previously "+>" was lexed as "->").
    if (p == '-' && peek() == '>')
    {
        advance();
        return Token{Token::Type::Operator, "->", location};
    }

    switch (p)
    {
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
    case ',':
    case '.':
    case ';':
    case ':':
        return Token{Token::Type::Punctuation, std::string(1, p), location};
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
    case '=':
    case '!':
    case '<':
    case '>':
    case '&':
    case '|':
        // An operator directly followed by '=' forms a compound operator
        // such as "+=", "==", "!=" or "<=".
        if (peek() == '=')
        {
            advance();
            return Token{Token::Type::Operator, std::string(1, p) + '=', location};
        }
        return Token{Token::Type::Operator, std::string(1, p), location};
    default:
        break;
    }

    // p was already consumed above; advancing again here would silently drop
    // the character that follows an unknown one.
    return Token{Token::Type::Unknown, std::string(1, p), location};
}
// Scans a run of alphanumeric characters and underscores starting at the
// current position. Returns a Keyword token when the scanned word matches an
// entry of `keywords`, otherwise an Identifier token.
Token Lexer::lex_identifier()
{
    std::string lexeme;

    // The cast is required: std::isalnum is undefined for negative plain-char
    // values. The loop also ends at end of input, where peek() yields '\0'.
    for (char c = peek(); std::isalnum(static_cast<unsigned char>(c)) || c == '_'; c = peek())
    {
        lexeme += c;
        advance();
    }

    for (const auto &keyword : keywords)
    {
        if (lexeme == keyword)
        {
            return Token{Token::Type::Keyword, lexeme, location};
        }
    }
    return Token{Token::Type::Identifier, lexeme, location};
}
// Scans a numeric literal starting at the current position.
//
// A run of digits yields an Integer token. If the digits are immediately
// followed by '.', the dot and any digits after it are consumed as well and a
// Float token is returned (so "1." is lexed as a Float with lexeme "1.").
Token Lexer::lex_number()
{
    std::string lexeme;

    // std::isdigit is undefined for negative plain-char values, hence the cast.
    auto p = peek();
    while (std::isdigit(static_cast<unsigned char>(p)))
    {
        lexeme += p;
        advance();
        p = peek();
    }

    if (p != '.')
    {
        return Token{Token::Type::Integer, lexeme, location};
    }

    // Consume the decimal point, then the fractional digits.
    lexeme += p;
    advance();
    p = peek();
    while (std::isdigit(static_cast<unsigned char>(p)))
    {
        lexeme += p;
        advance();
        p = peek();
    }
    return Token{Token::Type::Float, lexeme, location};
}
// Scans a double-quoted string literal; the current character must be the
// opening quote.
//
// Returns a String token whose lexeme is the text between the quotes (quotes
// excluded, no escape processing). If the input ends before a closing quote is
// found, an Unknown token carrying the text collected so far is returned.
Token Lexer::lex_string()
{
    std::string lexeme;
    advance(); // skip the opening quote

    // The bounds check is essential: at end of input peek() yields '\0' and
    // advance() is a no-op, so looping on the quote alone would never end.
    while (index < source.size() && peek() != '"')
    {
        lexeme += peek();
        advance();
    }

    if (index >= source.size())
    {
        // Unterminated literal: report it instead of looping forever.
        return Token{Token::Type::Unknown, lexeme, location};
    }

    advance(); // skip the closing quote
    return Token{Token::Type::String, lexeme, location};
}
// Scans a single-quoted character literal; the current character must be the
// opening quote.
//
// Returns a Char token whose lexeme is the single character between the
// quotes. If that character is not followed by a closing quote, an Unknown
// token holding the character is returned and nothing further is consumed.
// If the input ends right after the opening quote, the Unknown token has an
// empty lexeme.
Token Lexer::lex_char()
{
    std::string lexeme;
    advance(); // skip the opening quote

    if (index >= source.size())
    {
        // Nothing follows the quote; avoid storing the '\0' that peek()
        // yields at end of input.
        return Token{Token::Type::Unknown, lexeme, location};
    }

    lexeme += peek();
    advance();

    if (peek() != '\'')
    {
        return Token{Token::Type::Unknown, lexeme, location};
    }

    advance(); // skip the closing quote
    return Token{Token::Type::Char, lexeme, location};
}
std::vector<Token> lex(const std::string &source, const std::string &file_name)
{
Lexer lexer(source, file_name);
std::vector<Token> tokens;
while (true)
{
auto token = lexer.next_token();
if (token.type == Token::Type::EndOfFile)
{
break;
}
tokens.push_back(token);
}
return tokens;
}
#pragma once
#include <string>
#include <vector>
#include "lexer/token.hpp"
// Hand-written lexer that turns a source string into Tokens, one per call to
// next_token().
class Lexer
{
public:
    std::string file_name;     // Name recorded in token locations.
    std::string source;        // Full text being tokenized.
    std::vector<Token> tokens; // NOTE(review): not used by the member functions defined for this class — confirm whether it is still needed.
    Location location;         // Line/column of the current read position; starts at line 1, column 1.
    int index;                 // Offset of the current read position within `source`.

    // Creates a lexer positioned at the start of `source`.
    // The mem-initializers are listed in member declaration order, which is
    // the order in which they actually run.
    Lexer(const std::string &source, const std::string &file_name = "<stdio>")
        : file_name(file_name), source(source), location({1, 1, file_name}), index(0) {}

    // Returns the character at the read position without consuming it.
    char peek() const;

    // Consumes one character, updating `location`; no-op at end of input.
    void advance();

    // Consumes whitespace up to the next non-whitespace character.
    void skip_whitespace();

    // Scans and returns the next token; EndOfFile once the input is exhausted.
    Token next_token();

    // Helpers used by next_token() for the individual token categories.
    Token lex_identifier();
    Token lex_number();
    Token lex_string();
    Token lex_char();
};
// Tokenizes `source` and returns all tokens that precede the end of input
// (the EndOfFile token itself is not included). `file_name` is passed to the
// Lexer and defaults to "<stdio>".
std::vector<Token> lex(const std::string &source, const std::string &file_name = "<stdio>");
#pragma once
#include <string>
#include <iostream>
// A position in a source file: line, column and the file's name.
class Location
{
public:
    int line;
    int column;
    std::string file;

    // Returns a copy of this location.
    Location copy() const
    {
        return Location{line, column, file};
    }

    // Writes the location as: Location {line=<line>, column=<column>, file='<file>'}
    friend std::ostream &operator<<(std::ostream &os, const Location &location)
    {
        os << "Location {";
        os << "line=" << location.line << ", ";
        os << "column=" << location.column << ", ";
        os << "file='" << location.file << "'";
        os << "}";
        return os;
    }
};
// A single lexical unit: its category, its text and an associated Location.
class Token
{
public:
    enum class Type
    {
        Identifier,  // Variable name
        Integer,     // Integer number
        Float,       // Floating point number
        String,      // String "Hello, World!"
        Char,        // Character 'c'
        Operator,    // Operator like +, -, *, /, etc.
        Punctuation, // Punctuation like ;, (, ), etc.
        Keyword,     // Keyword like if, else, while, etc.
        EndOfFile,   // End of file
        Unknown      // Unknown token
    } type;
    std::string lexeme;
    Location location;

    // Writes the token as: Token {type=<name>, lexeme='<lexeme>', location=<location>}
    friend std::ostream &operator<<(std::ostream &os, const Token &token)
    {
        // Maps a token type to its printable name; a value outside the
        // enumerators maps to an empty name.
        const auto name_of = [](Type kind) -> const char *
        {
            switch (kind)
            {
            case Type::Identifier:
                return "Identifier";
            case Type::Integer:
                return "Integer";
            case Type::Float:
                return "Float";
            case Type::String:
                return "String";
            case Type::Char:
                return "Char";
            case Type::Operator:
                return "Operator";
            case Type::Punctuation:
                return "Punctuation";
            case Type::Keyword:
                return "Keyword";
            case Type::EndOfFile:
                return "EndOfFile";
            case Type::Unknown:
                return "Unknown";
            }
            return "";
        };

        const std::string type_name = name_of(token.type);
        os << "Token {";
        os << "type=" << type_name << ", ";
        os << "lexeme='" << token.lexeme << "', ";
        os << "location=" << token.location << "}";
        return os;
    }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment