Last active
June 23, 2018 06:05
-
-
Save justinmeiners/71c701ae440a69d659c61e8be757635c to your computer and use it in GitHub Desktop.
Incomplete Lisp interpreter. See the one on my github.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <map> | |
#include <regex> | |
#include <ctype.h> | |
namespace Lisp | |
{ | |
struct Cell | |
{ | |
enum Type | |
{ | |
NONE, LIST, ID, NUMBER, STRING, BOOL | |
}; | |
Type type; | |
std::string string_value; | |
union | |
{ | |
int int_value; | |
float float_value; | |
bool bool_value; | |
}; | |
std::vector<Cell> list; | |
Cell() : type(Cell::Type::NONE) {} | |
Cell(Type type) : type(type) {} | |
friend std::ostream& operator<<(std::ostream& stream, Cell cell) | |
{ | |
switch (cell.type) | |
{ | |
case Type::NONE: | |
stream << "NONE"; | |
break; | |
case Type::NUMBER: | |
stream << cell.float_value; | |
break; | |
case Type::BOOL: | |
stream << cell.bool_value; | |
break; | |
case Type::ID: | |
case Type::STRING: | |
stream << cell.string_value; | |
break; | |
case Type::LIST: | |
stream << "["; | |
for (auto x : cell.list) | |
stream << x << " " ; | |
stream << "]"; | |
break; | |
} | |
return stream; | |
} | |
}; | |
class Parser | |
{ | |
private: | |
using Iter = std::string::const_iterator; | |
struct Token | |
{ | |
enum Type | |
{ | |
NONE = 0, L_PAREN, R_PAREN, // language syntax | |
ID, // names | |
STRING, INT, HEX, FLOAT // literals | |
}; | |
Type type; | |
Iter start, end; | |
Token(Type type, Iter start, Iter end) | |
: type(type), start(start), end(end) {} | |
}; | |
std::vector< std::pair<Token::Type, std::regex> > types; | |
std::pair<bool, Token> NextToken(Iter begin, Iter end) | |
{ | |
// skip whitespace | |
while (begin != end && isspace(*begin)) | |
++begin; | |
// find a matching token type | |
for (auto t : types) | |
{ | |
std::smatch m; | |
if (std::regex_search(begin, end, m, t.second)) | |
return std::make_pair(true, Token(t.first, begin, m.suffix().first)); | |
} | |
return std::make_pair(false, Token(Token::Type::NONE, begin, end)); | |
} | |
std::vector<Token> Tokenize(const std::string& source) | |
{ | |
std::vector<Token> tokens; | |
Iter c = source.begin(); | |
while (c != source.end()) | |
{ | |
auto result = NextToken(c, source.end()); | |
if (!result.first) | |
break; | |
c = result.second.end; | |
tokens.push_back(result.second); | |
//std::cout << Token::type_name_table[result.second.type] << std::endl; | |
} | |
return tokens; | |
} | |
template <typename I> | |
Cell BuildAtom(I& begin, I& end) | |
{ | |
Token tok = *begin; | |
++begin; | |
switch (tok.type) | |
{ | |
case Token::Type::INT: | |
{ | |
std::string value(tok.start, tok.end); | |
Cell cell(Cell::Type::NUMBER); | |
cell.int_value = std::stoi(value); | |
return cell; | |
} | |
case Token::Type::HEX: | |
{ | |
std::string value(tok.start, tok.end); | |
Cell cell(Cell::Type::NUMBER); | |
cell.int_value = (int)strtol(value.c_str(), NULL, 16); | |
return cell; | |
} | |
case Token::Type::FLOAT: | |
{ | |
std::string value(tok.start, tok.end); | |
Cell cell(Cell::Type::NUMBER); | |
cell.float_value = std::stof(value); | |
return cell; | |
} | |
case Token::Type::STRING: | |
{ | |
Cell cell(Cell::Type::STRING); | |
cell.string_value = std::string(tok.start + 1, tok.end - 1); | |
return cell; | |
} | |
case Token::Type::ID: | |
{ | |
Cell cell(Cell::Type::ID); | |
cell.string_value = std::string(tok.start, tok.end); | |
return cell; | |
} | |
default: | |
return Cell(Cell::Type::NONE); | |
} | |
} | |
template <typename I> | |
Cell BuildAst(I& begin, I& end) | |
{ | |
if (begin == end) | |
{ | |
std::cout << "Syntax error" << std::endl; | |
return Cell(Cell::Type::NONE); | |
} | |
if (begin->type == Token::Type::L_PAREN) | |
{ | |
++begin; | |
Cell new_list(Cell::Type::LIST); | |
while (begin->type != Token::Type::R_PAREN) | |
new_list.list.push_back(BuildAst(begin, end)); | |
++begin; | |
return new_list; | |
} | |
else | |
{ | |
return BuildAtom(begin, end); | |
} | |
} | |
public: | |
Parser() | |
{ | |
#define Reg(T, L) types.push_back(std::make_pair(T, std::regex(L))) | |
Reg(Token::Type::L_PAREN, R"(^\()"); | |
Reg(Token::Type::R_PAREN, R"(^\))"); | |
Reg(Token::Type::ID, R"(^([a-zA-Z\-\+\*\/\<\>])([a-zA-Z0-9_])*)"); | |
Reg(Token::Type::FLOAT, R"(^([0-9])+(\.)([0-9])*)"); | |
Reg(Token::Type::HEX, R"(^0(x|X)[0-9]+)"); | |
Reg(Token::Type::INT, R"(^[0-9]+)"); | |
Reg(Token::Type::STRING, R"(^\"([^"])*")"); | |
#undef Reg | |
} | |
Cell Parse(const std::string& source) | |
{ | |
auto tokens = Tokenize(source); | |
auto start = tokens.begin(); | |
auto end = tokens.end(); | |
return BuildAst(start, end); | |
} | |
}; | |
struct Env | |
{ | |
std::map<std::string, Cell> variables; | |
std::map<std::string, std::function<Cell(const Cell&)>> procedures; | |
static Env Global() | |
{ | |
Env env; | |
env.procedures["*"] =[](const Cell& l) { | |
float product = l.list[0].float_value; | |
for (auto it = l.list.begin() + 1; it != l.list.end(); ++it) | |
product *= it->float_value; | |
Cell result(Cell::Type::NUMBER); | |
result.float_value = product; | |
return result; | |
}; | |
env.procedures["+"] =[](const Cell& l) { | |
float sum = l.list[0].float_value; | |
for (auto it = l.list.begin() + 1; it != l.list.end(); ++it) | |
sum += it->float_value; | |
Cell result(Cell::Type::NUMBER); | |
result.float_value = sum; | |
return result; | |
}; | |
env.procedures[">"] =[](const Cell& l) { | |
Cell result(Cell::Type::BOOL); | |
result.bool_value = l.list[0].float_value > l.list[1].float_value; | |
return result; | |
}; | |
env.procedures["display"] = [](const Cell& l) { | |
std::cout << l.list[0]; | |
return l; | |
}; | |
env.procedures["newline"] = [](const Cell& l) { | |
std::cout << std::endl; | |
return l; | |
}; | |
return env; | |
} | |
}; | |
class Interpreter | |
{ | |
private: | |
public: | |
Interpreter() { | |
env = Env::Global(); | |
} | |
Env env; | |
Cell Eval(const Cell& cell) | |
{ | |
if (cell.type == Cell::Type::ID) | |
{ | |
return env.variables[cell.string_value]; | |
} | |
else if (cell.type == Cell::Type::LIST) | |
{ | |
if (cell.list[0].string_value == "if") | |
{ | |
// if conditionals | |
if (Eval(cell.list[1]).bool_value) | |
{ | |
return Eval(cell.list[2]); | |
} | |
else | |
{ | |
return Eval(cell.list[3]); | |
} | |
} | |
else if (cell.list[0].string_value == "define") | |
{ | |
// variable definitions | |
auto var = Eval(cell.list[2]); | |
env.variables[cell.list[1].string_value] = var; | |
return var; | |
} | |
else | |
{ | |
// procedure calls | |
Cell arg_list(Cell::Type::LIST); | |
for (auto it = cell.list.begin() + 1; it != cell.list.end(); ++it) | |
arg_list.list.push_back(Eval(*it)); | |
return env.procedures[cell.list[0].string_value](arg_list); | |
} | |
} | |
else | |
{ | |
return cell; | |
} | |
} | |
}; | |
} | |
using namespace Lisp; | |
int main(int argc, const char* argv[]) | |
{ | |
//std::string prog("(hello (world 1) \"dawg\" 3.14)"); | |
std::string prog("(display (if (> 5.0 4.0) (* 5.0 4.0) (+ 1.0 2.0)))"); | |
Parser parser; | |
auto cell = parser.Parse(prog); | |
Interpreter interpreter; | |
Cell result = interpreter.Eval(cell); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment