Created
December 10, 2019 17:04
-
-
Save amyinorbit/ac3b401c78c4fbe74838fcbedb54f0e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// main.cpp | |
// formatter | |
// | |
// Created by Amy Parent on 12/10/2019. | |
// Copyright © 2019 Amy Parent. All rights reserved. | |
// | |
#include <iostream> | |
#include <fstream> | |
#include <string> | |
#include <algorithm> | |
#include <filesystem> | |
#include <iterator> | |
#include <set> | |
#include <vector> | |
#include <apfun/maybe.hpp> | |
#include <apfun/ranges.hpp> | |
using namespace amyinorbit; | |
using amyinorbit::maybe; | |
struct Token { | |
enum Kind { | |
select, | |
where, | |
braceL, | |
braceR, | |
star, | |
constant, | |
is, | |
isnot, | |
logAnd, | |
logOr, | |
field, | |
keyword, | |
resource | |
} kind; | |
std::string_view value; | |
}; | |
constexpr bool operator==(const Token& left, const Token& right) { | |
return left.kind == right.kind && left.value == right.value; | |
} | |
constexpr bool operator==(const Token& left, const std::string& right) { | |
return left.value == right; | |
} | |
const char* name(const Token& token) { | |
switch (token.kind) { | |
case Token::select: return "kw_select"; | |
case Token::where: return "kw_where"; | |
case Token::braceL: return "l_brace"; | |
case Token::braceR: return "r_brace"; | |
case Token::star: return "star"; | |
case Token::constant: return "constant"; | |
case Token::is: return "op_is"; | |
case Token::isnot: return "op_is_not"; | |
case Token::logAnd: return "op_and"; | |
case Token::logOr: return "op_or"; | |
case Token::field: return "field_name"; | |
case Token::keyword: return "keyword"; | |
case Token::resource: return "resource"; | |
} | |
return "INVALID"; | |
} | |
std::ostream& operator<<(std::ostream& out, const Token& token) { | |
out << "[" << name(token) << "] '" << token.value << "'"; | |
return out; | |
} | |
struct Record { | |
std::string ident; | |
std::string region; | |
maybe<std::string> name; | |
maybe<std::string> airport; | |
}; | |
constexpr static struct end_tag_t {} end_tag; | |
constexpr static struct begin_tag_t {} begin_tag; | |
class Scanner { | |
public: | |
using value_type = maybe<Token>; | |
using reference = value_type; | |
using pointer = value_type*; | |
using difference_type = int; | |
using iterator_category = std::forward_iterator_tag; | |
explicit Scanner(const std::string& source) : source_(source) {} | |
Scanner(begin_tag_t, const std::string& source) : source_(source), index_(0) { next(); } | |
Scanner(end_tag_t, const std::string& source) : source_(source), index_(source.size()+1) {} | |
bool operator==(const Scanner& other) const { | |
return other.source_.data() == source_.data() && other.index_ == index_; | |
} | |
bool operator!=(const Scanner& other) const { | |
return !(*this == other); | |
} | |
value_type operator*() const { | |
return current_; | |
} | |
Scanner operator++() { | |
Scanner r = *this; | |
next(); | |
return r; | |
} | |
Scanner& operator++(int) { | |
next(); | |
return *this; | |
} | |
maybe<Token> next() { | |
while(index_ < source_.size()) { | |
auto start = index_; | |
char c = source_[index_++]; | |
switch(c) { | |
// whitespace, we just loop | |
case 0x0020: | |
case 0x000d: | |
case 0x0009: | |
case 0x000b: | |
case 0x000c: | |
case 0x000a: | |
break; | |
case '(': return produce(Token::braceL); | |
case ')': return produce(Token::braceL); | |
case '!': return withNext('=', Token::isnot); | |
case '<': return withNext('>', Token::isnot); | |
case '=': return withNext('=', Token::is); | |
case '&': return withNext('&', Token::logAnd); | |
case '|': return withNext('|', Token::logOr); | |
case '*': return produce(Token::star); | |
case '"': return string(start); | |
default: return identifier(start); | |
} | |
} | |
index_ = source_.size()+1; | |
return nothing(); | |
} | |
maybe<Token> current() const { | |
return current_; | |
} | |
private: | |
maybe<Token> withNext(char next, Token::Kind kind) { | |
if(index_ >= source_.size()) return nothing(); | |
if(source_[index_++] != next) return nothing(); | |
current_ = Token{kind, std::string_view(source_.data() + index_ - 2, 2)}; | |
return current_; | |
} | |
maybe<Token> produce(Token::Kind kind, std::size_t start = -1) { | |
start = start == -1 ? index_ - 1 : start; | |
current_ = Token{kind, std::string_view(source_.data() + start, index_ - start)}; | |
return current_; | |
} | |
maybe<Token> string(std::size_t start) { | |
while(index_ < source_.size() && source_[index_] != '"') { | |
index_++; | |
} | |
if(index_ < source_.size()) { | |
index_++; | |
auto length = (index_ - start) - 2; | |
current_ = Token{Token::constant, std::string_view(source_.data() + start + 1, length)}; | |
return current_; | |
} else { | |
return nothing(); | |
} | |
} | |
maybe<Token> identifier(std::size_t start) { | |
while(index_ < source_.size() && isIdentifier(source_[index_])) { | |
index_++; | |
} | |
auto length = index_ - start; | |
std::string_view ident(source_.data() + start, length); | |
Token::Kind kind = Token::constant; | |
if(ident == "select") | |
kind = Token::select; | |
else if(ident == "where") | |
kind = Token::where; | |
else if(ident == "and") | |
kind = Token::logAnd; | |
else if(ident == "or") | |
kind = Token::logOr; | |
else if(ident == "is") | |
kind = Token::is; | |
else if(ident == "airports" || ident == "waypoints" || ident == "navaids" || ident == "airways") | |
kind = Token::resource; | |
else if(ident == "name" || ident == "icao" || ident == "region" || ident == "airport") | |
kind = Token::field; | |
current_ = Token{kind, ident}; | |
return current_; | |
} | |
bool isIdentifier(char c) { | |
return c == '_' | |
|| c == '-' | |
|| (c >= 'a' && c <= 'z') | |
|| (c >= 'A' && c <= 'Z') | |
|| (c >= '0' && c <= '9'); | |
} | |
std::string_view source_; | |
std::size_t index_ = 0; | |
maybe<Token> current_ = nothing(); | |
}; | |
class Parser { | |
public: | |
Parser(const std::string& source) : scanner(source) {} | |
bool failed() const { return failed_; } | |
protected: | |
bool is(Token::Kind kind) const { | |
if(!scanner.current()) return false; | |
return scanner.current()->kind == kind; | |
} | |
bool is(const std::set<Token::Kind>& kind) const { | |
if(!scanner.current()) return false; | |
return kind.count(scanner.current()->kind); | |
} | |
maybe<Token> match(Token::Kind kind) { | |
if(is(kind)) { | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} | |
return nothing(); | |
} | |
maybe<Token> match(const std::set<Token::Kind>& kind) { | |
if(is(kind)) { | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} | |
return nothing(); | |
} | |
void eat() { | |
scanner.next(); | |
} | |
maybe<Token> expect(Token::Kind kind, const std::string& message = "unexpected token") { | |
if(failed_) { | |
while(*scanner && !is(kind)) ++scanner; | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} else { | |
if(is(kind)) { | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} | |
error(message); | |
return nothing(); | |
} | |
} | |
maybe<Token> expect(const std::set<Token::Kind> kind, const std::string& message = "unexpected token") { | |
if(failed_) { | |
while(*scanner && !is(kind)) ++scanner; | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} else { | |
if(is(kind)) { | |
auto t = *scanner; | |
scanner.next(); | |
return t; | |
} | |
error(message); | |
return nothing(); | |
} | |
} | |
bool any() { | |
return scanner.next().has_value(); | |
} | |
// MARK: - parsing stuff | |
protected: | |
void error(const std::string& message) { | |
if(failed_) return; | |
std::cerr << "error: " << message << "\n"; | |
std::cerr << " > " << scanner.current() << "\n"; | |
failed_ = true; | |
} | |
Scanner scanner; | |
private: | |
bool failed_ = false; | |
}; | |
struct Predicate { | |
enum opcode { | |
op_eq, | |
op_neq, | |
op_and, | |
op_or, | |
op_halt, | |
op_fname, | |
op_ficao, | |
op_fregion, | |
op_fairport, | |
}; | |
bool run(const Record& rec) { | |
ip = 0; | |
sp = 0; | |
bool a, b; | |
while(ip < bytecode.size()) { | |
opcode inst = (opcode)read(); | |
switch (inst) { | |
case op_eq: | |
push(is(rec, (opcode)read(), read())); | |
break; | |
case op_neq: | |
push(!is(rec, (opcode)read(), read())); | |
break; | |
case op_and: | |
a = pop(); | |
b = pop(); | |
push(a && b); | |
break; | |
case op_or: | |
a = pop(); | |
b = pop(); | |
push(a || b); | |
break; | |
case op_halt: | |
return false; | |
break; | |
default: | |
break; | |
} | |
} | |
assert(sp < 64); | |
return pop(); | |
} | |
void emitOp(opcode op) { | |
bytecode.push_back(op); | |
} | |
void emitConst(const std::string_view& string) { | |
auto it = std::find(constants.begin(), constants.end(), string); | |
if(it == constants.end()) { | |
bytecode.push_back((std::uint8_t)constants.size()); | |
constants.push_back(std::string(string.begin(), string.end())); | |
} else { | |
bytecode.push_back((std::uint8_t)(it - constants.begin())); | |
} | |
} | |
private: | |
bool is(const Record& rec, opcode field, std::uint8_t idx) { | |
switch (field) { | |
case op_fname: return rec.name == constants[idx]; | |
case op_ficao: return rec.ident == constants[idx]; | |
case op_fregion: return rec.region == constants[idx]; | |
case op_fairport: return rec.airport == constants[idx]; | |
default: abort(); | |
} | |
return false; | |
} | |
std::vector<std::string> constants; | |
std::vector<std::uint8_t> bytecode; | |
void push(bool value) { | |
stack[sp++] = value; | |
} | |
bool pop() { | |
return stack[--sp]; | |
} | |
std::uint8_t read() { | |
return bytecode[ip++]; | |
} | |
bool stack[64]; | |
int sp, ip; | |
}; | |
class QueryParser : public Parser { | |
public: | |
using Parser::Parser; | |
maybe<Predicate> statement() { | |
scanner.next(); | |
select(); | |
if(failed()) return nothing(); | |
return pred; | |
} | |
private: | |
void select() { | |
expect(Token::select); | |
auto res = expect(Token::resource); | |
if(match(Token::star)) { | |
// flip a flag for detailed records | |
} | |
if(match(Token::where)) whereClause(); | |
} | |
void whereClause() { | |
expression(); | |
} | |
void expression() { | |
factor(); | |
maybe<Token> op; | |
while((op = match({Token::logAnd, Token::logOr}))) { | |
factor(); | |
pred.emitOp(makeLogOp(op)); | |
} | |
} | |
void factor() { | |
if(match(Token::braceL)) { | |
expression(); | |
expect(Token::braceR); | |
} else { | |
auto field = expect(Token::field); | |
auto op = expect({Token::is, Token::isnot}); | |
auto constant = expect(Token::constant); | |
pred.emitOp(makeRelOp(op)); | |
pred.emitOp(makeField(field)); | |
pred.emitConst(constant->value); | |
} | |
} | |
static Predicate::opcode makeRelOp(const maybe<Token>& tok) { | |
if(!tok) return Predicate::op_halt; | |
if(tok->kind == Token::is) return Predicate::op_eq; | |
if(tok->kind == Token::isnot) return Predicate::op_neq; | |
return Predicate::op_halt; | |
} | |
static Predicate::opcode makeLogOp(const maybe<Token>& tok) { | |
if(!tok) return Predicate::op_halt; | |
if(tok->kind == Token::logAnd) return Predicate::op_and; | |
if(tok->kind == Token::logOr) return Predicate::op_or; | |
return Predicate::op_halt; | |
} | |
static Predicate::opcode makeField(const maybe<Token>& tok) { | |
if(!tok) return Predicate::op_halt; | |
if(tok->kind != Token::field) return Predicate::op_halt; | |
if(*tok == "icao") return Predicate::op_ficao; | |
if(*tok == "region") return Predicate::op_fregion; | |
if(*tok == "name") return Predicate::op_fname; | |
if(*tok == "airport") return Predicate::op_fairport; | |
return Predicate::op_halt; | |
} | |
Predicate pred; | |
}; | |
int main() { | |
std::vector<Record> records = { | |
{"EGPN", "EG", "Dundee"}, | |
{"EGPH", "EG", "Dundee"}, | |
{"EGPF", "EG", "Glasgow"}, | |
{"LSGG", "LS", "Geneve Cointrin"}, | |
{"LFPG", "LF", "Paris Charles-de-Gaulle"}, | |
{"LFPO", "LF", "Paris Orly"} | |
}; | |
std::string source; | |
while(std::getline(std::cin, source, '\n')) { | |
// std::cout << "source: " << source << "\n"; | |
QueryParser p(source); | |
auto pred = p.statement(); | |
if(!pred) continue; | |
for(const auto& rec: records) { | |
if(!pred->run(rec)) continue; | |
std::cout << "match: " << rec.ident << ", " << rec.region << "\n"; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment