Created
December 26, 2012 09:10
-
-
Save anonymous/4379035 to your computer and use it in GitHub Desktop.
Partial, experimental implementation that uses Spirit.Qi to generate tokens carrying additional information.
Source-position tracking is still a work in progress.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <tuple> | |
#include <sstream> | |
#include <boost/preprocessor.hpp> | |
#include <boost/spirit/include/classic_position_iterator.hpp> | |
#include <boost/spirit/include/classic_position_iterator_fwd.hpp> | |
#include <boost/spirit/include/phoenix.hpp> | |
#include <boost/spirit/include/qi.hpp> | |
#include "nodes.hpp" | |
namespace classic = boost::spirit::classic; | |
namespace qi = boost::spirit::qi; | |
namespace phx = boost::phoenix; | |
// Boost.PP sequence of the language keywords. The DECL_*_KEYWORD and
// DEF_RULE_KEYWORD macros are applied to every element of this sequence via
// BOOST_PP_SEQ_FOR_EACH, producing one token type and one rule per keyword.
#define SEQ_KEYWORD \
    (void)          \
    (bool)          \
    (int)           \
    (return)        \
    (function)
// Boost.PP sequence of the operators/punctuators. Each element is itself a
// two-element sequence: (spelling)(identifier). Element 0 is the literal text
// matched by the parser, element 1 is the suffix used to build the token type
// (token_<identifier>) and rule (op_<identifier>) names.
#define SEQ_OPERATOR          \
    (("(")(left_paren))       \
    ((")")(right_paren))      \
    (("{")(left_brace))       \
    (("}")(right_brace))      \
    ((":")(colon))            \
    ((",")(comma))            \
    (("+")(add))              \
    (("-")(sub))              \
    (("*")(mul))              \
    (("/")(div))
// BOOST_PP_SEQ_FOR_EACH callback: declares `struct token_<elem>` for one
// keyword. The token only records a source position. Members are
// zero-initialised so a token that has not been stamped yet never exposes
// indeterminate values.
#define DECL_TOKEN_KEYWORD(r, data, elem) \
    struct BOOST_PP_CAT(token_, elem) {   \
        unsigned line   = 0;              \
        unsigned column = 0;              \
    };
// BOOST_PP_SEQ_FOR_EACH callback: declares the rule member `kw_<elem>` whose
// synthesized attribute is `token_<elem>` and whose skipper is ASCII
// whitespace. Expects a type named `iterator` to be in scope at the point of
// expansion.
#define DECL_RULE_KEYWORD(r, data, elem) \
    qi::rule<                            \
        iterator,                        \
        BOOST_PP_CAT(token_, elem)(),    \
        qi::ascii::space_type            \
    > BOOST_PP_CAT(kw_, elem);
// BOOST_PP_SEQ_FOR_EACH callback: defines rule `kw_<elem>`.
// The leading `eps` action stamps the token (_val) with a source position
// before the keyword text is matched. The keyword is matched inside `lexeme`
// and must not be followed by an identifier character, so that e.g. "intx" or
// "functionfoo" is not mistaken for a keyword followed by something else.
// Expects `eps`, `lit`, `lexeme` and `_val` to be visible where it is expanded.
#define DEF_RULE_KEYWORD(r, data, elem)          \
    BOOST_PP_CAT(kw_, elem) =                    \
           eps[store_source_position(_val)]      \
        >> lexeme[                               \
                  lit(BOOST_PP_STRINGIZE(elem))  \
               >> !(qi::ascii::alnum | '_')      \
           ]                                     \
        ;
// BOOST_PP_SEQ_FOR_EACH callback: declares `struct token_<name>` for one
// operator, where <name> is element 1 of the (spelling)(name) pair. Members
// are zero-initialised so an unstamped token never exposes indeterminate
// values.
#define DECL_TOKEN_OPERATOR(r, data, elem)                     \
    struct BOOST_PP_CAT(token_, BOOST_PP_SEQ_ELEM(1, elem)) {  \
        unsigned line   = 0;                                   \
        unsigned column = 0;                                   \
    };
// BOOST_PP_SEQ_FOR_EACH callback: declares the rule member `op_<name>` whose
// synthesized attribute is `token_<name>` (<name> = element 1 of the pair) and
// whose skipper is ASCII whitespace. Expects a type named `iterator` to be in
// scope at the point of expansion.
#define DECL_RULE_OPERATOR(r, data, elem)                        \
    qi::rule<                                                    \
        iterator,                                                \
        BOOST_PP_CAT(token_, BOOST_PP_SEQ_ELEM(1, elem))(),      \
        qi::ascii::space_type                                    \
    > BOOST_PP_CAT(op_, BOOST_PP_SEQ_ELEM(1, elem));
// BOOST_PP_SEQ_FOR_EACH callback: defines rule `op_<name>`.
// The leading `eps` action stamps the token (_val) with a source position,
// then the operator spelling (element 0 of the pair) is matched literally.
// Expects `eps`, `lit` and `_val` to be visible where it is expanded.
#define DEF_RULE_OPERATOR(r, data, elem)                  \
    BOOST_PP_CAT(op_, BOOST_PP_SEQ_ELEM(1, elem)) =       \
           eps[store_source_position(_val)]               \
        >> lit(BOOST_PP_SEQ_ELEM(0, elem))                \
        ;
namespace { | |
// Counters handed out as "source positions". Nothing in this file advances
// current_line, and current_column simply increases by one per stamped object.
// NOTE(review): this looks like a placeholder until real positions are taken
// from the position iterator — confirm.
unsigned current_line   = 0;
unsigned current_column = 0;

// Function object (wrapped by boost::phoenix::function elsewhere in this file)
// that stamps `arg` with the current counters and then advances the column
// counter. `arg` may be any object exposing assignable `line` and `column`
// members.
struct store_source_position_impl {
    // Return type for result_of protocols that look for a nested result_type;
    // the nested result<> template below is kept for callers that use it.
    typedef void result_type;

    template<typename arg_type>
    struct result {
        typedef void type;
    };

    template<typename arg_type>
    typename result<arg_type>::type operator()(arg_type &arg) const
    {
        arg.line   = current_line;
        arg.column = current_column++;  // post-increment: arg gets the old value
    }
};
// Function object (wrapped by boost::phoenix::function elsewhere in this file)
// that copies the `line` / `column` members of `refered` into the object
// pointed to by `node`. `node` is dereferenced unconditionally, so it must not
// be null.
struct attach_source_position_impl {
    // Return type for result_of protocols that look for a nested result_type.
    // The two-parameter result<> template below cannot be instantiated as
    // result<F(A, B)>, so this typedef is what such protocols rely on.
    typedef void result_type;

    template<typename node_type, typename refered_type>
    struct result {
        typedef void type;
    };

    template<typename node_type, typename refered_type>
    typename result<node_type, refered_type>::type operator()(node_type *node, const refered_type &refered) const
    {
        node->line   = refered.line;
        node->column = refered.column;
    }
};
boost::phoenix::function<store_source_position_impl> store_source_position; | |
boost::phoenix::function<attach_source_position_impl> attach_source_position; | |
BOOST_PP_SEQ_FOR_EACH(DECL_TOKEN_KEYWORD , _, SEQ_KEYWORD ); | |
BOOST_PP_SEQ_FOR_EACH(DECL_TOKEN_OPERATOR, _, SEQ_OPERATOR); | |
} | |
template<typename iterator> | |
struct function_grammar : qi::grammar<iterator, tree::function*(), qi::ascii::space_type> | |
{ | |
function_grammar() | |
: function_grammar::base_type(start) | |
{ | |
using boost::spirit::ascii::alpha; | |
using qi::_1; | |
using qi::_2; | |
using qi::_3; | |
using qi::_4; | |
using qi::_val; | |
using qi::as_string; | |
using qi::eps; | |
using qi::int_; | |
using qi::lexeme; | |
using qi::lit; | |
BOOST_PP_SEQ_FOR_EACH(DEF_RULE_KEYWORD , _, SEQ_KEYWORD ); | |
BOOST_PP_SEQ_FOR_EACH(DEF_RULE_OPERATOR, _, SEQ_OPERATOR); | |
start = function.alias(); | |
function = | |
( | |
kw_function | |
>> name | |
>> '(' | |
>> ')' | |
>> ':' | |
>> type | |
>> block | |
) | |
[_val = phx::new_<tree::function>(_2, _3, _4), attach_source_position(_val, _1)] | |
; | |
name = | |
as_string[lexeme[+alpha]] | |
[_val = phx::new_<tree::identifier>(_1), store_source_position(*_val)] | |
; | |
type = | |
kw_void[_val = phx::new_<tree::type>(tree::type::VOID), store_source_position(*_val)] | |
| kw_bool[_val = phx::new_<tree::type>(tree::type::BOOL), store_source_position(*_val)] | |
| kw_int [_val = phx::new_<tree::type>(tree::type::INT ), store_source_position(*_val)] | |
; | |
block = | |
op_left_brace[_val = phx::new_<tree::block>()] | |
>> *((expression >> ';')[phx::bind(&tree::block::append_expression, _val, _1)]) | |
>> '}' | |
; | |
expression = | |
primary_expr[_val = _1] | |
>> ( | |
(op_add >> expression)[_val = phx::new_<tree::binary_expr>(tree::binary_expr::ADD, _val, _2), attach_source_position(_val, _1)] | |
| (op_sub >> expression)[_val = phx::new_<tree::binary_expr>(tree::binary_expr::SUB, _val, _2), attach_source_position(_val, _1)] | |
| eps | |
) | |
; | |
primary_expr = | |
integer[_val = phx::new_<tree::primary_expr>(_1), attach_source_position(_val, *_1)] | |
; | |
integer = | |
int_ | |
[_val = phx::new_<tree::integer>(_1), store_source_position(*_val)] | |
; | |
} | |
qi::rule<iterator, tree::function* (), qi::ascii::space_type> start; | |
qi::rule<iterator, tree::function* (), qi::ascii::space_type> function; | |
qi::rule<iterator, tree::identifier* (), qi::ascii::space_type> name; | |
qi::rule<iterator, tree::type* (), qi::ascii::space_type> type; | |
qi::rule<iterator, tree::block* (), qi::ascii::space_type> block; | |
qi::rule<iterator, tree::expression* (), qi::ascii::space_type> expression; | |
qi::rule<iterator, tree::primary_expr*(), qi::ascii::space_type> primary_expr; | |
qi::rule<iterator, tree::integer* (), qi::ascii::space_type> integer; | |
BOOST_PP_SEQ_FOR_EACH(DECL_RULE_KEYWORD , _, SEQ_KEYWORD ); | |
BOOST_PP_SEQ_FOR_EACH(DECL_RULE_OPERATOR, _, SEQ_OPERATOR); | |
}; | |
int main() { | |
std::stringstream stream; | |
stream << "function " << "foo" << "( )" << ':' << std::endl << " int" << std::endl << " { }"; | |
std::string source = stream.str(); | |
typedef classic::position_iterator<std::string::iterator> position_iterator_type; | |
position_iterator_type current_pos = position_iterator_type(source.begin(), source.end()); | |
position_iterator_type current_end = position_iterator_type(); | |
tree::function* func = nullptr; | |
function_grammar<position_iterator_type> grammar; | |
const bool is_success = qi::phrase_parse(current_pos, current_end, grammar, qi::ascii::space, func); | |
std::cout << std::boolalpha << is_success << std::endl; | |
return 0; | |
} | |
// Remove the helper macros so they do not leak past this point.
#undef DEF_RULE_OPERATOR
#undef DECL_RULE_OPERATOR
#undef DECL_TOKEN_OPERATOR
#undef DEF_RULE_KEYWORD
#undef DECL_RULE_KEYWORD
#undef DECL_TOKEN_KEYWORD
#undef SEQ_OPERATOR
#undef SEQ_KEYWORD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <utility> | |
#include "nodes.hpp" | |
namespace tree { | |
node::~node() = default; | |
identifier::identifier(std::string name) | |
: name(std::move(name)) | |
{ | |
} | |
type::type(primitive kind) | |
: kind(kind) | |
{ | |
} | |
integer::integer(int value) | |
: value(value) | |
{ | |
} | |
primary_expr::primary_expr(integer *value) | |
: value(value) | |
{ | |
} | |
binary_expr::binary_expr(op_type op, expression *lhs, expression *rhs) | |
: op(op) | |
, lhs(lhs) | |
, rhs(rhs) | |
{ | |
} | |
void block::append_expression(expression* expr) | |
{ | |
exprs.emplace_back(expr); | |
} | |
function::function(identifier *name, type *result_type, block *body) | |
: name(name) | |
, result_type(result_type) | |
, body(body) | |
{ | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef NODES_HPP | |
#define NODES_HPP | |
#include <string> | |
#include <vector> | |
#include <boost/variant.hpp> | |
namespace tree {

// Common base of all syntax-tree nodes; carries the source position.
struct node {
    // Pure virtual so that node itself cannot be instantiated; a definition
    // must still be provided in exactly one translation unit.
    virtual ~node() = 0;

    unsigned line   = 0;
    unsigned column = 0;
};

// A primitive type name.
struct type : node {
    enum primitive {
        VOID,
        BOOL,
        INT ,
    };

    explicit type(primitive kind);

    primitive kind;
};

// NOTE(review): not referenced by the parser code shown alongside this header;
// `integer` below is the node actually built for integer literals.
struct integer_literal : node {
    int value = 0;
};

struct identifier : node {
    explicit identifier(std::string name);

    std::string name;
};

//struct parameter : node {
//    identifier* name = nullptr;
//    type* t = nullptr;
//};

struct integer : node {
    explicit integer(int value);

    int value;
};

// Base of all expression nodes.
struct expression : node {
};

// An expression consisting of a single integer. Owns `value`.
struct primary_expr : expression {
    explicit primary_expr(integer *value);
    ~primary_expr() override { delete value; }

    // Copying would leave two nodes owning the same child.
    primary_expr(const primary_expr&) = delete;
    primary_expr& operator=(const primary_expr&) = delete;

    integer* value;
};

// A binary operation. Owns both operands.
struct binary_expr : expression {
    enum op_type {
        ADD,
        SUB,
    };

    binary_expr(op_type op, expression *lhs, expression *rhs);
    ~binary_expr() override { delete lhs; delete rhs; }

    // Copying would leave two nodes owning the same children.
    binary_expr(const binary_expr&) = delete;
    binary_expr& operator=(const binary_expr&) = delete;

    op_type     op;
    expression *lhs;
    expression *rhs;
};

// A brace-delimited list of expressions. Owns every appended expression.
struct block : node {
    // Declared explicitly because the deleted copy constructor below would
    // otherwise suppress the implicit default constructor.
    block() = default;
    ~block() override
    {
        for (expression* expr : exprs) {
            delete expr;
        }
    }

    // Copying would leave two blocks owning the same expressions.
    block(const block&) = delete;
    block& operator=(const block&) = delete;

    void append_expression(expression* expr);

    std::vector<expression*> exprs;
};

// A function definition. Owns its name, result type and body.
struct function : node {
    function(identifier *name, type *result_type, block *body);
    ~function() override
    {
        delete name;
        delete result_type;
        delete body;
    }

    // Copying would leave two nodes owning the same children.
    function(const function&) = delete;
    function& operator=(const function&) = delete;

//  std::vector<parameter*> parameters;
    identifier *name       ;
    type       *result_type;
    block      *body       ;
};

}
#endif /* NODES_HPP */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment