Created
November 19, 2012 17:32
-
-
Save splinterofchaos/4112114 to your computer and use it in GitHub Desktop.
Monadic parsing in C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <memory> | |
#include <iostream> | |
#include <sstream> | |
#include <utility> | |
#include <algorithm> | |
#include <iterator> | |
struct sequence_tag {}; | |
struct pointer_tag {}; | |
template< class X > | |
X category( ... ); | |
template< class S > | |
auto category( const S& s ) -> decltype( std::begin(s), sequence_tag() ); | |
template< class Ptr > | |
auto category( const Ptr& p ) -> decltype( *p, p==nullptr, pointer_tag() ); | |
template< class T > struct Category { | |
using type = decltype( category<T>(std::declval<T>()) ); | |
}; | |
template< class R, class ... X > struct Category< R(&)(X...) > { | |
using type = R(&)(X...); | |
}; | |
template< class T > | |
using Cat = typename Category< typename std::decay<T>::type >::type; | |
template<class...> struct Part; | |
template< class F, class X > | |
struct Part< F, X > | |
{ | |
F f; | |
X x; | |
template< class _F, class _X > | |
constexpr Part( _F&& f, _X&& x ) | |
: f(std::forward<_F>(f)), x(std::forward<_X>(x)) | |
{ | |
} | |
/* | |
* The return type of F only gets deduced based on the number of xuments | |
* supplied. Part otherwise has no idea whether f takes 1 or 10 xs. | |
*/ | |
template< class ... Xs > | |
constexpr auto operator() ( Xs&& ...xs ) | |
-> decltype( f(x,std::declval<Xs>()...) ) | |
{ | |
return f( x, std::forward<Xs>(xs)... ); | |
} | |
}; | |
/* Recursive, variadic version. */ | |
template< class F, class X1, class ...Xs > | |
struct Part< F, X1, Xs... > | |
: public Part< Part<F,X1>, Xs... > | |
{ | |
template< class _F, class _X1, class ..._Xs > | |
constexpr Part( _F&& f, _X1&& x1, _Xs&& ...xs ) | |
: Part< Part<F,X1>, Xs... > ( | |
Part<F,X1>( std::forward<_F>(f), std::forward<_X1>(x1) ), | |
std::forward<_Xs>(xs)... | |
) | |
{ | |
} | |
}; | |
template< class F, class ...X > | |
constexpr Part<F,X...> closure( F&& f, X&& ...x ) { | |
return Part<F,X...>( std::forward<F>(f), std::forward<X>(x)... ); | |
} | |
template< class F, class ...X > | |
constexpr Part<F,X...> closet( F f, X ...x ) { | |
return Part<F,X...>( std::move(f), std::move(x)... ); | |
} | |
template< class F, class ...G > | |
struct Composition; | |
template< class F, class G > | |
struct Composition<F,G> | |
{ | |
F f; G g; | |
template< class _F, class _G > | |
constexpr Composition( _F&& f, _G&& g ) | |
: f(std::forward<_F>(f)), g(std::forward<_G>(g)) { } | |
template< class X, class ...Y > | |
constexpr decltype( f(g(std::declval<X>()), std::declval<Y>()...) ) | |
operator() ( X&& x, Y&& ...y ) { | |
return f( g( std::forward<X>(x) ), std::forward<Y>(y)... ); | |
} | |
}; | |
template< class F, class G, class ...H > | |
struct Composition<F,G,H...> : Composition<F,Composition<G,H...>> | |
{ | |
typedef Composition<G,H...> Comp; | |
template< class _F, class _G, class ..._H > | |
constexpr Composition( _F&& f, _G&& g, _H&& ...h ) | |
: Composition<_F,Composition<_G,_H...>> ( | |
std::forward<_F>(f), | |
Comp( std::forward<_G>(g), std::forward<_H>(h)... ) | |
) | |
{ | |
} | |
}; | |
template< class F, class ...G > | |
constexpr Composition<F,G...> compose( F f, G ...g ) { | |
return Composition<F,G...>( std::move(f), std::move(g)... ); | |
} | |
template< class ... > struct Monad; | |
template< class F, class M, class ...N, class Mo=Monad<Cat<M>> > | |
constexpr auto mbind( F&& f, M&& m, N&& ...n ) | |
-> decltype( Mo::mbind(std::declval<F>(), | |
std::declval<M>(),std::declval<N>()...) ) | |
{ | |
return Mo::mbind( std::forward<F>(f), | |
std::forward<M>(m), std::forward<N>(n)... ); | |
} | |
template< class F, class M, class ...N, class Mo=Monad<Cat<M>> > | |
constexpr auto mdo( F&& f, M&& m ) | |
-> decltype( Mo::mdo(std::declval<F>(), std::declval<M>()) ) | |
{ | |
return Mo::mdo( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
// The first template argument must be explicit! | |
template< class M, class X, class ...Y, class Mo = Monad<Cat<M>> > | |
constexpr auto mreturn( X&& x, Y&& ...y ) | |
-> decltype( Mo::template mreturn<M>( std::declval<X>(), | |
std::declval<Y>()... ) ) | |
{ | |
return Mo::template mreturn<M>( std::forward<X>(x), | |
std::forward<Y>(y)... ); | |
} | |
template< template<class...>class M, class X, class ...Y, | |
class _M = M< typename std::decay<X>::type >, | |
class Mo = Monad<Cat<_M>> > | |
constexpr auto mreturn( X&& x, Y&& ...y ) | |
-> decltype( Mo::template mreturn<_M>( std::declval<X>(), | |
std::declval<Y>()... ) ) | |
{ | |
return Mo::template mreturn<_M>( std::forward<X>(x), | |
std::forward<Y>(y)... ); | |
} | |
// Also has explicit template argument. | |
template< class M, class Mo = Monad<Cat<M>> > | |
auto mfail() -> decltype( Mo::template mfail<M>() ) { | |
return Mo::template mfail<M>(); | |
} | |
template< class M, class F > | |
constexpr auto operator >>= ( M&& m, F&& f ) | |
-> decltype( mbind(std::declval<F>(),std::declval<M>()) ) | |
{ | |
return mbind( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
template< class M, class F > | |
constexpr auto operator >> ( M&& m, F&& f ) | |
-> decltype( mdo(std::declval<M>(),std::declval<F>()) ) | |
{ | |
return mdo( std::forward<M>(m), std::forward<F>(f) ); | |
} | |
template< class F, class M > | |
constexpr auto operator ^ ( F&& f, M&& m ) | |
-> decltype( fmap(std::declval<F>(),std::declval<M>()) ) | |
{ | |
return fmap( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
template< > struct Monad< sequence_tag > { | |
template< class S > | |
using mvalue = typename S::value_type; | |
template< class F, template<class...>class S, class X, | |
class R = typename std::result_of<F(X)>::type > | |
static R mbind( F&& f, const S<X>& xs ) { | |
R r; | |
for( const X& x : xs ) { | |
auto ys = std::forward<F>(f)( x ); | |
std::move( std::begin(ys), std::end(ys), std::back_inserter(r) ); | |
} | |
return r; | |
} | |
template< class F, template<class...>class S, | |
class X, class Y, | |
class R = typename std::result_of<F(X,Y)>::type > | |
static R mbind( F&& f, const S<X>& xs, const S<Y>& ys ) { | |
R r; | |
for( const X& x : xs ) { | |
for( const Y& y : ys ) { | |
auto zs = std::forward<F>(f)( x, y ); | |
std::move( std::begin(zs), std::end(zs), | |
std::back_inserter(r) ); | |
} | |
} | |
return r; | |
} | |
template< template< class... > class S, class X, class Y > | |
static S<Y> mdo( const S<X>& mx, const S<Y>& my ) { | |
// Note: This is not a strictly correct definition. | |
// It should return my concatenated to itself for every element of mx. | |
return mx.size() ? my : S<Y>{}; | |
} | |
template< class S, class X > | |
static S mreturn( X&& x ) { | |
return S{ std::forward<X>(x) }; // Construct an S of one element. | |
} | |
template< class S > | |
static S mfail() { return S{}; } | |
}; | |
constexpr bool is_int( char c ) { | |
return c >= '0' and c <= '9'; | |
} | |
/* A Parser is a function taking a string and returning a vector of matches. */ | |
template< class X > struct Parser { | |
// The value is the target of the parse. (For example "1" may parse to int(1). | |
using value_type = X; | |
// A match consists of a value and the rest of the input to process. | |
using parse_state = std::pair<X,std::string>; | |
// A parse results in a list of matches. | |
using result_type = std::vector< std::pair<X,std::string> >; | |
using function_type = std::function< result_type( const std::string& ) >; | |
function_type f; | |
Parser( function_type f ) : f(std::move(f)) { } | |
Parser( const Parser<X>& p ) : f(p.f) { } | |
Parser() { } | |
result_type operator () ( const std::string& s ) const { | |
return f( s ); | |
} | |
}; | |
template< class X, class F > Parser<X> parser( F f ) { | |
using G = typename Parser<X>::function_type; | |
return Parser<X>( G(std::move(f)) ); | |
} | |
std::string tail( const std::string& s ) { | |
return std::string( std::next(s.begin()), s.end() ); | |
} | |
/* | |
* Unconditionally match a char if the string is not empty. | |
* Ex: item("abc") = ('a',"bc") | |
*/ | |
auto item = parser<char> ( | |
[]( const std::string& s ) { | |
using Pair = Parser<char>::parse_state; | |
using R = Parser<char>::result_type; | |
return s.size() ? R{ Pair( s[0], tail(s) ) } : R(); | |
} | |
); | |
template< class X > struct Monad< Parser<X> > { | |
using Pair = typename Parser<X>::parse_state; | |
using R = typename Parser<X>::result_type; | |
/* | |
* mreturn x = parser(s){ vector( pair(x,s) ) } | |
* Return a parsed value. Forwards rest of input to the next parser. | |
*/ | |
template< class M > | |
static M mreturn( X x ) { | |
return parser<typename M::value_type> ( | |
[x]( const std::string& s ) { | |
return R{ Pair(std::move(x),s) }; | |
} | |
); | |
} | |
/* a >> b = b */ | |
template< class Y, class Z > | |
static Parser<Y> mdo( Parser<Z> a, Parser<Y> b ) { | |
return a >>= [b]( const Z& z ) { return b; }; | |
} | |
/* Continue parsing from p into f. */ | |
template< class F, class Y = typename std::result_of<F(X)>::type > | |
static Y mbind( F f, Parser<X> p ) | |
{ | |
using Z = typename Y::value_type; | |
return parser<Z> ( | |
[f,p]( const std::string& s ) { | |
// First, extract p's matches. | |
return p(s) >>= [&]( Pair p ) { | |
// Then construct the new parser from the p's output. | |
// Continue parsing the remaining input with the new parser. | |
return f( std::move(p.first) )( std::move(p.second) ); | |
}; | |
} | |
); | |
} | |
}; | |
template< class ... > struct MonadZero; | |
template< class ... > struct MonadPlus; | |
template< class M, class Mo = MonadZero< Cat<M> > > | |
auto mzero() -> decltype( Mo::template mzero<M>() ) | |
{ | |
return Mo::template mzero<M>(); | |
} | |
template< class A, class B, class Mo = MonadPlus<Cat<A>> > | |
auto mplus( A&& a, B&& b ) -> decltype( Mo::mplus(std::declval<A>(),std::declval<B>()) ) | |
{ | |
return Mo::mplus( std::forward<A>(a), std::forward<B>(b) ); | |
} | |
template<> struct MonadZero< sequence_tag > { | |
template< class S > | |
S mzero() { return S{}; } | |
}; | |
/* mzero: a parser that matches nothing, no matter the input. */ | |
template< class X > struct MonadZero< Parser<X> > { | |
template< class P > | |
static P mzero() { | |
return parser<X> ( | |
[]( const std::string& s ){ | |
return std::vector<std::pair<X,std::string>>(); | |
} | |
); | |
} | |
}; | |
template< class X, class Y > | |
auto operator + ( X&& x, Y&& y ) -> decltype( mplus(std::declval<X>(),std::declval<Y>()) ) | |
{ | |
return mplus( std::forward<X>(x), std::forward<Y>(y) ); | |
} | |
/* mplus( xs, ys ) = "append xs with ys" */ | |
template<> struct MonadPlus< sequence_tag > { | |
template< class A, class B > | |
static A mplus( A a, const B& b ) { | |
std::copy( b.begin(), b.end(), std::back_inserter(a) ); | |
return a; | |
} | |
}; | |
/* mplus( pa, pb ) = "append the results of pa with the results of pb" */ | |
template< class X > struct MonadPlus< Parser<X> > { | |
using P = Parser<X>; | |
static P mplus( P a, P b ) { | |
return parser<X> ( | |
[=]( const std::string& s ) { return a(s) + b(s); } | |
); | |
} | |
}; | |
/* Like mplus, but take only one result. */ | |
template< class X > | |
Parser<X> mplus_first( const Parser<X>& a, const Parser<X>& b ) { | |
return parser<X> ( | |
[=]( const std::string s ) { | |
using V = std::vector< std::pair< X, std::string > >; | |
V c = (a + b)( s ); | |
return c.size() ? V{ c[0] } : V{}; | |
} | |
); | |
} | |
/* sat( pred ) = "item, if pred" */ | |
template< class P > | |
Parser<char> sat( P p ) { | |
return item >>= [=]( char c ) { | |
return p(c) ? mreturn<Parser>(c) : mzero<Parser<char>>(); | |
}; | |
} | |
/* Accept only a specific char. */ | |
Parser<char> pchar( char c ) { | |
return sat( [=](char d){ return c == d; } ); | |
} | |
/* Accept only a specific string. */ | |
Parser<std::string> string_( const std::string& s ) { | |
if( s.size() == 0 ) | |
return mreturn<Parser>( s ); | |
Parser<char> p = pchar( s[0] ); | |
for( auto it=std::next(s.begin()); it != s.end(); it++ ) | |
p = p >> pchar( *it ); | |
return p >> mreturn<Parser>(s); | |
} | |
Parser<std::string> string( const std::string& str ) { | |
return parser<std::string> ( | |
[str]( const std::string& s ) { | |
using R = typename Parser<std::string>::result_type; | |
if( std::equal(str.begin(),str.end(),s.begin()) ) { | |
return R { | |
{ str, s.substr( str.size() ) } | |
}; | |
} else { | |
return R(); | |
} | |
} | |
); | |
} | |
template< class X > | |
Parser<std::vector<X>> many( Parser<X> p ); | |
template< class X > | |
Parser<std::vector<X>> many1( Parser<X> p ); | |
/* Parse one or zero items with p. */ | |
template< class X > | |
Parser<std::vector<X>> many( Parser<X> p ) { | |
using V = std::vector<X>; | |
using Pair = std::pair<V,std::string>; | |
using R = std::vector< Pair >; | |
using P = Parser<V>; | |
return mplus( | |
parser<V> ( | |
[=]( const std::string& s ) { | |
auto matches = p(s); | |
if( not matches.size() ) | |
return R{}; | |
else { | |
// Recurse with the rest of the input. | |
R rest = many(p)( matches[0].second ); | |
// Create a vector out of the result from the first match. | |
V first = { std::move(matches[0].first) }; | |
// Prepend the previous result to every new match. | |
for( auto& match : rest ) | |
match = Pair( first + match.first, match.second ); | |
// Return all the matches. | |
return R { | |
Pair( std::move(first), matches[0].second ) | |
} + rest; | |
} | |
} | |
), | |
mreturn<Parser>( V{} ) | |
); | |
} | |
/* Parse one or zero items with p. */ | |
template< class X > | |
Parser<std::vector<X>> some( Parser<X> p ) { | |
using V = std::vector<X>; | |
using Pair = std::pair<V,std::string>; | |
using R = std::vector< Pair >; | |
using P = Parser<V>; | |
return mplus_first( | |
parser<V> ( | |
[=]( const std::string& s ) { | |
auto matches = p(s); | |
return matches.size() == 0 ? R{} | |
: R{ | |
Pair( | |
V{std::move(matches[0].first)}, | |
std::move( matches[0].second ) | |
) | |
}; | |
} | |
), | |
mreturn<Parser>( V{} ) | |
); | |
} | |
template< class X > | |
using ManyParser = Parser< std::vector<X> >; | |
ManyParser<char> space = some( sat([](char c){return std::isspace(c);}) ); | |
/* Parses p and consumes fallowing whitespace. */ | |
constexpr struct Token { | |
template< class X > | |
Parser<X> operator () ( Parser<X> p ) const { | |
return p >>= []( X x ) { | |
return space >> mreturn<Parser>(std::move(x)); | |
}; | |
} | |
} token{}; | |
/* symb(str) : Tokenize this string! */ | |
auto symb = compose( token, string ); | |
/* csymb(c) : Tokenize this char! */ | |
auto csymb = compose( token, pchar ); | |
/* Consume whitespace, then parse p. */ | |
constexpr struct Apply { | |
template< class X > | |
Parser<X> operator () ( Parser<X> p ) const { | |
return space >> std::move(p); | |
} | |
} apply{}; | |
/* | |
* chain(p,op): Parse with p infinitely (until there is no match) folding with op. | |
* | |
* p and op are both parsers, but op returns a binary function, given some | |
* input, and p returns the inputs to that function. For example, if: | |
* input: "4" | |
* p returns: 4 | |
* No operator is read, no operation is performed. But: | |
* input: "4+4" | |
* p returns: 4 | |
* op is then parsed with the function, rest: | |
* input: "+4" | |
* op returns: do_add | |
* input: "4" | |
* p returns: 4 | |
* rest returns: 8 | |
* rest applies the operation parsed by op. It alternates between parsing p and | |
* op until there are no more matches. op will fail if not fallowed by a p. | |
*/ | |
constexpr struct Chainl1 { | |
template< class X, class F > | |
static Parser<X> rest( const Parser<X>& p, const Parser<F>& op, const X& a ) { | |
// Alternate between op and p until input is consumed or a parse fails. | |
auto r = op >>= [=]( const F& f ) { | |
return p >>= [&]( const X& b ) { | |
return rest( p, op, f(a,b) ); | |
}; | |
}; | |
// Return the first successful parse, or a if none. | |
return mplus_first( r, mreturn<Parser>(a) ); | |
} | |
template< class X, class F > | |
Parser<X> operator () ( Parser<X> p, Parser<F> op ) const { | |
return p >>= closet( rest<X,F>, std::move(p), std::move(op) ); | |
} | |
} chainl1{}; | |
// Binary operations of type int(*)(int,int). | |
int do_add( int x, int y ) { return x + y; } | |
int do_sub( int x, int y ) { return x - y; } | |
int do_mult( int x, int y ) { return x * y; } | |
int do_div( int x, int y ) { return x / y; } | |
/* Convert a string (i.e. "+") and a function (do_add) to a symbolic parser. */ | |
template< class F > | |
Parser<F> csymb_op( char c, F f ) { | |
return csymb(c) >> mreturn<Parser>(f); | |
} | |
auto addop = csymb_op( '+', do_add ) + csymb_op( '-', do_sub ); | |
auto mulop = csymb_op( '*', do_mult ) + csymb_op( '/', do_div ); | |
//auto addop = mplus ( | |
// pchar('+') >> mreturn<Parser>(do_add), | |
// pchar('-') >> mreturn<Parser>(do_sub) | |
//); | |
// | |
//auto mulop = mplus ( | |
// pchar('*') >> mreturn<Parser>(do_mult), | |
// pchar('/') >> mreturn<Parser>(do_div) | |
//); | |
//auto digits = token( sat(is_num) ) >> mreturn<Parser>(consDigit); | |
constexpr bool is_num( char c ) { | |
return c >= '0' and c <= '9'; | |
} | |
/* Parse one digit. */ | |
Parser<int> digit = token( sat(is_num) ) >>= []( char i ) { | |
return mreturn<Parser>(i-'0'); | |
}; | |
/* | |
* Parse numbers. | |
* Ex: "123" -> (1,"23") -> (12,"3") -> 123 | |
*/ | |
int consDigit( int accum, int digit ) { return accum*10 + digit; } | |
Parser<int> num = chainl1( digit, mreturn<Parser>(consDigit) ); | |
/* | |
* Parse terms: series of numbers, multiplications and divisions. | |
* Ex: "1*3*2" -> (3,"*2") -> 6 | |
*/ | |
Parser<int> term = chainl1( num, mulop ); | |
/* | |
* Parse expressions: series of terms, additions and subtractions. | |
* Ex: "1+7*9-1" -> (1."+7*9-1") -> (63,"-1") -> 62 | |
*/ | |
Parser<int> expr = chainl1( term, addop ); | |
auto factor = mplus_first ( | |
digit, | |
symb("(") >> expr >>= []( int n ) { | |
return symb(")") >> mreturn<Parser>(n); | |
} | |
); | |
//auto term = chainl1( factor, mulop ); | |
static std::ostringstream oss; | |
template< class X > | |
static std::string show( const X& x ) { | |
oss.str( "" ); | |
oss << x; | |
return oss.str(); | |
} | |
static std::string show( std::string str ) { | |
return str; | |
} | |
static constexpr const char* show( const char* str ) { | |
return str; | |
} | |
template< class X, class Y, class ...Z > | |
static std::string show( const X& x, const Y& y, const Z& ...z ) | |
{ | |
return show(x) + show(y,z...); | |
} | |
std::ostream& operator << ( std::ostream& os, const std::string& s ) { | |
return os << '"' << s.c_str() << '"'; | |
} | |
template< class X, class Y > | |
std::ostream& operator << ( std::ostream& os, const std::pair<X,Y>& p ) { | |
return os << '(' << p.first << ',' << p.second << ')'; | |
} | |
template< class X > | |
std::ostream& operator << ( std::ostream& os, const std::unique_ptr<X>& p ) { | |
if( p ) | |
os << "Just " << *p; | |
else | |
os << "Nothing"; | |
return os; | |
} | |
template< class X > | |
std::ostream& operator << ( std::ostream& os, const std::vector<X>& v ) { | |
os << '['; | |
for( auto it=std::begin(v); it != std::end(v); it++ ) { | |
os << *it; | |
if( std::next(it) != std::end(v) ) | |
os << ','; | |
} | |
os << ']'; | |
return os; | |
} | |
auto p = item >>= []( char c ) { | |
return item >> item >>= [c]( char d ) { | |
return mreturn<Parser>( std::make_pair(c,d) ); | |
}; | |
}; | |
int main() { | |
using std::cin; | |
using std::cout; | |
using std::endl; | |
cout << "Welcome to the calculator!\n" | |
<< "Press ctrl+d or ctrl+c to exit.\n" | |
<< "Type in an equation and press enter to solve it!\n" << endl; | |
std::string input; | |
while( cout << "Solve : " and std::getline(std::cin,input) ) { | |
auto ans = apply(expr)(input); | |
if( ans.size() and ans[0].second.size() == 0 ) | |
cout << " = " << ans[0].first; | |
else | |
cout << "No answer."; | |
cout << endl; | |
} | |
// TEST CODE | |
//std::cout << p("abc") << endl; | |
//cout << space(" abc") << endl; | |
// | |
//Parser<int> twoDigit = digit >>= []( int x ) { | |
// return digit >>= [x]( int y ) { | |
// return mreturn<Parser>( x*10 + y ); | |
// }; | |
//}; | |
//cout << "digit >> digit \"22\" = " << (twoDigit)("22") << endl; | |
//std::string in = "121"; | |
//std::string in2 = "1221"; | |
//std::string in3 = "22212"; | |
//cout << "pchar '1' 121: " << pchar('1')( in ) << endl; | |
//cout << "pchar '1' >> pchar '2' 121: " << (pchar('1') >> pchar('2'))( in ) << endl; | |
//cout << "pchar '0' 1221: " << pchar('0')( in2 ) << endl; | |
//cout << "string '1' 121: " << string("1")( in ) << endl; | |
//cout << "string '12' 121: " << string("12")( in ) << endl; | |
//cout << "string '121' 121: " << string("121")( in ) << endl; | |
//cout << "string '021' 1221: " << string("121")( in2 ) << endl; | |
//cout << "many (pchar '2') 22212: " << many(pchar('2'))( in3 ) << endl; | |
//cout << "apply expr 1 - 2 = " << expr( "1 - 2 " ) << endl; | |
//cout << "apply expr 1 + 2 = " << expr( "1 + 2 " ) << endl; | |
//cout << "apply expr 1 * 2 = " << expr( "1 * 2 " ) << endl; | |
//cout << "apply expr 1 / 2 = " << expr( "1 / 2 " ) << endl; | |
//cout << "apply expr 1 - 2 * 3 = " << expr( "1 - 2 * 3 " ) << endl; | |
//cout << "apply expr 2 * 3 + 4 = " << expr( "2 * 3 + 4 " ) << endl; | |
//cout << "apply expr 1 - 20 * 3 + 4 = " << expr( "1 - 20 * 3 + 4" ) << endl; | |
//cout << "apply expr 1 - 2 / 3 + 4 = " << expr( "1-6 / 3+4" ) << endl; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <memory> | |
#include <iostream> | |
#include <sstream> | |
#include <utility> | |
#include <algorithm> | |
#include <iterator> | |
struct sequence_tag {}; | |
struct pointer_tag {}; | |
template< class X > | |
X category( ... ); | |
template< class S > | |
auto category( const S& s ) -> decltype( std::begin(s), sequence_tag() ); | |
template< class Ptr > | |
auto category( const Ptr& p ) -> decltype( *p, p==nullptr, pointer_tag() ); | |
template< class T > struct Category { | |
using type = decltype( category<T>(std::declval<T>()) ); | |
}; | |
template< class R, class ... X > struct Category< R(&)(X...) > { | |
using type = R(&)(X...); | |
}; | |
template< class T > | |
using Cat = typename Category< typename std::decay<T>::type >::type; | |
template<class...> struct Part; | |
template< class F, class X > | |
struct Part< F, X > | |
{ | |
F f; | |
X x; | |
template< class _F, class _X > | |
constexpr Part( _F&& f, _X&& x ) | |
: f(std::forward<_F>(f)), x(std::forward<_X>(x)) | |
{ | |
} | |
/* | |
* The return type of F only gets deduced based on the number of xuments | |
* supplied. Part otherwise has no idea whether f takes 1 or 10 xs. | |
*/ | |
template< class ... Xs > | |
constexpr auto operator() ( Xs&& ...xs ) | |
-> decltype( f(x,std::declval<Xs>()...) ) | |
{ | |
return f( x, std::forward<Xs>(xs)... ); | |
} | |
}; | |
/* Recursive, variadic version. */ | |
template< class F, class X1, class ...Xs > | |
struct Part< F, X1, Xs... > | |
: public Part< Part<F,X1>, Xs... > | |
{ | |
template< class _F, class _X1, class ..._Xs > | |
constexpr Part( _F&& f, _X1&& x1, _Xs&& ...xs ) | |
: Part< Part<F,X1>, Xs... > ( | |
Part<F,X1>( std::forward<_F>(f), std::forward<_X1>(x1) ), | |
std::forward<_Xs>(xs)... | |
) | |
{ | |
} | |
}; | |
template< class F, class ...X > | |
constexpr Part<F,X...> closure( F&& f, X&& ...x ) { | |
return Part<F,X...>( std::forward<F>(f), std::forward<X>(x)... ); | |
} | |
template< class F, class ...X > | |
constexpr Part<F,X...> closet( F f, X ...x ) { | |
return Part<F,X...>( std::move(f), std::move(x)... ); | |
} | |
template< class F, class ...G > | |
struct Composition; | |
template< class F, class G > | |
struct Composition<F,G> | |
{ | |
F f; G g; | |
template< class _F, class _G > | |
constexpr Composition( _F&& f, _G&& g ) | |
: f(std::forward<_F>(f)), g(std::forward<_G>(g)) { } | |
template< class X, class ...Y > | |
constexpr decltype( f(g(std::declval<X>()), std::declval<Y>()...) ) | |
operator() ( X&& x, Y&& ...y ) { | |
return f( g( std::forward<X>(x) ), std::forward<Y>(y)... ); | |
} | |
constexpr decltype( f(g()) ) operator () () { | |
return f(g()); | |
} | |
}; | |
template< class F, class G, class ...H > | |
struct Composition<F,G,H...> : Composition<F,Composition<G,H...>> | |
{ | |
typedef Composition<G,H...> Comp; | |
template< class _F, class _G, class ..._H > | |
constexpr Composition( _F&& f, _G&& g, _H&& ...h ) | |
: Composition<_F,Composition<_G,_H...>> ( | |
std::forward<_F>(f), | |
Comp( std::forward<_G>(g), std::forward<_H>(h)... ) | |
) | |
{ | |
} | |
}; | |
template< class F, class ...G > | |
constexpr Composition<F,G...> compose( F f, G ...g ) { | |
return Composition<F,G...>( std::move(f), std::move(g)... ); | |
} | |
template< class... > struct Functor; | |
template< class F, class FX, class Fun=Functor< Cat<FX> > > | |
constexpr auto fmap( F&& f, FX&& fx ) | |
-> decltype( Fun::fmap( std::declval<F>(), std::declval<FX>() ) ) | |
{ | |
return Fun::fmap( std::forward<F>(f), std::forward<FX>(fx) ); | |
} | |
// General case: compose | |
template< class Function > struct Functor<Function> { | |
template< class F, class G, class C = Composition<F,G> > | |
static constexpr C fmap( F f, G g ) { | |
C( std::move(f), std::move(g) ); | |
} | |
}; | |
template<> struct Functor< sequence_tag > { | |
template< class F, template<class...>class S, class X, | |
class R = typename std::result_of<F(X)>::type > | |
static S<R> fmap( F&& f, const S<X>& s ) { | |
S<R> r; | |
r.reserve( s.size() ); | |
std::transform( std::begin(s), std::end(s), | |
std::back_inserter(r), | |
std::forward<F>(f) ); | |
return r; | |
} | |
}; | |
template<> struct Functor< pointer_tag > { | |
template< class F, template<class...>class Ptr, class X, | |
class R = typename std::result_of<F(X)>::type > | |
static Ptr<R> fmap( F&& f, const Ptr<X>& p ) | |
{ | |
return p != nullptr | |
? Ptr<R>( new R( std::forward<F>(f)(*p) ) ) | |
: nullptr; | |
} | |
}; | |
template< class ... > struct Monad; | |
template< class F, class M, class ...N, class Mo=Monad<Cat<M>> > | |
constexpr auto mbind( F&& f, M&& m, N&& ...n ) | |
-> decltype( Mo::mbind(std::declval<F>(), | |
std::declval<M>(),std::declval<N>()...) ) | |
{ | |
return Mo::mbind( std::forward<F>(f), | |
std::forward<M>(m), std::forward<N>(n)... ); | |
} | |
template< class F, class M, class ...N, class Mo=Monad<Cat<M>> > | |
constexpr auto mdo( F&& f, M&& m ) | |
-> decltype( Mo::mdo(std::declval<F>(), std::declval<M>()) ) | |
{ | |
return Mo::mdo( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
// The first template argument must be explicit! | |
template< class M, class X, class ...Y, class Mo = Monad<Cat<M>> > | |
constexpr auto mreturn( X&& x, Y&& ...y ) | |
-> decltype( Mo::template mreturn<M>( std::declval<X>(), | |
std::declval<Y>()... ) ) | |
{ | |
return Mo::template mreturn<M>( std::forward<X>(x), | |
std::forward<Y>(y)... ); | |
} | |
template< template<class...>class M, class X, class ...Y, | |
class _M = M< typename std::decay<X>::type >, | |
class Mo = Monad<Cat<_M>> > | |
constexpr auto mreturn( X&& x, Y&& ...y ) | |
-> decltype( Mo::template mreturn<_M>( std::declval<X>(), | |
std::declval<Y>()... ) ) | |
{ | |
return Mo::template mreturn<_M>( std::forward<X>(x), | |
std::forward<Y>(y)... ); | |
} | |
// Also has explicit template argument. | |
template< class M, class Mo = Monad<Cat<M>> > | |
auto mfail() -> decltype( Mo::template mfail<M>() ) { | |
return Mo::template mfail<M>(); | |
} | |
namespace monad { | |
template< class M, class F > | |
constexpr auto operator >>= ( M&& m, F&& f ) | |
-> decltype( mbind(std::declval<F>(),std::declval<M>()) ) | |
{ | |
return mbind( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
template< class M, class F > | |
constexpr auto operator >> ( M&& m, F&& f ) | |
-> decltype( mdo(std::declval<M>(),std::declval<F>()) ) | |
{ | |
return mdo( std::forward<M>(m), std::forward<F>(f) ); | |
} | |
template< class F, class M > | |
constexpr auto operator ^ ( F&& f, M&& m ) | |
-> decltype( fmap(std::declval<F>(),std::declval<M>()) ) | |
{ | |
return fmap( std::forward<F>(f), std::forward<M>(m) ); | |
} | |
} | |
template< > struct Monad< pointer_tag > { | |
template< class P > | |
using mvalue = typename P::element_type; | |
template< class F, template<class...>class Ptr, class X, | |
class R = typename std::result_of<F(X)>::type > | |
static R mbind( F&& f, const Ptr<X>& p ) { | |
return p ? std::forward<F>(f)( *p ) : nullptr; | |
} | |
template< class F, template<class...>class Ptr, | |
class X, class Y, | |
class R = typename std::result_of<F(X,Y)>::type > | |
static R mbind( F&& f, const Ptr<X>& p, const Ptr<Y>& q ) { | |
return p and q ? std::forward<F>(f)( *p, *q ) : nullptr; | |
} | |
template< template< class... > class M, class X, class Y > | |
static M<Y> mdo( const M<X>& mx, const M<Y>& my ) { | |
return mx ? (my ? mreturn<M<Y>>(*my) : nullptr) | |
: nullptr; | |
} | |
template< class M, class X > | |
static M mreturn( X&& x ) { | |
using Y = typename M::element_type; | |
return M( new Y(std::forward<X>(x)) ); | |
} | |
template< class M > | |
static M mfail() { return nullptr; } | |
}; | |
template< > struct Monad< sequence_tag > { | |
template< class S > | |
using mvalue = typename S::value_type; | |
template< class F, template<class...>class S, class X, | |
class R = typename std::result_of<F(X)>::type > | |
static R mbind( F&& f, const S<X>& xs ) { | |
R r; | |
for( const X& x : xs ) { | |
auto ys = std::forward<F>(f)( x ); | |
std::move( std::begin(ys), std::end(ys), std::back_inserter(r) ); | |
} | |
return r; | |
} | |
template< class F, template<class...>class S, | |
class X, class Y, | |
class R = typename std::result_of<F(X,Y)>::type > | |
static R mbind( F&& f, const S<X>& xs, const S<Y>& ys ) { | |
R r; | |
for( const X& x : xs ) { | |
for( const Y& y : ys ) { | |
auto zs = std::forward<F>(f)( x, y ); | |
std::move( std::begin(zs), std::end(zs), | |
std::back_inserter(r) ); | |
} | |
} | |
return r; | |
} | |
template< template< class... > class S, class X, class Y > | |
static S<Y> mdo( const S<X>& mx, const S<Y>& my ) { | |
// Note: This is not a strictly correct definition. | |
// It should return my concatenated to itself for every element of mx. | |
return mx.size() ? my : S<Y>{}; | |
} | |
template< class S, class X > | |
static S mreturn( X&& x ) { | |
return S{ std::forward<X>(x) }; // Construct an S of one element. | |
} | |
template< class S > | |
static S mfail() { return S{}; } | |
}; | |
struct ExprType { | |
virtual int eval() = 0; | |
}; | |
using Expr = std::unique_ptr<ExprType>; | |
template< class E > | |
Expr expr( E e ) { | |
return Expr( new E(std::move(e)) ); | |
} | |
template< class R, class ...E > | |
Expr expr( E&& ...e ) { | |
return Expr( new R(std::forward<E>(e)...) ); | |
} | |
struct SubExp : ExprType { | |
Expr expr; | |
SubExp( Expr e ) : expr(std::move(e)) { } | |
}; | |
struct Int : ExprType { | |
int x = 0; | |
constexpr Int( int x ) : x(x) { } | |
Int( const std::string& str ) { | |
std::istringstream iss( str ); | |
iss >> x; | |
if( not iss ) | |
throw "expected int, got " + str; | |
} | |
int eval() { return x; } | |
}; | |
template< class F > | |
struct BinOp : ExprType { | |
Expr left, right; | |
BinOp( Expr left, Expr right ) | |
: left(std::move(left)), right(std::move(right)) { } | |
int eval() { | |
return F()( left->eval(), right->eval() ); | |
} | |
}; | |
using Adder = BinOp< std::plus<int> >; | |
using Subtractor = BinOp< std::minus<int> >; | |
using Multiplier = BinOp< std::multiplies<int> >; | |
using Divider = BinOp< std::divides<int> >; | |
std::vector<std::string> words( const std::string& sentence ) { | |
auto it = sentence.begin(); | |
std::vector<std::string> ws = {""}; | |
while( it != sentence.end() ) { | |
if( *it != ' ' ) | |
ws.back().push_back( *it ); | |
else | |
ws.emplace_back( "" ); | |
it++; | |
} | |
return ws; | |
} | |
struct Token { | |
enum TokenClass { | |
NUMBER, OPERATOR, PARREN, NCLASSES | |
}; | |
static const char* classNames[NCLASSES]; | |
TokenClass type; | |
std::string lexeme; | |
Token( TokenClass c, std::string s ) : type(c), lexeme(std::move(s)) { } | |
}; | |
const char* Token::classNames[] = { | |
"Number", "Operator", "Parren" | |
}; | |
constexpr bool is_int( char c ) { | |
return c >= '0' and c <= '9'; | |
} | |
#include <sstream> | |
std::vector<Token> tokenize( const std::string& sentence ) { | |
std::vector<Token> ts; | |
auto it = std::begin( sentence ); | |
for( ; *it and it != std::end(sentence); it++ ) { | |
if( *it == ' ' ) | |
continue; | |
else if( *it == ')' ) | |
ts.emplace_back( Token::PARREN, ")" ); | |
else if( *it == '(' ) | |
ts.emplace_back( Token::PARREN, "(" ); | |
else if( *it == '+' ) | |
ts.emplace_back( Token::OPERATOR, "+" ); | |
else if( *it == '-' ) | |
ts.emplace_back( Token::OPERATOR, "-" ); | |
else if( *it == '*' ) | |
ts.emplace_back( Token::OPERATOR, "*" ); | |
else if( *it == '/' ) | |
ts.emplace_back( Token::OPERATOR, "/" ); | |
else if( is_int(*it) ) { | |
auto e = it; | |
for( ; *e and is_int(*e); e++ ) | |
; | |
ts.emplace_back( Token::NUMBER, std::string(it,e) ); | |
it = e - 1; | |
} else { | |
std::stringstream ss; | |
ss << "unparsable token: '" << *it << "' (" << (int)*it << ')'; | |
throw ss.str(); | |
} | |
} | |
return ts; | |
} | |
using Tokens = std::vector<Token>; | |
using TokIt = Tokens::const_iterator; | |
struct TokenRange { | |
TokIt b, e; | |
TokenRange( Tokens ts ) : b(ts.begin()), e(ts.end()) { } | |
TokenRange( TokIt b, TokIt e ) : b(b), e(e) { } | |
bool empty() { return b == e; } | |
size_t size() { return e - b; } | |
}; | |
TokenRange shrink( TokenRange r ) { | |
r.b++; | |
return r; | |
} | |
Expr parse( const Tokens& ); | |
Expr parse_num_state( TokenRange ); | |
Expr parse_raise_state( Expr&& e, TokenRange ); | |
std::pair<TokenRange,Expr> parse_mult( TokenRange r ); | |
Expr parse( const Tokens& ts ) { | |
return parse_mult( ts ).second; | |
} | |
using ParseState = std::pair<TokenRange,Expr>; | |
ParseState parse_paren( TokenRange r ) { | |
unsigned int nestCount = 1; | |
auto it = std::find_if ( | |
r.b, r.e, [&]( const Token& t ){ | |
if( t.lexeme == "(" ) | |
nestCount++; | |
else if( t.lexeme == ")" ) | |
nestCount--; | |
return nestCount == 0; | |
} | |
); | |
return std::make_pair ( | |
TokenRange( std::next(it), r.e ), | |
parse_num_state( TokenRange(r.b,it) ) | |
); | |
} | |
Expr parse_num_state( TokenRange r ) { | |
if( r.empty() ) | |
throw "nothing to parse"; | |
if( r.b->type == Token::PARREN ) { | |
if( r.b->lexeme == ")" ) | |
throw "unmatched closing parren"; | |
auto state = parse_paren( shrink(r) ); | |
return parse_raise_state ( | |
std::move( state.second ), | |
state.first | |
); | |
} | |
const std::string& i = r.b->lexeme; | |
return parse_raise_state( expr<Int>(i), shrink(r) ); | |
} | |
std::pair<TokenRange,Expr> parse_mult( TokenRange r ) { | |
if( r.empty() ) | |
throw "nothing to parse"; | |
if( r.b->type == Token::PARREN ) { | |
if( r.b->lexeme == ")" ) | |
throw "unmatched closing parren"; | |
auto state = parse_paren( shrink(r) ); | |
return std::make_pair ( | |
state.first, | |
parse_raise_state( std::move(state.second), state.first ) | |
); | |
} | |
return std::make_pair ( | |
shrink( r ), | |
expr<Int>( r.b->lexeme ) | |
); | |
} | |
Expr parse_raise_state( Expr&& e, TokenRange r ) { | |
if( r.empty() ) | |
return Expr( std::move(e) ); | |
if( r.b->lexeme == "+" ) { | |
auto state = parse_mult( shrink(r) ); | |
return expr< Adder > ( | |
std::move(e), | |
parse_raise_state( std::move(state.second), state.first ) | |
); | |
} | |
if( r.b->lexeme == "-" ) { | |
auto state = parse_mult( shrink(r) ); | |
return expr< Subtractor > ( | |
std::move(e), | |
parse_raise_state( std::move(state.second), state.first ) | |
); | |
} | |
if( r.b->lexeme == "*" ) { | |
r = shrink(r); | |
auto operand = parse_mult( r ); | |
return parse_raise_state ( | |
expr< Multiplier >( std::move(e), std::move(operand.second) ), | |
TokenRange( operand.first ) | |
); | |
} | |
if( r.b->lexeme == "/" ) { | |
r = shrink(r); | |
auto operand = parse_mult( r ); | |
return parse_raise_state ( | |
expr< Divider >( std::move(e), std::move(operand.second) ), | |
TokenRange( operand.first ) | |
); | |
} | |
throw "raise state cannot parse \"" + r.b->lexeme + '"'; | |
} | |
//Expr parse_mult( std::vector<Token> ts ) { | |
// Expr e; | |
// | |
// auto it = std::begin( ts ); | |
// for( ; it != std::end( ts ); it++ ) { | |
// if( it->lexeme == "*" ) { | |
// auto left = it-1, right = it+1; | |
// if( left < std::begin(ts) ) | |
// throw "multiplication requires left operand"; | |
// if( right >= std::end(ts) ) | |
// throw "multiplication requires right operand"; | |
// if( left->type != Token::NUMBER ) | |
// throw "left argument must be a number"; | |
// if( right->type != Token::NUMBER ) | |
// throw "right argument must be a number"; | |
// e = new Multiplier{ left, right }; | |
// it = ts.erase( left, right ); | |
// ts.insert( it, | |
// } | |
struct exp_tag {}; | |
template< class E > | |
auto category( const E& e ) -> decltype( e.eval() ); | |
static std::ostringstream oss; | |
template< class X > | |
static std::string show( const X& x ) { | |
oss.str( "" ); | |
oss << x; | |
return oss.str(); | |
} | |
static std::string show( std::string str ) { | |
return str; | |
} | |
static constexpr const char* show( const char* str ) { | |
return str; | |
} | |
template< class X, class Y, class ...Z > | |
static std::string show( const X& x, const Y& y, const Z& ...z ) | |
{ | |
return show(x) + show(y,z...); | |
} | |
std::ostream& operator << ( std::ostream& os, const Token& t ) { | |
os << '<' << Token::classNames[t.type] << ',' << t.lexeme << '>'; | |
return os; | |
} | |
template< class X, class Y > | |
std::ostream& operator << ( std::ostream& os, const std::pair<X,Y>& p ) { | |
os << '(' << p.first << ',' << p.second << ')'; | |
return os; | |
} | |
template< class X > | |
std::ostream& operator << ( std::ostream& os, const std::unique_ptr<X>& p ) { | |
if( p ) | |
os << "Just " << *p; | |
else | |
os << "Nothing"; | |
return os; | |
} | |
template< class X > | |
std::ostream& operator << ( std::ostream& os, const std::vector<X>& v ) { | |
os << '['; | |
for( auto it=std::begin(v); it != std::end(v); it++ ) { | |
os << *it; | |
if( std::next(it) != std::end(v) ) | |
os << ','; | |
} | |
os << ']'; | |
return os; | |
} | |
int main() { | |
try { | |
std::string input = "(((1-33) * (3+4/2)) + 5 + 5 - 0)"; | |
std::cout << "Input: " << input << std::endl; | |
auto ts = tokenize( input ); | |
std::cout << "Tokens: " << ts << std::endl; | |
auto tree = parse( ts ); | |
std::cout << "Evaluation: " << tree->eval() << std::endl; | |
} catch( const std::string& msg ) { | |
std::cout << "Parser exited unexpectedly with: " << msg << std::endl; | |
} catch( const char* const msg ) { | |
std::cout << "Parser exited unexpectedly with: " << msg << std::endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment