Last active
December 20, 2015 14:58
-
-
Save vittorioromeo/6150427 to your computer and use it in GitHub Desktop.
bleh, Json parser tests
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int main() | |
{ | |
enum class TokenType | |
{ | |
BraceOpenCurly, | |
BraceCloseCurly, | |
String, | |
Number, | |
Whitespace, | |
Assign, | |
BraceOpenSquare, | |
BraceCloseSquare, | |
Comma, | |
True, | |
False, | |
Null | |
}; | |
auto tts = [](TokenType t) | |
{ | |
switch(t) | |
{ | |
case TokenType::BraceOpenCurly: return "BraceOpenCurly"; | |
case TokenType::BraceCloseCurly: return "BraceCloseCurly"; | |
case TokenType::String: return "String"; | |
case TokenType::Number: return "Number"; | |
case TokenType::Whitespace: return "Whitespace"; | |
case TokenType::Assign: return "Assign"; | |
case TokenType::BraceOpenSquare: return "BraceOpenSquare"; | |
case TokenType::BraceCloseSquare: return "BraceCloseSquare"; | |
case TokenType::Comma: return "Comma"; | |
case TokenType::True: return "True"; | |
case TokenType::False: return "False"; | |
case TokenType::Null: return "Null"; | |
} | |
return "NULLTKN"; | |
}; | |
vj::Lexer<TokenType> t; | |
t.createRule(TokenType::BraceOpenCurly, R"(\{)"); | |
t.createRule(TokenType::BraceCloseCurly, R"(\})"); | |
t.createRule(TokenType::BraceOpenSquare, R"(\[)"); | |
t.createRule(TokenType::BraceCloseSquare, R"(\])"); | |
t.createRule(TokenType::Assign, R"(\:)"); | |
t.createRule(TokenType::String, R"(".*")"); | |
t.createRule(TokenType::Number, R"([0-9]+(?:\.[0-9]*)?)"); | |
t.createRule(TokenType::Whitespace, R"([ \t\r\n])"); | |
t.createRule(TokenType::Comma, R"(\,)"); | |
t.createRule(TokenType::True, R"(true)"); | |
t.createRule(TokenType::False, R"(false)"); | |
t.createRule(TokenType::Null, R"(null)"); | |
for(const auto token : t.tokenize(R"({ "test_array": [1, 2, 3, 4], "test_null": null })")) | |
{ | |
if(token.type == TokenType::Whitespace) continue; | |
lo << left << std::setw(20) << tts(token.type) << std::setw(15) << token.str << endl; | |
} | |
enum class GrammarType | |
{ | |
BraceOpenCurly, | |
BraceCloseCurly, | |
String, | |
Number, | |
Whitespace, | |
Assign, | |
BraceOpenSquare, | |
BraceCloseSquare, | |
Comma, | |
True, | |
False, | |
Null, | |
Json, | |
Object, | |
Array, | |
Members, | |
Pair, | |
Elements, | |
Value | |
}; | |
vj::Parser<GrammarType, TokenType> p; | |
p.createRule(GrammarType::Json, {GrammarType::Object}); | |
p.createRule(GrammarType::Json, {GrammarType::Array}); | |
p.createRule(GrammarType::Object, {GrammarType::BraceOpenCurly, GrammarType::BraceCloseCurly}); | |
p.createRule(GrammarType::Object, {GrammarType::BraceOpenCurly, GrammarType::Members, GrammarType::BraceCloseCurly}); | |
p.createRule(GrammarType::Members, {GrammarType::Pair}); | |
p.createRule(GrammarType::Members, {GrammarType::Pair, GrammarType::Comma, GrammarType::Members}); | |
p.createRule(GrammarType::Pair, {GrammarType::String, GrammarType::Assign, GrammarType::Value}); | |
p.createRule(GrammarType::Array, {GrammarType::BraceOpenSquare, GrammarType::BraceCloseSquare}); | |
p.createRule(GrammarType::Array, {GrammarType::BraceOpenSquare, GrammarType::Elements, GrammarType::BraceCloseSquare}); | |
p.createRule(GrammarType::Elements, {GrammarType::Value}); | |
p.createRule(GrammarType::Elements, {GrammarType::Value, GrammarType::Comma, GrammarType::Elements}); | |
p.createRule(GrammarType::Value, {GrammarType::String}); | |
p.createRule(GrammarType::Value, {GrammarType::Number}); | |
p.createRule(GrammarType::Value, {GrammarType::Object}); | |
p.createRule(GrammarType::Value, {GrammarType::Array}); | |
p.createRule(GrammarType::Value, {GrammarType::True}); | |
p.createRule(GrammarType::Value, {GrammarType::False}); | |
p.createRule(GrammarType::Value, {GrammarType::Null}); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef VEEJSON | |
#define VEEJSON | |
#include <functional>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/regex.hpp>
namespace ssvu | |
{ | |
namespace vj | |
{ | |
/// @brief A piece of lexed text tagged with its type.
/// @tparam TTokenType Type used to tag the token.
template<typename TTokenType> struct Token
{
    TTokenType type; ///< Tag identifying what kind of token this is.
    std::string str; ///< Text carried by the token.

    /// @brief Builds a token from a tag and its text.
    /// @param mType Tag stored in `type`.
    /// @param mStr  Text stored in `str`; taken by value and moved into
    ///              place so that temporaries are not copied.
    Token(TTokenType mType, std::string mStr) : type{mType}, str{std::move(mStr)} { }
};
template<typename TTokenType> class Lexer | |
{ | |
private: | |
struct Rule { TTokenType type; boost::regex regex; }; | |
std::vector<Rule> rules; | |
public: | |
inline void createRule(TTokenType mTokenType, const std::string& mRegexString) | |
{ | |
Rule result; | |
result.type = mTokenType; | |
result.regex = boost::regex{mRegexString}; | |
rules.push_back(result); | |
} | |
inline std::vector<Token<TTokenType>> tokenize(const std::string& mString) | |
{ | |
std::vector<Token<TTokenType>> result; | |
auto itr = std::begin(mString); | |
auto start = itr; | |
while(itr != std::end(mString)) | |
{ | |
++itr; | |
for(auto& r : rules) | |
{ | |
if(!boost::regex_match(std::string{start, itr}, r.regex)) continue; | |
while(itr != std::end(mString) && boost::regex_match(std::string{start, itr + 1}, r.regex)) ++itr; | |
result.emplace_back(r.type, std::string{start, itr}); | |
start = itr; | |
break; | |
} | |
} | |
return result; | |
} | |
}; | |
/// @brief Stores grammar production rules.
/// @tparam TGrammarType Type identifying grammar symbols.
/// @tparam TTokenType   Token tag type; not used by any member of this class.
template<typename TGrammarType, typename TTokenType> class Parser
{
    private:
        /// A single production: `from` expands to the sequence `to`.
        struct Rule
        {
            TGrammarType from;
            std::vector<TGrammarType> to;
        };

        /// Productions in the order they were registered.
        std::vector<Rule> rules;

    public:
        /// @brief Appends the production `mFrom -> mTo` to the rule list.
        /// @param mFrom Symbol on the left-hand side.
        /// @param mTo   Symbols on the right-hand side, in order.
        inline void createRule(TGrammarType mFrom, const std::initializer_list<TGrammarType>& mTo)
        {
            rules.push_back(Rule{mFrom, std::vector<TGrammarType>(mTo)});
        }
};
} | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment