Created
March 15, 2021 20:47
-
-
Save brokenpylons/a8e53d4bb51493a395e7a34355bf9949 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "Automaton.h" | |
| #include <iostream> | |
| void Automaton::reset() { | |
| state = start; | |
| } | |
| void Automaton::next(char c) { | |
| state = table[state][c]; | |
| } | |
| void Automaton::next(const std::string &s) { | |
| for (char c : s) { next(c); } | |
| } | |
| bool Automaton::test(char c) const { | |
| return table[state][c] != error_state; | |
| } | |
| int Automaton::match() const { | |
| return accept[state]; | |
| } | |
| int Automaton::get_state() const { | |
| return state; | |
| } | |
| void Automaton::print_table() const { | |
| for (size_t i = 0; i < states; i++) { | |
| for (size_t j = 0; j < alphabet_size; j++) { | |
| std::cout << table[i][j] << ","; | |
| } | |
| std::cout << "\n"; | |
| } | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #ifndef PPJ_AUTOMATON_H | |
| #define PPJ_AUTOMATON_H | |
| #include <string> | |
| #include <limits> | |
| #include <vector> | |
| const int error_state = 0; | |
| const int error_value = 0; | |
| const int ignore_value = 1; | |
| class Automaton { | |
| public: | |
| const static int alphabet_size = std::numeric_limits<char>::max(); | |
| private: | |
| int states; | |
| int start; | |
| int state; | |
| std::vector<std::vector<int>> table; | |
| std::vector<int> accept; | |
| public: | |
| Automaton(const int states, const int start, const std::vector<std::vector<int>> &table, const std::vector<int> &accept) | |
| : states(states), start(start), state(start), table(table), accept(accept) {}; | |
| void reset(); | |
| void next(char c); | |
| void next(const std::string &s); | |
| bool test(char c) const; | |
| int match() const; | |
| int get_state() const; | |
| void print_table() const; | |
| }; | |
| #endif //PPJ_AUTOMATON_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <memory> | |
| #include <sstream> | |
| #include "Parser_ABC.h" | |
| int main() { | |
| auto is = std::make_unique<std::istringstream>("sd"); | |
| Parser_ABC parser; | |
| std::cout << parser.parse(std::move(is)) << std::endl; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "Parser_ABC.h" | |
| std::string value_to_string(int value) { | |
| switch (value) { | |
| case error_value: return "error"; | |
| case ignore_value: return "ignore"; | |
| case s: return "s"; | |
| case a: return "a"; | |
| case b: return "b"; | |
| case c: return "c"; | |
| case d: return "d"; | |
| } | |
| throw std::logic_error("Invalid value"); | |
| } | |
| Automaton Parser_ABC::make_automaton() { | |
| const int states = 7; | |
| std::vector<std::vector<int>> table(states, std::vector<int>(Automaton::alphabet_size, 0)); | |
| std::vector<int> accept(states, 0); | |
| table[1]['s'] = 2; | |
| table[1]['a'] = 3; | |
| table[1]['b'] = 4; | |
| table[1]['c'] = 5; | |
| table[1]['d'] = 6; | |
| accept[2] = s; | |
| accept[3] = a; | |
| accept[4] = b; | |
| accept[5] = c; | |
| accept[6] = d; | |
| return Automaton(states, 1, table, accept); | |
| } | |
| bool Parser_ABC::eof() { | |
| Token token = scanner->current(); | |
| return token.eof; | |
| } | |
| bool Parser_ABC::match(int value) { | |
| Token token = scanner->next(); | |
| return token.value == value; | |
| } | |
| /* | |
| S ::= s A B | C | |
| FIRST(s A B FOLLOW(S)) = {s} | |
| FIRST(C FOLLOW(S)) = {c} | |
| A ::= a | epsilon | |
| FIRST(a FOLLOW(A)) = {a} | |
| FIRST(epsilon FOLLOW(A)) = {b, d} | |
| B ::= b | |
| FIRST(b FOLLOW(B)) = {b} | |
| C ::= c A d | |
| FIRST(c FOLLOW(C)) = {c} | |
| */ | |
| bool Parser_ABC::parse(std::unique_ptr<std::istream> is) { | |
| scanner = Scanner(automaton, std::move(is)); | |
| return parse_S() && eof(); | |
| } | |
| bool Parser_ABC::parse_S() { | |
| Token token = scanner->current(); | |
| switch (token.value) { | |
| case s: | |
| return match(s) && parse_A() && parse_B(); | |
| case c: | |
| return parse_C(); | |
| } | |
| return false; | |
| } | |
| bool Parser_ABC::parse_A() { | |
| Token token = scanner->current(); | |
| switch (token.value) { | |
| case a: | |
| return match(a); | |
| case b: | |
| case d: | |
| return true; | |
| } | |
| return false; | |
| } | |
| bool Parser_ABC::parse_B() { | |
| Token token = scanner->current(); | |
| switch (token.value) { | |
| case b: | |
| return match(b); | |
| } | |
| return false; | |
| } | |
| bool Parser_ABC::parse_C() { | |
| Token token = scanner->current(); | |
| switch (token.value) { | |
| case c: | |
| return match(c) && parse_A() && match(d); | |
| } | |
| return false; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #ifndef PPJ_PARSER_ABC_H | |
| #define PPJ_PARSER_ABC_H | |
| #include "Scanner.h" | |
| #include <optional> | |
| enum value {s = 2, a, b, c, d}; | |
| std::string value_to_string(int value); | |
| class Parser_ABC { | |
| private: | |
| Automaton automaton; | |
| std::optional<Scanner> scanner; | |
| static Automaton make_automaton(); | |
| bool parse_S(); | |
| bool parse_A(); | |
| bool parse_B(); | |
| bool parse_C(); | |
| public: | |
| Parser_ABC() : automaton(make_automaton()), scanner(std::nullopt) {}; | |
| bool eof(); | |
| bool match(int); | |
| bool parse(std::unique_ptr<std::istream> is); | |
| }; | |
| #endif //PPJ_PARSER_ABC_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "Scanner.h" | |
| void Scanner::update_pos(char c) { | |
| if (c == '\n') { | |
| column++; | |
| row = 1; | |
| } else { | |
| row++; | |
| } | |
| } | |
| Token Scanner::next_helper() { | |
| a.reset(); | |
| char c; | |
| int start_row = row, start_column = column; | |
| std::string lexeme; | |
| while (is->get(c)) { | |
| if (!a.test(c)) { | |
| is->unget(); | |
| break; | |
| } | |
| a.next(c); | |
| update_pos(c); | |
| lexeme.push_back(c); | |
| } | |
| return Token { | |
| start_row, | |
| row, | |
| start_column, | |
| column, | |
| lexeme, | |
| a.match(), | |
| is->eof() | |
| }; | |
| } | |
| Token Scanner::next_ignore_helper() { | |
| Token token = next_helper(); | |
| if (token.value == ignore_value) { | |
| return next_ignore_helper(); | |
| } | |
| return token; | |
| } | |
| Token Scanner::next() { | |
| Token token = *buffer; | |
| buffer = next_ignore_helper(); | |
| return token; | |
| } | |
| Token Scanner::current() { | |
| if (!buffer) { | |
| buffer = next_ignore_helper(); | |
| } | |
| return *buffer; | |
| } | |
| std::vector<Token> scan(Scanner &s) { | |
| std::vector<Token> tokens; | |
| Token token; | |
| do { | |
| token = s.next(); | |
| if (token.value == error_value) break; | |
| tokens.push_back(token); | |
| } while (!token.eof); | |
| return tokens; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #ifndef PPJ_SCANNER_H | |
| #define PPJ_SCANNER_H | |
| #include "Automaton.h" | |
| #include <string> | |
| #include <iostream> | |
| #include <vector> | |
| #include <optional> | |
| #include <memory> | |
| struct Token { | |
| int start_row, end_row, start_column, end_column; | |
| std::string lexeme; | |
| int value; | |
| bool eof; | |
| }; | |
| class Scanner { | |
| private: | |
| Automaton a; | |
| std::unique_ptr<std::istream> is; | |
| int row = 1, column = 1; | |
| Token next_helper(); | |
| Token next_ignore_helper(); | |
| void update_pos(char); | |
| std::optional<Token> buffer; | |
| public: | |
| Scanner(const Automaton &a, std::unique_ptr<std::istream> is) : a(a), is(std::move(is)), buffer(std::nullopt) {} | |
| Token next(); | |
| Token current(); | |
| }; | |
| std::vector<Token> scan(Scanner &); | |
| #endif //PPJ_SCANNER_H |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment