Created
March 10, 2021 10:58
-
-
Save brokenpylons/f94b87c2b5f593bdf35882adb6727d08 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <limits> | |
| #include <algorithm> | |
| #include <vector> | |
| #include <optional> | |
| #include <sstream> | |
| const int error = 0; | |
| const int ignore = -1; | |
| const auto alphabet_size = std::numeric_limits<char>::max(); | |
| class Automaton { | |
| private: | |
| int states; | |
| int start; | |
| int state; | |
| const int (*table)[alphabet_size]; | |
| const int *accept; | |
| public: | |
| Automaton(const int states, const int start, const int (*table)[alphabet_size], const int *accept) | |
| : states(states), start(start), state(start), table(table), accept(accept) {}; | |
| void reset(); | |
| void next(char c); | |
| void next(const std::string &s); | |
| bool test(char c) const; | |
| int match() const; | |
| int get_state() const; | |
| void print_table() const; | |
| }; | |
| void Automaton::reset() { | |
| state = start; | |
| } | |
| void Automaton::next(char c) { | |
| state = table[state][c]; | |
| } | |
| void Automaton::next(const std::string &s) { | |
| for (char c : s) { next(c); } | |
| } | |
| bool Automaton::test(char c) const { | |
| return table[state][c] != error; | |
| } | |
| int Automaton::match() const { | |
| return accept[state]; | |
| } | |
| int Automaton::get_state() const { | |
| return state; | |
| } | |
| void Automaton::print_table() const { | |
| for (size_t i = 0; i < states; i++) { | |
| for (size_t j = 0; j < alphabet_size; j++) { | |
| std::cout << table[i][j] << ","; | |
| } | |
| std::cout << "\n"; | |
| } | |
| } | |
| struct Token { | |
| int start_row, end_row, start_column, end_column; | |
| std::string lexeme; | |
| int value; | |
| bool eof; | |
| }; | |
| class Scanner { | |
| private: | |
| Automaton a; | |
| std::istream &is; | |
| int row = 1, column = 1; | |
| Token next_helper(); | |
| void update_pos(char); | |
| public: | |
| Scanner(const Automaton &a, std::istream &is) : a(a), is(is) {} | |
| Token next(); | |
| }; | |
| void Scanner::update_pos(char c) { | |
| if (c == '\n') { | |
| column++; | |
| row = 1; | |
| } else { | |
| row++; | |
| } | |
| } | |
| Token Scanner::next_helper() { | |
| a.reset(); | |
| char c; | |
| int start_row = row, start_column = column; | |
| std::string lexeme; | |
| while (is.peek() != std::char_traits<char>::eof() && a.test(is.peek()) && is.get(c)) { | |
| a.next(c); | |
| update_pos(c); | |
| lexeme.push_back(c); | |
| } | |
| return Token { | |
| start_row, | |
| row, | |
| start_column, | |
| column, | |
| lexeme, | |
| a.match(), | |
| is.eof() | |
| }; | |
| } | |
| Token Scanner::next() { | |
| Token token = next_helper(); | |
| if (token.value == ignore) { | |
| return next(); | |
| } | |
| return token; | |
| } | |
| std::vector<Token> scan(Scanner &s) { | |
| std::vector<Token> tokens; | |
| Token token; | |
| do { | |
| token = s.next(); | |
| if (token.value == error) break; | |
| tokens.push_back(token); | |
| } while (!token.eof); | |
| return tokens; | |
| } | |
| int main() { | |
| const int states = 5; | |
| enum value{error, keyword}; | |
| int table[states][alphabet_size] = {}; | |
| int accept[states] = {}; | |
| table[1]['a'] = 2; | |
| table[1]['e'] = 3; | |
| for (char c = '0'; c <= '9'; c++) { | |
| table[2][c] = 4; | |
| } | |
| for (char c = 'a'; c <= 'z'; c++) { | |
| table[3][c] = 4; | |
| } | |
| table[3]['b'] = 4; | |
| table[4]['d'] = 5; | |
| accept[5] = keyword; | |
| //accept[4] = ignore; | |
| Automaton a(states, 1, table, accept); | |
| a.print_table(); | |
| std::cout << a.get_state() << std::endl; | |
| a.next("a0d"); | |
| std::cout << a.get_state() << std::endl; | |
| std::cout << a.match() << std::endl; | |
| std::istringstream is("abcaaab"); | |
| Scanner scanner(a, is); | |
| const auto &tokens = scan(scanner); | |
| for (const auto &token : tokens) { | |
| std::cout << token.lexeme << std::endl; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment