Skip to content

Instantly share code, notes, and snippets.

@brokenpylons
Created March 10, 2021 10:58
Show Gist options
  • Select an option

  • Save brokenpylons/f94b87c2b5f593bdf35882adb6727d08 to your computer and use it in GitHub Desktop.

Select an option

Save brokenpylons/f94b87c2b5f593bdf35882adb6727d08 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <limits>
#include <algorithm>
#include <vector>
#include <optional>
#include <sstream>
const int error = 0;
const int ignore = -1;
const auto alphabet_size = std::numeric_limits<char>::max();
class Automaton {
private:
int states;
int start;
int state;
const int (*table)[alphabet_size];
const int *accept;
public:
Automaton(const int states, const int start, const int (*table)[alphabet_size], const int *accept)
: states(states), start(start), state(start), table(table), accept(accept) {};
void reset();
void next(char c);
void next(const std::string &s);
bool test(char c) const;
int match() const;
int get_state() const;
void print_table() const;
};
void Automaton::reset() {
state = start;
}
void Automaton::next(char c) {
state = table[state][c];
}
void Automaton::next(const std::string &s) {
for (char c : s) { next(c); }
}
bool Automaton::test(char c) const {
return table[state][c] != error;
}
int Automaton::match() const {
return accept[state];
}
int Automaton::get_state() const {
return state;
}
void Automaton::print_table() const {
for (size_t i = 0; i < states; i++) {
for (size_t j = 0; j < alphabet_size; j++) {
std::cout << table[i][j] << ",";
}
std::cout << "\n";
}
}
struct Token {
int start_row, end_row, start_column, end_column;
std::string lexeme;
int value;
bool eof;
};
class Scanner {
private:
Automaton a;
std::istream &is;
int row = 1, column = 1;
Token next_helper();
void update_pos(char);
public:
Scanner(const Automaton &a, std::istream &is) : a(a), is(is) {}
Token next();
};
void Scanner::update_pos(char c) {
if (c == '\n') {
column++;
row = 1;
} else {
row++;
}
}
Token Scanner::next_helper() {
a.reset();
char c;
int start_row = row, start_column = column;
std::string lexeme;
while (is.peek() != std::char_traits<char>::eof() && a.test(is.peek()) && is.get(c)) {
a.next(c);
update_pos(c);
lexeme.push_back(c);
}
return Token {
start_row,
row,
start_column,
column,
lexeme,
a.match(),
is.eof()
};
}
Token Scanner::next() {
Token token = next_helper();
if (token.value == ignore) {
return next();
}
return token;
}
std::vector<Token> scan(Scanner &s) {
std::vector<Token> tokens;
Token token;
do {
token = s.next();
if (token.value == error) break;
tokens.push_back(token);
} while (!token.eof);
return tokens;
}
int main() {
const int states = 5;
enum value{error, keyword};
int table[states][alphabet_size] = {};
int accept[states] = {};
table[1]['a'] = 2;
table[1]['e'] = 3;
for (char c = '0'; c <= '9'; c++) {
table[2][c] = 4;
}
for (char c = 'a'; c <= 'z'; c++) {
table[3][c] = 4;
}
table[3]['b'] = 4;
table[4]['d'] = 5;
accept[5] = keyword;
//accept[4] = ignore;
Automaton a(states, 1, table, accept);
a.print_table();
std::cout << a.get_state() << std::endl;
a.next("a0d");
std::cout << a.get_state() << std::endl;
std::cout << a.match() << std::endl;
std::istringstream is("abcaaab");
Scanner scanner(a, is);
const auto &tokens = scan(scanner);
for (const auto &token : tokens) {
std::cout << token.lexeme << std::endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment