Skip to content

Instantly share code, notes, and snippets.

@zhenghaoz
Last active October 24, 2016 08:18
Show Gist options
  • Save zhenghaoz/435706b11c192f0358f43ef9b515ef5c to your computer and use it in GitHub Desktop.
Save zhenghaoz/435706b11c192f0358f43ef9b515ef5c to your computer and use it in GitHub Desktop.
Regexp to NFA(LaTeX)
#include <iostream>
#include <cstdio>
#include <vector>
#include <string>
#include <stack>
#include <cstdlib>
#include <ctime>
#include <queue>
using namespace std;
// #define NFA_DEBUG
#define INPUT_EP -64
#define STATE_INIT 0x1
#define STATE_ACCEPT 0x2
#define MIN(a,b) ((a)<(b)?(a):(b))
char header[] = "\\documentclass{article}\n\
\\usepackage{pgf}\n\
\\usepackage{tikz}\n\
\\usetikzlibrary{arrows,automata}\n\
\\usepackage[latin1]{inputenc}\n\
\\usepackage{geometry}\n\
\\geometry{papersize={32cm,32cm}}\n\
\\begin{document}\n\
\\begin{tikzpicture}[->,>=stealth',shorten >=1pt,auto,node distance=2.8cm,semithick]\n";
char footer[] = "\\end{tikzpicture}\n\\end{document}\n";
int stateId = 0; // 记录当前可分配的状态ID
// 节点数据结构
struct State
{
int tag; // 访问标记(用于访问)
int attr; // 状态属性(起始?终止?)
int id; // 状态ID
vector<char> inputs; // 进入后继状态需要的输入
vector<State*> nexts; // 后继状态指针
State(): attr(0), tag(-1), id(stateId++) {}
// 增加一条状态转换
void addNextState(char input, State *next)
{
inputs.push_back(input);
nexts.push_back(next);
}
};
// NFA数据结构
struct NFA
{
State *in; // 入口
State *out; // 出口
int left; // 对应子表达式的偏移量
NFA() = default;
NFA(State *in, State *out, int left): in(in), out(out), left(left) {}
};
// 打印状态
// 参数
// out :输出流
// node :状态指针
// lastid :上个状态的ID
// 返回 :输出流
ostream &printState(ostream& out, State *node, int& lastid)
{
cout << "\\node[state";
if (node->attr & STATE_INIT) cout << ",initial";
if (node->attr & STATE_ACCEPT) cout << ",accepting";
cout << "](" << node->id << ")";
if (lastid != -1) cout << "[below right of=" << lastid << "]";
lastid = node->id;
cout << "{$" << node->id << "$};" << endl;
return out;
}
// 打印NFA
// 参数
// out :输出流
// nfa :NFA
// 返回 :输出流
ostream &operator<<(ostream& out, const NFA &nfa)
{
cout << header;
int tag = rand();
int lastid = -1;
queue<State*> q;
q.push(nfa.in);
nfa.in->tag = tag;
printState(out, nfa.in, lastid);
while (!q.empty()) {
State *node = q.front();
q.pop();
for (int i = 0; i < node->nexts.size(); i++) {
char input = node->inputs[i];
State *next = node->nexts[i];
if (next->tag != tag) {
next->tag = tag;
printState(out, next, lastid);
q.push(next);
}
cout << "\\path(" << node->id << ") edge[bend right] node{$";
if (input == INPUT_EP) cout << "\\epsilon";
else cout << input;
cout << "$}(" << next->id << ");" << endl;
}
}
cout << footer << endl;
return out;
}
// 编译字符
// 参数
// exp :正则表达式字符串
// i :字符所在位置
// 返回值 :NFA
NFA compile(const string &exp, int i)
{
State *in = new State();
State *out = new State();
#ifdef NFA_DEBUG
clog << "compile " << in->id << "-" << exp[i] << "->" << out->id << endl;
#endif
in->addNextState(exp[i], out);
return NFA(in, out, i);
}
// 串联NFA
// 参数
// lhs :在前的NFA
// rhs :在后的NFA
// 返回值 :NFA
NFA cascade(const NFA &lhs, const NFA &rhs)
{
#ifdef NFA_DEBUG
clog << "cascade " << lhs.in->id << "->" << lhs.out->id << " and " << rhs.in->id << "->" << rhs.out->id << endl;
#endif
lhs.out->inputs = rhs.in->inputs;
lhs.out->nexts = rhs.in->nexts;
delete rhs.in;
return NFA(lhs.in, rhs.out, lhs.left);
}
// 并联NFA
// 参数
// lhs :NFA1
// rhs :NFA2
// 返回值 :NFA
NFA multiple(const NFA &lhs, const NFA &rhs)
{
#ifdef NFA_DEBUG
clog << "multiple " << lhs.in->id << "->" << lhs.out->id << " and " << rhs.in->id << "->" << rhs.out->id << endl;
#endif
State *in = new State();
State *out = new State();
in->addNextState(INPUT_EP, lhs.in);
in->addNextState(INPUT_EP, rhs.in);
lhs.out->addNextState(INPUT_EP, out);
rhs.out->addNextState(INPUT_EP, out);
return NFA(in, out, MIN(lhs.left, rhs.left));
}
// 重复NFA
// 参数
// nfa :需要重复的NFA
// 返回值 :NFA
NFA repeat(const NFA &nfa)
{
State *in = new State();
State *out = new State();
in->addNextState(INPUT_EP, nfa.in);
in->addNextState(INPUT_EP, out);
nfa.out->addNextState(INPUT_EP, nfa.in);
nfa.out->addNextState(INPUT_EP, out);
return NFA(in, out, nfa.left);
}
// 自动串联NFA栈中符合条件的NFA
// 参数
// nfaStack :NFA栈
// opstack :符号栈
void autoCascade(stack<NFA> &nfaStack, stack<int> &opStack, const string &exp, int next)
{
// 下一个字符不应该是*
if (next < exp.length() && exp[next] == '*')
return;
if (nfaStack.size() >= 2 &&
(opStack.empty() || opStack.top()+1 < nfaStack.top().left)) {
#ifdef NFA_DEBUG
clog << "autoCascade ";
if (opStack.empty()) clog << "empty opstack";
else clog << opStack.top()+1 << "!=" << nfaStack.top().left;
clog << endl;
#endif
NFA nfa2 = nfaStack.top();
nfaStack.pop();
NFA nfa1 = nfaStack.top();
nfaStack.pop();
nfaStack.push(cascade(nfa1, nfa2));
}
}
// 自动并联NFA栈中符合条件的NFA
// 参数
// nfaStack :NFA栈
// opstack :符号栈
void autoMultiple(stack<NFA> &nfaStack, stack<int> &opStack, const string &exp)
{
while (!opStack.empty() && exp[opStack.top()] == '|') {
NFA nfa1 = nfaStack.top();
nfaStack.pop();
NFA nfa2 = nfaStack.top();
nfaStack.pop();
opStack.pop();
nfaStack.push(multiple(nfa1, nfa2));
}
}
// 编译正则表达式
// 参数
// exp :正则表达式字符串
// 返回值 :NFA
NFA compile(const string &exp)
{
stack<NFA> nfaStack; // NFA栈
stack<int> opStack; // 操作符栈
for (int i = 0; i < exp.length(); i++) {
switch (exp[i]) {
case '|':
case '(':
// 显显式操作符入栈
opStack.push(i);
break;
case ')':
// 应用左括号前的所有的操作符
autoMultiple(nfaStack, opStack, exp);
nfaStack.top().left = opStack.top();
opStack.pop();
break;
case '*': {
// 重复栈顶NFA
NFA nfa = nfaStack.top();
nfaStack.pop();
nfaStack.push(repeat(nfa));
break;
} default:
// 生成单字符NFA
nfaStack.push(compile(exp, i));
}
// 自动并联NFA栈中符合条件的NFA
autoCascade(nfaStack, opStack, exp, i+1);
}
// 应用所有剩余操作符
autoMultiple(nfaStack, opStack, exp);
// 添加起始状态和接受状态标记
if (!nfaStack.empty()) {
NFA nfa = nfaStack.top();
nfa.in->attr |= STATE_INIT;
nfa.out->attr |= STATE_ACCEPT;
return nfa;
}
// 正则表达式为空
State* state = new State();
state->attr |= STATE_ACCEPT | STATE_ACCEPT;
return NFA(state, state, 0);
}
int main(int argc, char const *argv[])
{
string exp(argv[1]);
cout << compile(exp);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment