Created
August 22, 2019 06:09
-
-
Save GeeLaw/351fade372ae059ccf1fa83746068fb5 to your computer and use it in GitHub Desktop.
Creates a regular expression that accepts strings of length at least 15 that have a digit, a lowercase letter, an uppercase letter and a special character.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gnfa.cpp | |
// Copyright 2019 by Gee Law. | |
// Forked from https://gist.github.com/GeeLaw/be3aec94a6ba7c3817ef2e16d261f616 | |
// Licensed under the MIT license. | |
#include<cstdio> | |
#include<vector> | |
#include<map> | |
template <typename TDerived, typename TBase> | |
bool is(TBase *baseptr) | |
{ | |
return dynamic_cast<TDerived *>(baseptr) != nullptr; | |
} | |
template <typename TDerived, typename TBase> | |
TDerived *as(TBase *baseptr) | |
{ | |
return dynamic_cast<TDerived *>(baseptr); | |
} | |
struct RegExp | |
{ | |
virtual ~RegExp() { } | |
virtual void FPrint(FILE *fp) = 0; | |
virtual RegExp *Clone() = 0; | |
protected: | |
RegExp() = default; | |
RegExp(RegExp const &) = delete; | |
RegExp(RegExp &&) = delete; | |
RegExp &operator = (RegExp const &) = delete; | |
RegExp &operator = (RegExp &&) = delete; | |
}; | |
struct Epsilon; | |
struct Character; | |
struct Concat; | |
struct Choice; | |
struct KleeneStar; | |
struct MultipleChoice; | |
struct Epsilon : RegExp | |
{ | |
Epsilon() | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Epsilon(); | |
} | |
~Epsilon() | |
{ | |
} | |
void FPrint(FILE *fp) | |
{ | |
std::fputc('(', fp); | |
std::fputc(')', fp); | |
} | |
}; | |
struct Character : RegExp | |
{ | |
Character(char c) | |
: ch(c) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Character(ch); | |
} | |
~Character() | |
{ | |
} | |
void FPrint(FILE *fp) | |
{ | |
if (ch == '\\' || ch == '|' || | |
ch == '(' || ch == ')' || | |
ch == '*' || ch == '.' || | |
ch == '[' || ch == ']' || | |
ch == '+' || ch == '?' || | |
ch == '/' || | |
ch == '{' || ch == '}' || | |
ch == '^' || ch == '$') | |
{ | |
std::fputc('\\', fp); | |
} | |
std::fputc(ch, fp); | |
} | |
private: | |
char ch; | |
}; | |
struct Concat : RegExp | |
{ | |
Concat(RegExp *f, RegExp *s) | |
: first(f), second(s) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Concat(first->Clone(), second->Clone()); | |
} | |
~Concat() | |
{ | |
delete first; | |
delete second; | |
} | |
void FPrint(FILE *fp) | |
{ | |
auto const firstParen = is<Choice>(first); | |
auto const secondParen = is<Choice>(second); | |
if (firstParen) | |
{ | |
std::fputc('(', fp); | |
} | |
first->FPrint(fp); | |
if (firstParen) | |
{ | |
std::fputc(')', fp); | |
} | |
if (secondParen) | |
{ | |
std::fputc('(', fp); | |
} | |
second->FPrint(fp); | |
if (secondParen) | |
{ | |
std::fputc(')', fp); | |
} | |
} | |
private: | |
RegExp *first, *second; | |
}; | |
struct Choice : RegExp | |
{ | |
Choice(RegExp *f, RegExp *s) | |
: first(f), second(s) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Choice(first->Clone(), second->Clone()); | |
} | |
~Choice() | |
{ | |
delete first; | |
delete second; | |
} | |
void FPrint(FILE *fp) | |
{ | |
first->FPrint(fp); | |
std::fputc('|', fp); | |
second->FPrint(fp); | |
} | |
private: | |
RegExp *first, *second; | |
}; | |
struct KleeneStar : RegExp | |
{ | |
KleeneStar(RegExp *u) | |
: unit(u) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new KleeneStar(unit->Clone()); | |
} | |
~KleeneStar() | |
{ | |
delete unit; | |
} | |
void FPrint(FILE *fp) | |
{ | |
auto needParen = !is<Character>(unit) | |
&& !is<MultipleChoice>(unit); | |
if (needParen) | |
{ | |
fputc('(', fp); | |
} | |
unit->FPrint(fp); | |
if (needParen) | |
{ | |
fputc(')', fp); | |
} | |
fputc('*', fp); | |
} | |
private: | |
RegExp *unit; | |
}; | |
struct MultipleChoice : RegExp | |
{ | |
MultipleChoice(char const *ch) | |
: inside(ch) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new MultipleChoice(inside); | |
} | |
~MultipleChoice() | |
{ | |
} | |
void FPrint(FILE *fp) | |
{ | |
fputc('[', fp); | |
fputs(inside, fp); | |
fputc(']', fp); | |
} | |
private: | |
char const *inside; | |
}; | |
// Factory constructors that take care of object ownership. | |
RegExp *MakeEpsilon() | |
{ | |
return new Epsilon(); | |
} | |
RegExp *MakeCharacter(char ch) | |
{ | |
return new Character(ch); | |
} | |
RegExp *MakeConcat(RegExp *first, RegExp *second) | |
{ | |
if (first == nullptr || second == nullptr) | |
{ | |
delete first; | |
delete second; | |
return nullptr; | |
} | |
if (is<Epsilon>(first)) | |
{ | |
delete first; | |
return second; | |
} | |
if (is<Epsilon>(second)) | |
{ | |
delete second; | |
return first; | |
} | |
return new Concat(first, second); | |
} | |
RegExp *MakeChoice(RegExp *first, RegExp *second) | |
{ | |
if (first == nullptr) | |
{ | |
return second; | |
} | |
if (second == nullptr) | |
{ | |
return first; | |
} | |
return new Choice(first, second); | |
} | |
RegExp *MakeKleeneStar(RegExp *unit) | |
{ | |
if (unit == nullptr) | |
{ | |
return MakeEpsilon(); | |
} | |
if (is<Epsilon>(unit) || is<KleeneStar>(unit)) | |
{ | |
return unit; | |
} | |
return new KleeneStar(unit); | |
} | |
RegExp *MakeMultipleChoice(char const *inside) | |
{ | |
if (inside == nullptr || *inside == '\0') | |
{ | |
return MakeEpsilon(); | |
} | |
return new MultipleChoice(inside); | |
} | |
RegExp *MakeClone(RegExp *rgx) | |
{ | |
return rgx == nullptr ? nullptr : rgx->Clone(); | |
} | |
// state 0 = initial | |
// state 1 = accept | |
typedef std::vector<std::map<size_t, RegExp *> > GNFA; | |
void ReduceGNFA(GNFA &graph) | |
{ | |
for (size_t sz = graph.size(); sz > 2; --sz) | |
{ | |
auto const t = sz - 1; | |
RegExp *ttStar = nullptr; | |
// Process the self-loop. | |
{ | |
RegExp *&tt = graph[t][t]; | |
tt = MakeKleeneStar(tt); | |
ttStar = tt; | |
} | |
for (size_t i = 0; i != t; ++i) | |
{ | |
RegExp *it = nullptr; | |
// Skip if there is no i->t. | |
{ | |
auto const find_it = graph[i].find(t); | |
if (find_it == graph[i].end()) | |
{ | |
continue; | |
} | |
it = find_it->second; | |
if (it == nullptr) | |
{ | |
continue; | |
} | |
} | |
for (size_t j = 0; j != t; ++j) | |
{ | |
RegExp *tj = nullptr; | |
// Skip if there is no t->j. | |
{ | |
auto const find_tj = graph[t].find(j); | |
if (find_tj == graph[t].end()) | |
{ | |
continue; | |
} | |
tj = find_tj->second; | |
if (tj == nullptr) | |
{ | |
continue; | |
} | |
} | |
RegExp *&ij = graph[i][j]; | |
// Compose new i->j as it(tt)*tj|ij. | |
// ij is not cloned because their ownerships are taken care of. | |
// it, tt, tj must be cloned because they might be used later. | |
ij = MakeChoice( | |
MakeConcat(it->Clone(), MakeConcat(ttStar->Clone(), tj->Clone())), | |
ij | |
); | |
} | |
} | |
// Clean up graph[t]. | |
for (auto const &kv : graph[t]) | |
{ | |
delete kv.second; | |
} | |
graph.pop_back(); | |
} | |
} | |
RegExp *GetRegex(GNFA &gnfa) | |
{ | |
ReduceGNFA(gnfa); | |
// The accepting expression is | |
// (00)*(01)((11)*(10)(00)*(01))*(11)*. | |
auto const r00 = gnfa[0][0]; | |
auto const r01 = gnfa[0][1]; | |
auto const r10 = gnfa[1][0]; | |
auto const r11 = gnfa[1][1]; | |
return MakeConcat( | |
// (00)*(01) | |
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)), | |
// ((11)*(10)(00)*(01))*(11)* | |
MakeConcat( | |
// ((11)*(10)(00)*(01))* | |
MakeKleeneStar(MakeConcat( | |
// (11)*(10) | |
MakeConcat(MakeKleeneStar(MakeClone(r11)), MakeClone(r10)), | |
// (00)*(01) | |
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)) | |
)), | |
// (11)* | |
MakeKleeneStar(MakeClone(r11)) | |
) | |
); | |
} | |
void DestoryGNFA(GNFA &gnfa) | |
{ | |
for (auto const &i : gnfa) | |
{ | |
for (auto const &j : i) | |
{ | |
delete j.second; | |
} | |
} | |
gnfa.clear(); | |
} | |
size_t swap1(size_t st) | |
{ | |
return (st == 1 | |
? (15 << 4) | 15 | |
: st == ((15 << 4) | 15) | |
? 1 | |
: st); | |
} | |
size_t GetStateId(size_t len, | |
bool lower, bool upper, bool digit, bool spchr) | |
{ | |
return swap1((len << 4) | | |
(lower ? 8 : 0) | | |
(upper ? 4 : 0) | | |
(digit ? 2 : 0) | | |
(spchr ? 1 : 0)); | |
} | |
size_t GetStateLen(size_t st) | |
{ | |
return swap1(st) >> 4; | |
} | |
bool GetStateLower(size_t st) | |
{ | |
return (bool)(swap1(st) & 8); | |
} | |
bool GetStateUpper(size_t st) | |
{ | |
return (bool)(swap1(st) & 4); | |
} | |
bool GetStateDigit(size_t st) | |
{ | |
return (bool)(swap1(st) & 2); | |
} | |
bool GetStateSpChr(size_t st) | |
{ | |
return (bool)(swap1(st) & 1); | |
} | |
int main() | |
{ | |
GNFA gnfa; | |
gnfa.resize(16 * 2 * 2 * 2 * 2); | |
char const *digit = "0-9"; | |
char const *lower = "a-z"; | |
char const *upper = "A-Z"; | |
char const *spchr = "!-\\/:-@\\[-`{-~"; | |
for (size_t len = 0; len != 16; ++len) | |
{ | |
size_t const nextlen = (len != 15 ? len + 1 : 15); | |
for (int st_digit = 0; st_digit != 2; ++st_digit) | |
for (int st_lower = 0; st_lower != 2; ++st_lower) | |
for (int st_upper = 0; st_upper != 2; ++st_upper) | |
for (int st_spchr = 0; st_spchr != 2; ++st_spchr) | |
{ | |
// Create digit transitions | |
{ | |
RegExp *&sj = gnfa[GetStateId(len, | |
!!st_digit, !!st_lower, | |
!!st_upper, !!st_spchr)][GetStateId(nextlen, | |
true, !!st_lower, | |
!!st_upper, !!st_spchr)]; | |
sj = MakeChoice(sj, MakeMultipleChoice(digit)); | |
} | |
// Create lower transitions | |
{ | |
RegExp *&sj = gnfa[GetStateId(len, | |
!!st_digit, !!st_lower, | |
!!st_upper, !!st_spchr)][GetStateId(nextlen, | |
!!st_digit, true, | |
!!st_upper, !!st_spchr)]; | |
sj = MakeChoice(sj, MakeMultipleChoice(lower)); | |
} | |
// Create upper transitions | |
{ | |
RegExp *&sj = gnfa[GetStateId(len, | |
!!st_digit, !!st_lower, | |
!!st_upper, !!st_spchr)][GetStateId(nextlen, | |
!!st_digit, !!st_lower, | |
true, !!st_spchr)]; | |
sj = MakeChoice(sj, MakeMultipleChoice(upper)); | |
} | |
// Create spchr transitions | |
{ | |
RegExp *&sj = gnfa[GetStateId(len, | |
!!st_digit, !!st_lower, | |
!!st_upper, !!st_spchr)][GetStateId(nextlen, | |
!!st_digit, !!st_lower, | |
!!st_upper, true)]; | |
sj = MakeChoice(sj, MakeMultipleChoice(spchr)); | |
} | |
} | |
} | |
auto const rgx = GetRegex(gnfa); | |
DestoryGNFA(gnfa); | |
rgx->FPrint(stdout); | |
delete rgx; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment