Skip to content

Instantly share code, notes, and snippets.

@GeeLaw
Created August 22, 2019 06:09
Show Gist options
  • Save GeeLaw/351fade372ae059ccf1fa83746068fb5 to your computer and use it in GitHub Desktop.
Save GeeLaw/351fade372ae059ccf1fa83746068fb5 to your computer and use it in GitHub Desktop.
Creates a regular expression that accepts strings of length at least 15 that have a digit, a lowercase letter, an uppercase letter and a special character.
// gnfa.cpp
// Copyright 2019 by Gee Law.
// Forked from https://gist.github.com/GeeLaw/be3aec94a6ba7c3817ef2e16d261f616
// Licensed under the MIT license.
#include<cstdio>
#include<vector>
#include<map>
template <typename TDerived, typename TBase>
bool is(TBase *baseptr)
{
return dynamic_cast<TDerived *>(baseptr) != nullptr;
}
template <typename TDerived, typename TBase>
TDerived *as(TBase *baseptr)
{
return dynamic_cast<TDerived *>(baseptr);
}
struct RegExp
{
virtual ~RegExp() { }
virtual void FPrint(FILE *fp) = 0;
virtual RegExp *Clone() = 0;
protected:
RegExp() = default;
RegExp(RegExp const &) = delete;
RegExp(RegExp &&) = delete;
RegExp &operator = (RegExp const &) = delete;
RegExp &operator = (RegExp &&) = delete;
};
struct Epsilon;
struct Character;
struct Concat;
struct Choice;
struct KleeneStar;
struct MultipleChoice;
struct Epsilon : RegExp
{
Epsilon()
{
}
RegExp *Clone()
{
return new Epsilon();
}
~Epsilon()
{
}
void FPrint(FILE *fp)
{
std::fputc('(', fp);
std::fputc(')', fp);
}
};
struct Character : RegExp
{
Character(char c)
: ch(c)
{
}
RegExp *Clone()
{
return new Character(ch);
}
~Character()
{
}
void FPrint(FILE *fp)
{
if (ch == '\\' || ch == '|' ||
ch == '(' || ch == ')' ||
ch == '*' || ch == '.' ||
ch == '[' || ch == ']' ||
ch == '+' || ch == '?' ||
ch == '/' ||
ch == '{' || ch == '}' ||
ch == '^' || ch == '$')
{
std::fputc('\\', fp);
}
std::fputc(ch, fp);
}
private:
char ch;
};
struct Concat : RegExp
{
Concat(RegExp *f, RegExp *s)
: first(f), second(s)
{
}
RegExp *Clone()
{
return new Concat(first->Clone(), second->Clone());
}
~Concat()
{
delete first;
delete second;
}
void FPrint(FILE *fp)
{
auto const firstParen = is<Choice>(first);
auto const secondParen = is<Choice>(second);
if (firstParen)
{
std::fputc('(', fp);
}
first->FPrint(fp);
if (firstParen)
{
std::fputc(')', fp);
}
if (secondParen)
{
std::fputc('(', fp);
}
second->FPrint(fp);
if (secondParen)
{
std::fputc(')', fp);
}
}
private:
RegExp *first, *second;
};
struct Choice : RegExp
{
Choice(RegExp *f, RegExp *s)
: first(f), second(s)
{
}
RegExp *Clone()
{
return new Choice(first->Clone(), second->Clone());
}
~Choice()
{
delete first;
delete second;
}
void FPrint(FILE *fp)
{
first->FPrint(fp);
std::fputc('|', fp);
second->FPrint(fp);
}
private:
RegExp *first, *second;
};
struct KleeneStar : RegExp
{
KleeneStar(RegExp *u)
: unit(u)
{
}
RegExp *Clone()
{
return new KleeneStar(unit->Clone());
}
~KleeneStar()
{
delete unit;
}
void FPrint(FILE *fp)
{
auto needParen = !is<Character>(unit)
&& !is<MultipleChoice>(unit);
if (needParen)
{
fputc('(', fp);
}
unit->FPrint(fp);
if (needParen)
{
fputc(')', fp);
}
fputc('*', fp);
}
private:
RegExp *unit;
};
struct MultipleChoice : RegExp
{
MultipleChoice(char const *ch)
: inside(ch)
{
}
RegExp *Clone()
{
return new MultipleChoice(inside);
}
~MultipleChoice()
{
}
void FPrint(FILE *fp)
{
fputc('[', fp);
fputs(inside, fp);
fputc(']', fp);
}
private:
char const *inside;
};
// Factory constructors that take care of object ownership.
RegExp *MakeEpsilon()
{
return new Epsilon();
}
RegExp *MakeCharacter(char ch)
{
return new Character(ch);
}
RegExp *MakeConcat(RegExp *first, RegExp *second)
{
if (first == nullptr || second == nullptr)
{
delete first;
delete second;
return nullptr;
}
if (is<Epsilon>(first))
{
delete first;
return second;
}
if (is<Epsilon>(second))
{
delete second;
return first;
}
return new Concat(first, second);
}
RegExp *MakeChoice(RegExp *first, RegExp *second)
{
if (first == nullptr)
{
return second;
}
if (second == nullptr)
{
return first;
}
return new Choice(first, second);
}
RegExp *MakeKleeneStar(RegExp *unit)
{
if (unit == nullptr)
{
return MakeEpsilon();
}
if (is<Epsilon>(unit) || is<KleeneStar>(unit))
{
return unit;
}
return new KleeneStar(unit);
}
RegExp *MakeMultipleChoice(char const *inside)
{
if (inside == nullptr || *inside == '\0')
{
return MakeEpsilon();
}
return new MultipleChoice(inside);
}
RegExp *MakeClone(RegExp *rgx)
{
return rgx == nullptr ? nullptr : rgx->Clone();
}
// state 0 = initial
// state 1 = accept
typedef std::vector<std::map<size_t, RegExp *> > GNFA;
void ReduceGNFA(GNFA &graph)
{
for (size_t sz = graph.size(); sz > 2; --sz)
{
auto const t = sz - 1;
RegExp *ttStar = nullptr;
// Process the self-loop.
{
RegExp *&tt = graph[t][t];
tt = MakeKleeneStar(tt);
ttStar = tt;
}
for (size_t i = 0; i != t; ++i)
{
RegExp *it = nullptr;
// Skip if there is no i->t.
{
auto const find_it = graph[i].find(t);
if (find_it == graph[i].end())
{
continue;
}
it = find_it->second;
if (it == nullptr)
{
continue;
}
}
for (size_t j = 0; j != t; ++j)
{
RegExp *tj = nullptr;
// Skip if there is no t->j.
{
auto const find_tj = graph[t].find(j);
if (find_tj == graph[t].end())
{
continue;
}
tj = find_tj->second;
if (tj == nullptr)
{
continue;
}
}
RegExp *&ij = graph[i][j];
// Compose new i->j as it(tt)*tj|ij.
// ij is not cloned because their ownerships are taken care of.
// it, tt, tj must be cloned because they might be used later.
ij = MakeChoice(
MakeConcat(it->Clone(), MakeConcat(ttStar->Clone(), tj->Clone())),
ij
);
}
}
// Clean up graph[t].
for (auto const &kv : graph[t])
{
delete kv.second;
}
graph.pop_back();
}
}
RegExp *GetRegex(GNFA &gnfa)
{
ReduceGNFA(gnfa);
// The accepting expression is
// (00)*(01)((11)*(10)(00)*(01))*(11)*.
auto const r00 = gnfa[0][0];
auto const r01 = gnfa[0][1];
auto const r10 = gnfa[1][0];
auto const r11 = gnfa[1][1];
return MakeConcat(
// (00)*(01)
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)),
// ((11)*(10)(00)*(01))*(11)*
MakeConcat(
// ((11)*(10)(00)*(01))*
MakeKleeneStar(MakeConcat(
// (11)*(10)
MakeConcat(MakeKleeneStar(MakeClone(r11)), MakeClone(r10)),
// (00)*(01)
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01))
)),
// (11)*
MakeKleeneStar(MakeClone(r11))
)
);
}
void DestoryGNFA(GNFA &gnfa)
{
for (auto const &i : gnfa)
{
for (auto const &j : i)
{
delete j.second;
}
}
gnfa.clear();
}
size_t swap1(size_t st)
{
return (st == 1
? (15 << 4) | 15
: st == ((15 << 4) | 15)
? 1
: st);
}
size_t GetStateId(size_t len,
bool lower, bool upper, bool digit, bool spchr)
{
return swap1((len << 4) |
(lower ? 8 : 0) |
(upper ? 4 : 0) |
(digit ? 2 : 0) |
(spchr ? 1 : 0));
}
size_t GetStateLen(size_t st)
{
return swap1(st) >> 4;
}
bool GetStateLower(size_t st)
{
return (bool)(swap1(st) & 8);
}
bool GetStateUpper(size_t st)
{
return (bool)(swap1(st) & 4);
}
bool GetStateDigit(size_t st)
{
return (bool)(swap1(st) & 2);
}
bool GetStateSpChr(size_t st)
{
return (bool)(swap1(st) & 1);
}
int main()
{
GNFA gnfa;
gnfa.resize(16 * 2 * 2 * 2 * 2);
char const *digit = "0-9";
char const *lower = "a-z";
char const *upper = "A-Z";
char const *spchr = "!-\\/:-@\\[-`{-~";
for (size_t len = 0; len != 16; ++len)
{
size_t const nextlen = (len != 15 ? len + 1 : 15);
for (int st_digit = 0; st_digit != 2; ++st_digit)
for (int st_lower = 0; st_lower != 2; ++st_lower)
for (int st_upper = 0; st_upper != 2; ++st_upper)
for (int st_spchr = 0; st_spchr != 2; ++st_spchr)
{
// Create digit transitions
{
RegExp *&sj = gnfa[GetStateId(len,
!!st_digit, !!st_lower,
!!st_upper, !!st_spchr)][GetStateId(nextlen,
true, !!st_lower,
!!st_upper, !!st_spchr)];
sj = MakeChoice(sj, MakeMultipleChoice(digit));
}
// Create lower transitions
{
RegExp *&sj = gnfa[GetStateId(len,
!!st_digit, !!st_lower,
!!st_upper, !!st_spchr)][GetStateId(nextlen,
!!st_digit, true,
!!st_upper, !!st_spchr)];
sj = MakeChoice(sj, MakeMultipleChoice(lower));
}
// Create upper transitions
{
RegExp *&sj = gnfa[GetStateId(len,
!!st_digit, !!st_lower,
!!st_upper, !!st_spchr)][GetStateId(nextlen,
!!st_digit, !!st_lower,
true, !!st_spchr)];
sj = MakeChoice(sj, MakeMultipleChoice(upper));
}
// Create spchr transitions
{
RegExp *&sj = gnfa[GetStateId(len,
!!st_digit, !!st_lower,
!!st_upper, !!st_spchr)][GetStateId(nextlen,
!!st_digit, !!st_lower,
!!st_upper, true)];
sj = MakeChoice(sj, MakeMultipleChoice(spchr));
}
}
}
auto const rgx = GetRegex(gnfa);
DestoryGNFA(gnfa);
rgx->FPrint(stdout);
delete rgx;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment