Skip to content

Instantly share code, notes, and snippets.

@GibsonRuitiari
Last active August 3, 2023 07:00
Show Gist options
  • Save GibsonRuitiari/62df950d2d4ba512d3dd79037923af15 to your computer and use it in GitHub Desktop.
Save GibsonRuitiari/62df950d2d4ba512d3dd79037923af15 to your computer and use it in GitHub Desktop.
LLR Parser for regex exp
/* a fully fledged parser than parses regex expressions into AST on the fly
* note the parser is an LL(1), and the grammar used is context-free
* right recursive.
* Also, the parser does not tokenize the inputs thus making it a little
* faster than those that tokenize their inputs.
* Currently, it parses all regexes apart from those having
* back-references & Zero-Width Positive Lookahead Assertions '?='
* For reference see: https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools
* */
fun main() {
// pervasive regexes gotten from https://digitalfortress.tech/tips/top-15-commonly-used-regex/
val simpleRegex ="^-?\\d*(\\.\\d+)?\$"
val emailRegex="^([a-zA-Z0-9._%-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,6})*$"
val telephoneRegex="^[-]?[0-9]+[,.]?[0-9]*([\\/][0-9]+[,.]?[0-9]*)*\$"
val passwordRegex="([!@#\$%^&*()[\\]{}\\-_+=~`|:;\"'<>,.?])"
val colorHexRegex = "^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})\$"
val wholeDecimalFractions ="[-]?[0-9]+[,.]?[0-9]*([\\/][0-9]+[,.]?[0-9]*)*"
val complexPasswordRegex ="(?:(.*[0-9]))(.*[\\!@#\$%^&*()[]{}\\-_+=~`|:;\"'<>,./?])(.*[a-z])(?:(.*[A-Z]))((.*)).{8,}"
val urlHttpProtocol = "https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\+.~#()?&//=]*)"
// ([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$
val ipv4Address="(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}\$"
val timeFormat ="(?:[01]\\d|2[0123]):(?:[012345]\\d):(?:[012345]\\d)"
val matched=Grammar.getAst().parse(timeFormat)
println(matched?.matched)
}
interface Parser<T>{
// returns matched item and the remaining literals
fun parse(input:String):ParseResult<T>?
}
data class ParseResult<T>(val remainingLiterals:String,val matched:T)
object Parsers{
fun consumePrefix(prefix:Char):Parser<Unit>{
return object :Parser<Unit>{
override fun parse(input: String): ParseResult<Unit>? {
return if (input.isEmpty()) null
else if (input.first()==prefix) ParseResult(remainingLiterals = input.drop(1), matched = Unit)
else null
}
}
}
fun consumePrefix(prefix:String):Parser<Unit>{
return object :Parser<Unit>{
override fun parse(input: String): ParseResult<Unit>? {
return if (input.isEmpty()) null
else if (input.first() in prefix) ParseResult(remainingLiterals = input.drop(1), matched = Unit)
else null
}
}
}
fun consumeCharacterExcluding(exclude:String):Parser<LetterLiteral>{
return object :Parser<LetterLiteral>{
override fun parse(input: String): ParseResult<LetterLiteral>? {
return if (input.isEmpty()) return null
else{
if (input in exclude) return null
val consumed=consumeCharacter().parse(input) ?: return null
if (!exclude.contains(consumed.matched.letter)){
ParseResult(remainingLiterals = consumed.remainingLiterals,
matched = consumed.matched)
}else null
}
}
}
}
fun consumeCharacter():Parser<LetterLiteral>{
return object :Parser<LetterLiteral>{
override fun parse(input: String): ParseResult<LetterLiteral>? {
return if (input.isEmpty()) null
else {
val characterToBeConsumed = input.first()
ParseResult(remainingLiterals = input.drop(1), matched = LetterLiteral(characterToBeConsumed))
}
}
}
}
// natural numbers or zero
fun consumeNumber():Parser<IntegerLiteral>{
return object :Parser<IntegerLiteral>{
override fun parse(input: String): ParseResult<IntegerLiteral>? {
var remainingLiterals=input
val digits = StringBuilder()
while (true){
if (remainingLiterals.isEmpty() ||
remainingLiterals.isBlank() ||
remainingLiterals.firstOrNull()?.isDigit()==false) break
val result=consumeDigit().parse(remainingLiterals) ?: break
val digit = result.matched.integer
digits.append(digit)
remainingLiterals=result.remainingLiterals
}
return try {
val number =digits.toString().toInt()
ParseResult(remainingLiterals = remainingLiterals,
matched = IntegerLiteral(number))
}catch (ex:NumberFormatException){
println("failed to convert the string to number")
null
}
}
}
}
// consume just one digit
fun consumeDigit():Parser<IntegerLiteral>{
return object :Parser<IntegerLiteral>{
override fun parse(input: String): ParseResult<IntegerLiteral>? {
return if (input.isEmpty()) null
else {
val charToBeConsumed=input.first()
if (charToBeConsumed.isDigit()) ParseResult(matched=IntegerLiteral(charToBeConsumed.digitToInt()),
remainingLiterals = input.drop(1)) else null
}
}
}
}
}
// misc utils that makes it easier to pass the tokens into our
// corresponding grammar
fun<A> oneOf(vararg parsers:Parser<A>):Parser<A>{
return object :Parser<A>{
override fun parse(input: String): ParseResult<A>? {
val matched= parsers.firstOrNull { it.parse(input)!=null }?.parse(input) ?: return null
return ParseResult(matched = matched.matched,
remainingLiterals = matched.remainingLiterals
)
}
}
}
inline fun<A,B> Parser<A>.map(crossinline transform:(A)->B): Parser<B> {
return object :Parser<B>{
override fun parse(input: String): ParseResult<B>? {
val (remaining,matched)[email protected](input) ?: return null
return ParseResult(remainingLiterals = remaining,transform(matched))
}
}
}
// defines our grammar according to our Grammar.ebnf
object Grammar{
fun getAst():Parser<AST>{
return object :Parser<AST>{
override fun parse(input: String): ParseResult<AST>? {
val isFromStartingResult=Parsers.consumePrefix('^').parse(input)
val isFromStarting = isFromStartingResult!=null
val remainingLiterals=isFromStartingResult?.remainingLiterals ?: input
val expressionResult= getExpression().parse(remainingLiterals) ?:return null
// we have reached the end? if yes return the result if no just return null since
// the expression is unrecognized by our parser
require(expressionResult.remainingLiterals.isEmpty() || expressionResult.remainingLiterals==")"){"expression must be empty or must not end with an unescaped )"}
return ParseResult(matched = AST(root = expressionResult.matched, isFromStartOfString = isFromStarting),
remainingLiterals = expressionResult.remainingLiterals)
}
}
}
private val lazyExpression by lazy { getExpression() }
fun getExpression():Parser<Expression>{
return object :Parser<Expression>{
override fun parse(input: String): ParseResult<Expression>? {
val leftSubExpression=getSubexpression().parse(input) ?: return null
val alternation = Parsers.consumePrefix('|').parse(leftSubExpression.remainingLiterals)
return if (alternation==null){
ParseResult(matched =Expression(
subExpression = leftSubExpression.matched,
rightAlternativeExpression = null),
remainingLiterals = leftSubExpression.remainingLiterals)
}else{
val rightExpression = getExpression().parse(alternation.remainingLiterals) ?: throw IllegalArgumentException("a valid expression must follow after the |")
println("right expression sub ---> ${rightExpression.matched.subExpression} | alternative-- ${rightExpression.matched.rightAlternativeExpression}")
println("----")
ParseResult(matched =Expression(
subExpression = leftSubExpression.matched,
rightAlternativeExpression = rightExpression.matched),
remainingLiterals = rightExpression.remainingLiterals)
}
}
}
}
fun getSubexpression():Parser<SubExpression>{
// one or more [+]
return object :Parser<SubExpression>{
override fun parse(input: String): ParseResult<SubExpression> {
val matches = mutableListOf<SubExpressionItem>()
var remaining=input
while (true){
// if it starts with an unescaped | then it means it is another sub-expression so break and return
// the remaining literals will be evaluated on their own
if (remaining.isEmpty() || remaining.isBlank()) break
val result=getSubexpressionItem().parse(remaining) ?: break
remaining=result.remainingLiterals
matches.add(result.matched)
}
check(matches.isNotEmpty()) {"pattern must not be empty!"}
return ParseResult(matched = SubExpression(matches),
remainingLiterals =remaining)
}
}
}
fun getSubexpressionItem():Parser<SubExpressionItem>{
return object : Parser<SubExpressionItem>{
// one of group,anchor, match, character
override fun parse(input: String): ParseResult<SubExpressionItem>? {
val subExpressionItemResult= oneOf(
getMatch().map { SubexpressionMatch(it)},
getGroup().map { SubexpressionGroup(it) },
getAnchor().map {
SubexpressionAnchor(it)},).parse(input) ?: return null
return ParseResult(matched = subExpressionItemResult.matched,
remainingLiterals = subExpressionItemResult.remainingLiterals)
}
}
}
fun getGroup():Parser<QuantifiedGroup>{
return object :Parser<QuantifiedGroup>{
override fun parse(input: String): ParseResult<QuantifiedGroup>? {
var _input = input
val openingParenthesis = Parsers.consumePrefix("(").parse(_input)?:return null
val isNonCapturing=openingParenthesis.remainingLiterals.take(2) == "?:"
_input = if (isNonCapturing) openingParenthesis.remainingLiterals.drop(2)
else openingParenthesis.remainingLiterals
val group= lazyExpression.parse(_input)?:return null
_input=group.remainingLiterals
val optionalQuantifier=getQuantifier().parse(_input)
_input = optionalQuantifier?.remainingLiterals ?: _input
val closingParenthesis = Parsers.consumePrefix("\\)").parse(_input) ?:return null
_input=closingParenthesis.remainingLiterals
// the group is followed by a quantifier (maybe)
val optionalGroupQuantifier=getQuantifier().parse(_input)
group.matched
val output=Group(isGroupCapturing = !isNonCapturing, expression = group.matched,
quantifierType = optionalQuantifier?.matched)
return ParseResult(matched=QuantifiedGroup(group = output,
quantifier = optionalGroupQuantifier?.matched),
remainingLiterals = optionalGroupQuantifier?.remainingLiterals ?: _input)
}
}
}
fun getMatch(): Parser<Match> {
return object :Parser<Match>{
override fun parse(input: String): ParseResult<Match>? {
if (input.isEmpty()) return null
val result=oneOf(Parsers.consumePrefix('.').map { MatchAnyCharacter()},
getCharacterGroup().map { MatchCharacterGroup(it)},
getCharacterClass().map { MatchCharacterClass(it)},
// exclude these characters because they will be handled by the respective handlers
// including them will make the parser go in a never-ending loop :)
Parsers.consumeCharacterExcluding("\\()|*?+$").map {
MatchCharacter(it)
},
getEscapedCharacter().map { MatchCharacter(it)}).parse(input) ?: return null
val optionalQuantifier=getQuantifier().parse(result.remainingLiterals)
val remainingLiterals = optionalQuantifier?.remainingLiterals ?: result.remainingLiterals
return ParseResult(remainingLiterals =remainingLiterals,
matched = Match(matchItem = result.matched,quantifier = optionalQuantifier?.matched))
}
}
}
fun getCharacterGroup():Parser<CharacterGroup>{
return object :Parser<CharacterGroup> {
override fun parse(input: String): ParseResult<CharacterGroup>? {
val openingParenthesisResult = Parsers.consumePrefix('[').parse(input) ?: return null
val optionalInvertedMarkerResult = Parsers.consumePrefix('^')
.parse(openingParenthesisResult.remainingLiterals)
val isGroupInverted = optionalInvertedMarkerResult!=null
var remainingLiterals = optionalInvertedMarkerResult?.remainingLiterals ?: openingParenthesisResult.remainingLiterals
val matches = mutableListOf<CharacterGroupItem>()
while (true){
// if it starts with then it means we have a quantifier next so break out of the loop and consume the ] then return the quantifier
if (remainingLiterals.isEmpty() || remainingLiterals.isBlank() || remainingLiterals.startsWith(']')) break
val characterGroupItemResult = getCharacterGroupItem().parse(remainingLiterals) ?: break
remainingLiterals = characterGroupItemResult.remainingLiterals
matches.add(characterGroupItemResult.matched)
}
check(matches.isNotEmpty()){"character group cannot be empty!"}
val closingParenthesisResult=Parsers.consumePrefix(']').parse(remainingLiterals) ?: throw IllegalArgumentException("character group must end with a ]")
return ParseResult(remainingLiterals = closingParenthesisResult.remainingLiterals,
matched = CharacterGroup(isInverted = isGroupInverted,
groupItems = matches))
}
}
}
// one of either character range character class or character
fun getCharacterGroupItem():Parser<CharacterGroupItem>{
return object :Parser<CharacterGroupItem>{
override fun parse(input: String): ParseResult<CharacterGroupItem>? {
// val unescapedBackslash=Parsers.consumePrefix("/").parse(input)
// if (unescapedBackslash!=null) throw IllegalArgumentException("an unescaped delimiter must be escaped with a backslash")
val result=oneOf(getCharacterClass().map { it },
getCharacterRange().map { it },
getDigitRange().map{it},
getEscapedCharacter().map { Character(it.letter) },
Parsers.consumeCharacterExcluding("]")
.map { Character(it.letter) }).parse(input) ?: return null
return ParseResult(remainingLiterals = result.remainingLiterals,
matched = result.matched)
}
}
}
fun getEscapedCharacter():Parser<LetterLiteral>{
return object :Parser<LetterLiteral>{
override fun parse(input: String): ParseResult<LetterLiteral>? {
val escapedBackslash=Parsers.consumePrefix('\\').parse(input) ?: return null
if (escapedBackslash.remainingLiterals.isEmpty()) throw IllegalArgumentException("dude you cannot end pattern with a '\\'")
val consumedCharacterResult=Parsers.consumeCharacter().parse(escapedBackslash.remainingLiterals) ?: return null
consumedCharacterResult.matched
return ParseResult(matched = consumedCharacterResult.matched,
remainingLiterals = consumedCharacterResult.remainingLiterals)
}
}
}
//[0-9]
fun getDigitRange():Parser<DigitRange>{
return object :Parser<DigitRange>{
override fun parse(input: String): ParseResult<DigitRange>? {
val openingBracketResult=Parsers.consumePrefix('[').parse(input)
val lowerBoundResult=if (openingBracketResult==null){
Parsers.consumeCharacter().parse(input) ?: return null
}else{
Parsers.consumeCharacter().parse(openingBracketResult.remainingLiterals) ?: return null
}
val dashResult= Parsers.consumePrefix('-').parse(lowerBoundResult.remainingLiterals) ?: return null
val upperBoundResult = Parsers.consumeCharacter().parse(dashResult.remainingLiterals) ?: return null
val remainingLiterals=if (openingBracketResult!=null){
val result= Parsers.consumePrefix(']').parse(upperBoundResult.remainingLiterals ) ?: return null
result.remainingLiterals
}else{
upperBoundResult.remainingLiterals
}
if (upperBoundResult.matched.letter !in digits() || lowerBoundResult.matched.letter !in digits()) return null
if (upperBoundResult.matched.letter <= lowerBoundResult.matched.letter) throw IllegalArgumentException("upper bound must be greater than lower bound")
return ParseResult(remainingLiterals = remainingLiterals,
matched = DigitRange(IntegerLiteral(lowerBoundResult.matched.letter.digitToInt())..
IntegerLiteral(upperBoundResult.matched.letter.digitToInt())))
}
}
}
// [a-z]
fun getCharacterRange():Parser<CharacterRange>{
return object :Parser<CharacterRange>{
override fun parse(input: String): ParseResult<CharacterRange>? {
val openingBracketResult=Parsers.consumePrefix('[').parse(input)
val lowerBoundResult=if (openingBracketResult==null){
Parsers.consumeCharacter().parse(input) ?: return null
}else{
Parsers.consumeCharacter().parse(openingBracketResult.remainingLiterals) ?: return null
}
val dashResult= Parsers.consumePrefix('-').parse(lowerBoundResult.remainingLiterals) ?: return null
val upperBoundResult = Parsers.consumeCharacter().parse(dashResult.remainingLiterals) ?: return null
val remainingLiterals=if (openingBracketResult!=null){
val result= Parsers.consumePrefix(']').parse(upperBoundResult.remainingLiterals ) ?: return null
result.remainingLiterals
}else{
upperBoundResult.remainingLiterals
}
if (upperBoundResult.matched.letter !in letters() || lowerBoundResult.matched.letter !in letters()) return null
if (upperBoundResult.matched.letter <= lowerBoundResult.matched.letter) throw IllegalArgumentException("upper bound must be greater than lower bound")
return ParseResult(remainingLiterals=remainingLiterals,
matched = CharacterRange(LetterLiteral(lowerBoundResult.matched.letter)..LetterLiteral(upperBoundResult.matched.letter)))
}
}
}
fun getCharacterClass():Parser<CharacterClass>{
return object :Parser<CharacterClass>{
override fun parse(input: String): ParseResult<CharacterClass>? {
val prefixResult= Parsers.consumePrefix('\\').parse(input) ?: return null
val consumedCharResult=Parsers.consumeCharacter().parse(prefixResult.remainingLiterals)
return if (consumedCharResult?.matched!=null){
val characterClass = makeCharacterClass(consumedCharResult.matched.letter)
if (characterClass!=null)
ParseResult(remainingLiterals = consumedCharResult.remainingLiterals,
matched = characterClass)
else null
}else null
}
}
}
private fun makeCharacterClass(char: Char):CharacterClass?{
return when(char){
'd'-> CharacterClassAnyDecimalDigit()
'D'->CharacterClassAnyNonDecimalDigit()
's'->CharacterClassAnyWhiteSpace()
'S'->CharacterClassAnyNonWhiteSpace()
'w'->CharacterClassAnyWord()
'W'->CharacterClassAnyNonWord()
else->null
}
}
// * + ? {2,4}
fun getQuantifier():Parser<Quantifier>{
return object :Parser<Quantifier>{
override fun parse(input: String): ParseResult<Quantifier>? {
if (input.isEmpty()) return null
val quantifiers=oneOf(Parsers.consumePrefix('*').map {ZeroOrMoreQuantifier},
Parsers.consumePrefix('+').map {OneOrMoreQuantifier },
Parsers.consumePrefix('?').map { ZeroOrOneQuantifier },
getRangeQuantifier().map { it }).parse(input) ?: return null
val isQuantifierLazy =Parsers.consumePrefix('?').parse(quantifiers.remainingLiterals)
val remainingLiterals = isQuantifierLazy?.remainingLiterals ?: quantifiers.remainingLiterals
return ParseResult(remainingLiterals = remainingLiterals,
matched = Quantifier(isQuantifierLazy =isQuantifierLazy!=null,
quantifierType = quantifiers.matched))
}
}
}
// {2,4} {2} {4,}
fun getRangeQuantifier():Parser<RangeQuantifier>{
return object :Parser<RangeQuantifier>{
override fun parse(input: String): ParseResult<RangeQuantifier>? {
val bracketPrefix= Parsers.consumePrefix('{').parse(input) ?: return null
var remaining= bracketPrefix.remainingLiterals
val lowerBoundResult= Parsers.consumeNumber().parse(remaining) ?: return null
remaining=lowerBoundResult.remainingLiterals
val lowerBound= lowerBoundResult.matched.integer
// ,3 --> optional
val parsedComma=Parsers.consumePrefix(',').parse(remaining)
return if (parsedComma==null){
// we do not have an upper bound we thus create the range quantifier here
// check that we have {1 -we are here check if we have a }
val closingBracketResult=Parsers.consumePrefix('}').parse(remaining) ?: throw IllegalArgumentException("missing a }!")
remaining=closingBracketResult.remainingLiterals
ParseResult(remainingLiterals = remaining, matched = makeRangeQuantifier(lowerBound,null))
}else{
// we do have upper bound
remaining=parsedComma.remainingLiterals
val upperBoundResult=Parsers.consumeNumber().parse(remaining)
upperBoundResult?.let {
remaining=it.remainingLiterals
}
// means we just had a comma if the upper bound is null
// if it's null -- {2,}
val upperBound = upperBoundResult?.matched?.integer ?:lowerBound
check(upperBound>=lowerBound){"upper bound needs to be greater or equal to the lower bound not the other way round"}
val closingBracketResult = Parsers.consumePrefix('}').parse(remaining) ?: throw IllegalArgumentException("missing a }!")
ParseResult(remainingLiterals = closingBracketResult.remainingLiterals,
matched = makeRangeQuantifier(lowerBound, upperBound))
}
}
}
}
private fun makeRangeQuantifier(lowerBound: Int,upperBound: Int?):RangeQuantifier{
return RangeQuantifier(lowerBound = lowerBound, upperBound = upperBound?:lowerBound)
}
// one of escaped anchor/ end of string anchor
fun getAnchor():Parser<Anchor>{
return object :Parser<Anchor>{
override fun parse(input: String): ParseResult<Anchor>? {
val remainingLiterals = StringBuilder()
val anchor=if (input.startsWith("\\")){
val result=getEscapedAnchor(input)
if (result!=null){
val (remaining,matched) = result
val escapedAnchor=when(matched.letter){
'B'->AnchorNonWordBoundary()
'b'->AnchorWordBoundary()
'z'->AnchorEndOfStringNotNewline()
'A'->AnchorStartOfStringOnly()
'Z'->AnchorEndOfStringOnly()
else->null
}
remainingLiterals.append(remaining)
escapedAnchor
}else null
}else if (input.startsWith("$")){
val endOfStringAnchor=Parsers.consumePrefix('$').parse(input)
// what about (?:[a-b]$) ?is this a valid expression? For now we treat it as such
// because it does make sense logically
// if(endOfStringAnchor?.remainingLiterals?.isNotEmpty() == true) throw IllegalArgumentException("$ cannot be used in the middle of the expression")
if(endOfStringAnchor?.matched!=null){
AnchorEndOfString()
}else null
}else null
return if (anchor==null) null else ParseResult(remainingLiterals = remainingLiterals.toString(), matched = anchor)
}
}
}
private fun getEscapedAnchor(input: String): ParseResult<LetterLiteral>? {
val parserResult = Parsers.consumePrefix('\\').parse(input) ?: return null
return Parsers.consumeCharacter().parse(parserResult.remainingLiterals)
}
}
// our tree containing root/top node
data class AST(val isFromStartOfString:Boolean,val root:Expression)
// our ast types which are basically our return types after parsing
// this is the base grammar type whereby it represents a single unit of grammar
sealed interface GrammarType
// this defines a composite of grammar types whereby it represents a collection of grammar
// the major expression which is made up of either a single expression
// or a single + another whereby the another = right alternative expression i.e
// exp|exp or exp
data class Expression(
val subExpression: SubExpression, val rightAlternativeExpression: Expression?=null):GrammarType
data class SubExpression(val subExpressionItems:List<SubExpressionItem>):GrammarType
sealed interface SubExpressionItem
data class SubexpressionMatch(val match: Match):SubExpressionItem
data class SubexpressionGroup(val group: QuantifiedGroup):SubExpressionItem
data class SubexpressionAnchor(val anchor:Anchor):SubExpressionItem
data class QuantifiedGroup(val group:Group,val quantifier: Quantifier?=null):GrammarType
data class Group(val isGroupCapturing:Boolean,
val expression: Expression,
val quantifierType: Quantifier?=null):GrammarType
data class Match(val matchItem: MatchItem, val quantifier:Quantifier?=null):GrammarType
sealed interface MatchItem
data class MatchAnyCharacter(val anyCharacterType:String="."):MatchItem
data class MatchCharacterClass(val characterClass: CharacterClass):MatchItem
data class MatchCharacter(val character:LetterLiteral):MatchItem
data class MatchCharacterGroup(val characterGroup:CharacterGroup):MatchItem
data class CharacterGroup(val groupItems:List<CharacterGroupItem>,val isInverted:Boolean):GrammarType
sealed interface CharacterGroupItem
sealed interface CharacterClass:CharacterGroupItem
data class CharacterClassAnyWord(val characterClassTypeValue: String=word()):CharacterClass
data class CharacterClassAnyNonWord(val characterClassTypeValue: String=nonWord()):CharacterClass
data class CharacterClassAnyDecimalDigit(val characterClassTypeValue: String=digits()):CharacterClass
data class CharacterClassAnyNonDecimalDigit(val characterClassTypeValue: String=letters()+punctuation())
:CharacterClass
data class CharacterClassAnyWhiteSpace(val characterClassTypeValue: String=whitespace()):CharacterClass
data class CharacterClassAnyNonWhiteSpace(val characterClassTypeValue: String=nonWhitespace()):CharacterClass
data class CharacterRange(val range:ClosedRange<LetterLiteral>):CharacterGroupItem
data class DigitRange(val range:ClosedRange<IntegerLiteral>):CharacterGroupItem
data class Character(val char:Char):CharacterGroupItem
// quantifier
data class Quantifier(val quantifierType: QuantifierType,val isQuantifierLazy:Boolean):GrammarType
// quantifier types
sealed interface QuantifierType:GrammarType
object ZeroOrMoreQuantifier:QuantifierType
object OneOrMoreQuantifier:QuantifierType
object ZeroOrOneQuantifier:QuantifierType
// range quantifier
data class RangeQuantifier(val upperBound:Int,val lowerBound:Int):QuantifierType
// Anchors - the anchor type value is unescaped
sealed class Anchor(open val anchorTypeValue:String):GrammarType
data class AnchorWordBoundary(override val anchorTypeValue: String = "b"):Anchor(anchorTypeValue)
data class AnchorNonWordBoundary(override val anchorTypeValue: String = "B"):Anchor(anchorTypeValue)
data class AnchorEndOfString(override val anchorTypeValue: String = "$"):Anchor(anchorTypeValue)
data class AnchorEndOfStringNotNewline(override val anchorTypeValue: String = "z"):Anchor(anchorTypeValue)
data class AnchorEndOfStringOnly(override val anchorTypeValue: String = "Z"):Anchor(anchorTypeValue)
data class AnchorStartOfStringOnly(override val anchorTypeValue: String = "A"):Anchor(anchorTypeValue)
// basic grammar type in this case literals
// [0-9]+
data class IntegerLiteral(val integer:Int):GrammarType,Comparable<IntegerLiteral>{
override fun compareTo(other: IntegerLiteral): Int {
return if (integer == other.integer)0
else if (integer < other.integer)-1
else 1
}
}
// letters [a-zA-z]+ or just a single one
data class LetterLiteral(val letter:Char):GrammarType,Comparable<LetterLiteral>{
override fun compareTo(other: LetterLiteral): Int {
return if (letter ==other.letter) 0
else if (letter < other.letter) -1
else 1
}
}
// CharacterSets not defined by standard library (probably they should?)
fun punctuation() ="!\\\"#\$%&'()*+,-./:;<=>?@[\\\\]^_`{|}~"
fun whitespace() =" \\t\\n\\r\\f"
fun word() =letters()+digits()+"-"
fun nonWord()= (32..126).joinToString(""){ it.toChar().toString() }.filterNot { it in word() }
fun nonWhitespace() =(32..126).joinToString("") { it.toChar().toString() }.filterNot { it.isWhitespace() }
fun digits() = (0..9).joinToString("")
fun letters() = lowerCase()+upperCase()
fun lowerCase() = (97..122).joinToString("") { it.toChar().toString() }
fun upperCase() = (65..90).joinToString("") { it.toChar().toString()}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment