Last active
September 21, 2017 01:37
-
-
Save pathikrit/5883ad3fe68cf4bd3243020dee397317 to your computer and use it in GitHub Desktop.
Group named Regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.regex.{MatchResult, Pattern} | |
import scala.collection.mutable | |
/**
  * Regex wrapper that reports which named capturing group took part in each match.
  *
  * @param pattern     compiled pattern used for searching
  * @param namedGroups names of the named capturing groups to probe on every match. Each name is
  *                    handed to `Matcher.group(String)`, so it should be declared in `pattern`.
  * @see http://stackoverflow.com/questions/39754604/
  */
class GroupNamedRegex(pattern: Pattern, namedGroups: Set[String]) {

  /**
    * Compiles `regex` and derives the group names by scanning the regex text for `(?<name>`.
    *
    * NOTE: the scan is purely textual, so an occurrence of `(?<name>` that is not a real group
    * opener (for instance after an escaping backslash) is picked up as well.
    */
  def this(regex: String) =
    this(Pattern.compile(regex), GroupNamedRegex.namePattern.findAllMatchIn(regex).map(_.group(1)).toSet)

  /**
    * Iterates over the successive matches of the pattern in `s`.
    *
    * @param s text to search
    * @return an iterator of [[GroupNamedRegex.Match]]; `next()` throws `NoSuchElementException`
    *         once the matches are exhausted
    */
  def findNamedMatches(s: String): Iterator[GroupNamedRegex.Match] = new Iterator[GroupNamedRegex.Match] {
    private[this] val matcher = pattern.matcher(s)
    // Always one step ahead: records whether the matcher is currently positioned on a match.
    private[this] var pending = matcher.find()

    override def hasNext: Boolean = pending

    override def next(): GroupNamedRegex.Match = {
      // Honour the Iterator contract instead of leaking the matcher's IllegalStateException.
      if (!pending) throw new NoSuchElementException("next on exhausted match iterator")
      // Snapshot the match before advancing; the matcher itself is mutated by find().
      val snapshot = matcher.toMatchResult
      val groupName = namedGroups.find(name => Option(matcher.group(name)).isDefined)
      pending = matcher.find()
      GroupNamedRegex.Match(snapshot, groupName)
    }
  }

  /**
    * Overwrites every match that belongs to a named group with `replacement` characters.
    *
    * Matches for which no probed named group captured anything are left untouched.
    *
    * @param t           text to process
    * @param replacement character written over each position of a replaced match
    * @return the text (same length as `t`) with matched spans blanked out, and the names of the
    *         groups that were encountered
    */
  def replaceAndReturnGroups(t: String, replacement: Char = ' '): (String, Set[String]) = {
    val chars = t.toCharArray
    val seen = mutable.Set.empty[String]
    findNamedMatches(t).foreach { m =>
      m.groupName.foreach { group =>
        seen += group
        java.util.Arrays.fill(chars, m.result.start(), m.result.end(), replacement)
      }
    }
    (new String(chars), seen.toSet)
  }
}

object GroupNamedRegex {

  /**
    * A single match.
    *
    * @param result    snapshot of the underlying `java.util.regex` match
    * @param groupName a probed named group that captured text in this match, if any
    */
  case class Match(result: MatchResult, groupName: Option[String])

  /** Regex (with one capturing group) describing a legal group name. */
  private[this] val innerNameRule = "([a-zA-Z][a-zA-Z0-9]*)"

  /** Finds `(?<name>` occurrences in regex source text; group 1 is the name. */
  private[GroupNamedRegex] val namePattern = s"\\(\\?<$innerNameRule>".r

  /** Pattern that cannot match anything: an empty negative lookahead always fails. */
  private[this] val neverMatches = "(?!)"

  /**
    * Wraps `pattern` in a named capturing group.
    *
    * @param name    group name; must consist of a letter followed by letters/digits
    * @param pattern regex source to wrap
    * @return `(?<name>(pattern))`
    * @throws IllegalArgumentException if `name` is not a legal group name
    */
  def named(name: String, pattern: String): String = {
    require(name.matches(innerNameRule), s"$name must match $innerNameRule")
    s"(?<$name>($pattern))"
  }

  /**
    * Builds one regex out of a table of `group name -> alternative patterns`.
    *
    * Within each group the alternatives are tried longest-source-first. Ids with an empty rule
    * set are skipped: joining zero alternatives yields a pattern that matches the empty string
    * at every position, which would shadow the other groups and report the id as found. If
    * nothing remains, the resulting regex never matches.
    *
    * @param table group name to the set of regex sources belonging to it
    * @return a [[GroupNamedRegex]] whose named groups are the ids that have at least one rule
    */
  def fromMultiMap(table: Map[String, Set[String]]): GroupNamedRegex = {
    val combined = table.collect {
      case (id, rules) if rules.nonEmpty => named(id, orJoin(rules.toSeq.sortBy(-_.length)))
    }
    new GroupNamedRegex(if (combined.isEmpty) neverMatches else orJoin(combined))
  }

  /**
    * Joins `tokens` into a single alternation, each alternative in its own group.
    *
    * @param tokens regex sources to combine
    * @param prefix text prepended to every token
    * @param suffix text appended to every token
    * @return `((p+t1+s)|(p+t2+s)|...)`; for no tokens this is `(())`
    */
  def orJoin(tokens: Iterable[String], prefix: String = "", suffix: String = ""): String =
    tokens.map(t => s"$prefix$t$suffix").mkString("((", ")|(", "))")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment