Skip to content

Instantly share code, notes, and snippets.

@pathikrit
Last active September 21, 2017 01:37
Show Gist options
  • Save pathikrit/5883ad3fe68cf4bd3243020dee397317 to your computer and use it in GitHub Desktop.
Save pathikrit/5883ad3fe68cf4bd3243020dee397317 to your computer and use it in GitHub Desktop.
Group named Regex
import java.util.regex.{MatchResult, Pattern}
import scala.collection.mutable
/**
 * A regular expression that can report which of its named capturing groups
 * participated in each match.
 *
 * @param pattern     the compiled regular expression used for searching
 * @param namedGroups names of the capturing groups to probe on every match; each is passed to
 *                    `Matcher.group(String)`, so it must be a group that `pattern` declares
 * @see http://stackoverflow.com/questions/39754604/
 */
class GroupNamedRegex(pattern: Pattern, namedGroups: Set[String]) {

  /**
   * Compiles `regex` and collects its group names by scanning the regex source text
   * with `GroupNamedRegex.namePattern`.
   *
   * @param regex the regular expression source
   */
  def this(regex: String) =
    this(Pattern.compile(regex), GroupNamedRegex.namePattern.findAllMatchIn(regex).map(_.group(1)).toSet)

  /**
   * Lazily finds successive matches of the pattern in `s`.
   *
   * Each element carries a snapshot of the match and the name of a named group that
   * participated in it. When several named groups participated (for example nested groups),
   * the one reported is whichever comes first in the iteration order of `namedGroups`.
   *
   * @param s the text to search
   * @return an iterator over the matches, in the order `Matcher.find()` produces them
   * @throws NoSuchElementException from `next()` once the matches are exhausted
   */
  def findNamedMatches(s: String): Iterator[GroupNamedRegex.Match] = new Iterator[GroupNamedRegex.Match] {
    private[this] val m = pattern.matcher(s)
    // One-match lookahead: the matcher is advanced eagerly so hasNext is a plain field read.
    private[this] var _hasNext = m.find()

    override def hasNext: Boolean = _hasNext

    override def next(): GroupNamedRegex.Match = {
      // Honour the Iterator contract. Without this guard the matcher would be queried in a
      // failed state (IllegalStateException, or a restarted search on the following find()).
      if (!_hasNext) throw new NoSuchElementException("next on exhausted match iterator")
      // The group must be probed on the live matcher before find() moves it on;
      // Matcher.group returns null for a group that did not participate.
      val groupName = namedGroups.find(group => Option(m.group(group)).isDefined)
      val ans = GroupNamedRegex.Match(m.toMatchResult, groupName)
      _hasNext = m.find()
      ans
    }
  }

  /**
   * Masks every match that belongs to a named group and reports which groups were seen.
   *
   * Characters covered by such a match are overwritten with `replacement`, so the returned
   * string has the same length as `t`. Matches for which no named group could be identified
   * are left untouched.
   *
   * @param t           the text to search
   * @param replacement the character written over each matched position
   * @return the masked text and the set of group names that matched at least once
   */
  def replaceAndReturnGroups(t: String, replacement: Char = ' '): (String, Set[String]) = {
    val array = t.toCharArray
    val ans = mutable.Set.empty[String]
    findNamedMatches(t).foreach { m =>
      m.groupName.foreach { group =>
        ans += group
        for (i <- m.result.start() until m.result.end()) array(i) = replacement
      }
    }
    (new String(array), ans.toSet)
  }
}
/** Helpers for building regular expressions whose alternatives are tagged with named groups. */
object GroupNamedRegex {

  /**
   * A single match together with the named group it was attributed to.
   *
   * @param result    the match itself (positions and captured text)
   * @param groupName name of a named group that participated in the match, if any was found
   */
  case class Match(result: MatchResult, groupName: Option[String])

  // A group name: one ASCII letter followed by ASCII letters or digits.
  // The surrounding parentheses make the name capture group 1 of namePattern.
  private[this] val innerNameRule = "([a-zA-Z][a-zA-Z0-9]*)"

  // Matches the opening "(?<name>" of a named group in regex source text.
  // NOTE: this is a purely textual scan; it does not account for escaping or character classes.
  private[GroupNamedRegex] val namePattern = s"\\(\\?<$innerNameRule>".r

  /**
   * Wraps `pattern` in a named capturing group.
   *
   * @param name    the group name; must consist of a letter followed by letters or digits
   * @param pattern the regular expression source to wrap
   * @return regex source of the form `(?<name>(pattern))`
   * @throws IllegalArgumentException if `name` is not a valid group name
   */
  def named(name: String, pattern: String): String = {
    require(name.matches(innerNameRule), s"$name must match $innerNameRule")
    s"(?<$name>($pattern))"
  }

  /**
   * Builds a regex that matches any rule of any entry in `table`, with each entry's rules
   * wrapped in a named group called after the entry's key.
   *
   * An entry with no rules contributes an alternative that never matches.
   *
   * @param table group name -> regex sources belonging to that group
   * @return a [[GroupNamedRegex]] over the combined alternation
   * @throws IllegalArgumentException if a key is not a valid group name
   */
  def fromMultiMap(table: Map[String, Set[String]]): GroupNamedRegex = {
    val combined = table.map { case (id, rules) =>
      // Alternation is tried left to right, so longer rules go first to avoid a shorter
      // rule winning where a longer one would also match.
      named(id, orJoin(rules.toSeq.sortBy(-_.length)))
    }
    new GroupNamedRegex(orJoin(combined))
  }

  /**
   * Joins `tokens` into a single alternation, each alternative in its own group and
   * optionally decorated with `prefix` and `suffix`.
   *
   * @param tokens regex sources to combine
   * @param prefix text placed before every token
   * @param suffix text placed after every token
   * @return `((p+t1+s)|(p+t2+s)|...)`; for empty `tokens`, a pattern that never matches
   */
  def orJoin(tokens: Iterable[String], prefix: String = "", suffix: String = ""): String =
    // An alternation with no alternatives must match nothing. The plain mkString would
    // yield "(())", which matches the empty string at every position.
    if (tokens.isEmpty) "((?!))"
    else tokens.map(t => prefix + t + suffix).mkString("((", ")|(", "))")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment