Skip to content

Instantly share code, notes, and snippets.

@bzamecnik
Created October 10, 2017 20:45
Show Gist options
  • Save bzamecnik/a117063571018f6082ff6944e4bf094d to your computer and use it in GitHub Desktop.
Save bzamecnik/a117063571018f6082ff6944e4bf094d to your computer and use it in GitHub Desktop.
Hack to limit execution time of java.util.regex.Pattern with catastrophic backtracking.
package com.github.bzamecnik.regex
import scala.concurrent.duration._
/**
* Wraps String to make its usage time limited.
*
* After given time duration charAt() operation will start throwing a
* TimeoutException.
*
* This allows to interrupt matching in java.util.regex.Pattern.
* The only assumption is that Pattern takes characters one at a time
* and that the times for processing each subsequent character increase
* roughly 2x in each iteration. Then in the worst case when charAt() is called
* just before the given timeout, the next charAt() which results in the
* exception around 2x timeout time.
*
* Don't forget to use the instance immediately since the timing starts at the
* construction time.
*
* Example usage:
*
* import scala.concurrent.duration._
* val str: TimeLimitedString = "aaaaaaaaaaaaaaaaaaaaaaab".timeLimited(100.millis)
* // throws TimeoutException
* regex.matcher(str).matches()
*
* Idea comes from: https://stackoverflow.com/questions/910740/cancelling-a-long-running-regex-match
* Changes:
* - internal timing instead of Thread.interrupted()
* - Scala implementation, sytnax sugar with implicits
* - custom exception
* - test with regexes
*/
case class TimeLimitedString(value: CharSequence, duration: Duration = 1.second) extends CharSequence {
// ttl = time to live
private val ttlMillis = duration.toMillis
require(ttlMillis > 0)
private val endTime: Long = System.currentTimeMillis + ttlMillis
private def remainingMillis: Long = math.max(endTime - System.currentTimeMillis, 0)
private def isAlive: Boolean = remainingMillis > 0
private def checkTimeout() = {
if (!isAlive) throw TimeoutException(s"Timeout after $ttlMillis ms")
}
override def charAt(index: Int): Char = {
checkTimeout()
value.charAt(index)
}
override def length: Int = value.length
override def subSequence(start: Int, end: Int) =
TimeLimitedString(value.subSequence(start, end), remainingMillis.millis)
override def toString: String = value.toString
}
case class TimeoutException(message: String = null) extends RuntimeException(message)
object TimeLimitedStringImplicits {
/**
* Allows to wrap String to TimeLimitedString easily.
*/
implicit class StringWithTimeLimit(value: String) {
def timeLimited(duration: Duration): TimeLimitedString = {
TimeLimitedString(value, duration)
}
}
}
package com.github.bzamecnik.regex
import java.util.regex.Pattern
import org.scalatest.FunSuite
import scala.concurrent.duration._
class TimeLimitedStringTest extends FunSuite {
test("regex takes too long and cannot be easily interrupted") {
val regex = Pattern.compile("^(a+)+$")
val value = "aaaaaaaaaaaaaaaaaaaaaab"
// ~1 sec
printTime {
regex.matcher(value).matches()
}
}
test("regex will be killed with a string that times out") {
val regex = Pattern.compile("^(a+)+$")
import com.github.bzamecnik.regex.TimeLimitedStringImplicits._
val ttlMillis = 200
val (_, elapsedMillis) = time {
try {
val value = "aaaaaaaaaaaaaaaaaaaaaab".timeLimited(duration = ttlMillis.millis)
regex.matcher(value).matches()
} catch {
case ex: TimeoutException => println(ex)
}
}
println(s"Elapsed time: $elapsedMillis ms")
// it should step within the timeout + some epsilon
// - the regex may take next char occasionally
// - there might be some overhead for exception handling
assert(elapsedMillis < ttlMillis * 1.5)
}
private def time[R](block: => R): (R, Long) = {
val start: Long = System.currentTimeMillis
val result = block
val end: Long = System.currentTimeMillis
val elapsedMillis: Long = end - start
(result, elapsedMillis)
}
private def printTime[R](block: => R, title: String = ""): R = {
val (result, elapsedTime) = time(block)
println(s"Elapsed time${if (title.nonEmpty) s" [$title]" else ""}: $elapsedTime ms")
result
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment