Created
October 10, 2017 20:45
-
-
Save bzamecnik/a117063571018f6082ff6944e4bf094d to your computer and use it in GitHub Desktop.
Hack to limit execution time of java.util.regex.Pattern with catastrophic backtracking.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.github.bzamecnik.regex | |
import scala.concurrent.duration._ | |
/** | |
* Wraps String to make its usage time limited. | |
* | |
* After given time duration charAt() operation will start throwing a | |
* TimeoutException. | |
* | |
* This allows to interrupt matching in java.util.regex.Pattern. | |
* The only assumption is that Pattern takes characters one at a time | |
* and that the times for processing each subsequent character increase | |
* roughly 2x in each iteration. Then in the worst case when charAt() is called | |
* just before the given timeout, the next charAt() which results in the | |
* exception around 2x timeout time. | |
* | |
* Don't forget to use the instance immediately since the timing starts at the | |
* construction time. | |
* | |
* Example usage: | |
* | |
* import scala.concurrent.duration._ | |
* val str: TimeLimitedString = "aaaaaaaaaaaaaaaaaaaaaaab".timeLimited(100.millis) | |
* // throws TimeoutException | |
* regex.matcher(str).matches() | |
* | |
* Idea comes from: https://stackoverflow.com/questions/910740/cancelling-a-long-running-regex-match | |
* Changes: | |
* - internal timing instead of Thread.interrupted() | |
* - Scala implementation, sytnax sugar with implicits | |
* - custom exception | |
* - test with regexes | |
*/ | |
case class TimeLimitedString(value: CharSequence, duration: Duration = 1.second) extends CharSequence { | |
// ttl = time to live | |
private val ttlMillis = duration.toMillis | |
require(ttlMillis > 0) | |
private val endTime: Long = System.currentTimeMillis + ttlMillis | |
private def remainingMillis: Long = math.max(endTime - System.currentTimeMillis, 0) | |
private def isAlive: Boolean = remainingMillis > 0 | |
private def checkTimeout() = { | |
if (!isAlive) throw TimeoutException(s"Timeout after $ttlMillis ms") | |
} | |
override def charAt(index: Int): Char = { | |
checkTimeout() | |
value.charAt(index) | |
} | |
override def length: Int = value.length | |
override def subSequence(start: Int, end: Int) = | |
TimeLimitedString(value.subSequence(start, end), remainingMillis.millis) | |
override def toString: String = value.toString | |
} | |
case class TimeoutException(message: String = null) extends RuntimeException(message) | |
object TimeLimitedStringImplicits { | |
/** | |
* Allows to wrap String to TimeLimitedString easily. | |
*/ | |
implicit class StringWithTimeLimit(value: String) { | |
def timeLimited(duration: Duration): TimeLimitedString = { | |
TimeLimitedString(value, duration) | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.github.bzamecnik.regex | |
import java.util.regex.Pattern | |
import org.scalatest.FunSuite | |
import scala.concurrent.duration._ | |
class TimeLimitedStringTest extends FunSuite { | |
test("regex takes too long and cannot be easily interrupted") { | |
val regex = Pattern.compile("^(a+)+$") | |
val value = "aaaaaaaaaaaaaaaaaaaaaab" | |
// ~1 sec | |
printTime { | |
regex.matcher(value).matches() | |
} | |
} | |
test("regex will be killed with a string that times out") { | |
val regex = Pattern.compile("^(a+)+$") | |
import com.github.bzamecnik.regex.TimeLimitedStringImplicits._ | |
val ttlMillis = 200 | |
val (_, elapsedMillis) = time { | |
try { | |
val value = "aaaaaaaaaaaaaaaaaaaaaab".timeLimited(duration = ttlMillis.millis) | |
regex.matcher(value).matches() | |
} catch { | |
case ex: TimeoutException => println(ex) | |
} | |
} | |
println(s"Elapsed time: $elapsedMillis ms") | |
// it should step within the timeout + some epsilon | |
// - the regex may take next char occasionally | |
// - there might be some overhead for exception handling | |
assert(elapsedMillis < ttlMillis * 1.5) | |
} | |
private def time[R](block: => R): (R, Long) = { | |
val start: Long = System.currentTimeMillis | |
val result = block | |
val end: Long = System.currentTimeMillis | |
val elapsedMillis: Long = end - start | |
(result, elapsedMillis) | |
} | |
private def printTime[R](block: => R, title: String = ""): R = { | |
val (result, elapsedTime) = time(block) | |
println(s"Elapsed time${if (title.nonEmpty) s" [$title]" else ""}: $elapsedTime ms") | |
result | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment