Skip to content

Instantly share code, notes, and snippets.

@gordinmitya
Created June 30, 2025 18:06
Show Gist options
  • Save gordinmitya/a3d0f7c84ff568bcaf82663f0cee0e5b to your computer and use it in GitHub Desktop.
Save gordinmitya/a3d0f7c84ff568bcaf82663f0cee0e5b to your computer and use it in GitHub Desktop.
// Rabin–Karp substring search algorithm
class SubstringSearch private constructor(val text: String) {
val powBase = LongArray(text.length + 1).also { it[0] = 1L }
val hHash = LongArray(text.length + 1)
init {
for (i in 1 until powBase.size) {
powBase[i] = powBase[i - 1] * BASE
}
for (i in 0 until text.length) {
hHash[i + 1] = hHash[i] * BASE + text[i].code
}
}
fun indexOf(substring: String, fromIndex: Int = 0): Int {
val m = substring.length
if (m == 0) return fromIndex.coerceAtMost(text.length)
if (m > text.length) return -1
// Compute the hash of the pattern (needle)
var nHash = 0L
for (i in substring.indices) {
nHash = nHash * BASE + substring[i].code
}
// Rolling hash search (natural overflow, no %)
for (i in fromIndex..text.length - m) {
val window = hHash[i + m] - hHash[i] * powBase[m]
if (window == nHash) {
// Verify by direct comparison to avoid hash collision
if (text.regionMatches(i, substring, 0, m)) {
return i
}
}
}
return -1
}
companion object {
private const val BASE = 256L
@JvmStatic
fun prepare(text: String): SubstringSearch {
return SubstringSearch(text)
}
}
}
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.RepeatedTest
import org.junit.jupiter.api.Test
import kotlin.random.Random
import kotlin.system.measureNanoTime
class SubstringSearchTest {
/** Basic deterministic scenarios. */
@Test
fun basicMatches() {
val search = SubstringSearch.prepare("abracadabra")
assertEquals(0, search.indexOf("abra")) // at start
assertEquals(0, search.indexOf("abra", 0)) // at start
assertEquals(3, search.indexOf("acad")) // in the middle
assertEquals(7, search.indexOf("abra", 1)) // later occurrence using fromIndex
}
/** Whole string search (pattern == text). */
@Test
fun substringEqualsText() {
val text = "hello"
val search = SubstringSearch.prepare(text)
assertEquals(0, search.indexOf(text))
}
/** Pattern longer than text → no match. */
@Test
fun substringLongerThanText() {
val search = SubstringSearch.prepare("hi")
assertEquals(-1, search.indexOf("hello"))
}
/** Empty pattern is defined to match immediately at *fromIndex*. */
@Test
fun emptySubstring() {
val text = "abc"
val search = SubstringSearch.prepare(text)
assertEquals(0, search.indexOf(""))
assertEquals(2, search.indexOf("", 2))
}
/** Pattern not present. */
@Test
fun notFound() {
val search = SubstringSearch.prepare("kotlin")
assertEquals(-1, search.indexOf("java"))
}
/** Behaviour on empty text input. */
@Test
fun textEmpty() {
val search = SubstringSearch.prepare("")
assertEquals(0, search.indexOf("")) // empty pattern trivially matches
assertEquals(-1, search.indexOf("x"))
}
/** Verify that *fromIndex* skips earlier occurrences. */
@Test
fun fromIndexSkipsEarlierOccurrence() {
val search = SubstringSearch.prepare("aaaaa")
assertEquals(0, search.indexOf("aa"))
assertEquals(1, search.indexOf("aa", 1))
assertEquals(2, search.indexOf("aa", 2))
}
/**
* Randomised, property-based stress-test: compare against JDK reference implementation.
* Repeated 300× with up to 200-char random ASCII strings.
*/
@RepeatedTest(300)
fun randomizedMatches() {
val rand = Random.Default
val len = rand.nextInt(16000, 64000)
val text = buildString {
repeat(len) { append((rand.nextInt(32, 128)).toChar()) }
}
val searchObj = SubstringSearch.prepare(text)
val subStart = if (text.isEmpty()) 0 else rand.nextInt(0, text.length)
val subLen = rand.nextInt(0, text.length - subStart + 1)
val substring = text.substring(subStart, subStart + subLen)
val fromIndex = if (text.isEmpty()) 0 else rand.nextInt(0, text.length / 10 + 1)
// Measure built-in
var expected: Int = -1
val indexOfTime = measureNanoTime {
repeat(10) {
expected = text.indexOf(substring, fromIndex)
}
}
// Measure custom
var actual: Int = -1
val customTime = measureNanoTime {
repeat(10) {
actual = searchObj.indexOf(substring, fromIndex)
}
}
// Always check for correctness
assertEquals(
expected, actual,
"Failed on text=\"$text\", substring=\"$substring\", fromIndex=$fromIndex"
)
if (indexOfTime > customTime) {
println(
"text.len=${text.length}, substring.len=${substring.length}, " +
"fromIndex=$fromIndex | " +
"indexOf=${indexOfTime}ns, custom=${customTime}ns"
)
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment