Created
June 30, 2025 18:06
-
-
Save gordinmitya/a3d0f7c84ff568bcaf82663f0cee0e5b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rabin–Karp substring search algorithm | |
class SubstringSearch private constructor(val text: String) { | |
val powBase = LongArray(text.length + 1).also { it[0] = 1L } | |
val hHash = LongArray(text.length + 1) | |
init { | |
for (i in 1 until powBase.size) { | |
powBase[i] = powBase[i - 1] * BASE | |
} | |
for (i in 0 until text.length) { | |
hHash[i + 1] = hHash[i] * BASE + text[i].code | |
} | |
} | |
fun indexOf(substring: String, fromIndex: Int = 0): Int { | |
val m = substring.length | |
if (m == 0) return fromIndex.coerceAtMost(text.length) | |
if (m > text.length) return -1 | |
// Compute the hash of the pattern (needle) | |
var nHash = 0L | |
for (i in substring.indices) { | |
nHash = nHash * BASE + substring[i].code | |
} | |
// Rolling hash search (natural overflow, no %) | |
for (i in fromIndex..text.length - m) { | |
val window = hHash[i + m] - hHash[i] * powBase[m] | |
if (window == nHash) { | |
// Verify by direct comparison to avoid hash collision | |
if (text.regionMatches(i, substring, 0, m)) { | |
return i | |
} | |
} | |
} | |
return -1 | |
} | |
companion object { | |
private const val BASE = 256L | |
@JvmStatic | |
fun prepare(text: String): SubstringSearch { | |
return SubstringSearch(text) | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.junit.jupiter.api.Assertions.assertEquals | |
import org.junit.jupiter.api.RepeatedTest | |
import org.junit.jupiter.api.Test | |
import kotlin.random.Random | |
import kotlin.system.measureNanoTime | |
class SubstringSearchTest { | |
/** Basic deterministic scenarios. */ | |
@Test | |
fun basicMatches() { | |
val search = SubstringSearch.prepare("abracadabra") | |
assertEquals(0, search.indexOf("abra")) // at start | |
assertEquals(0, search.indexOf("abra", 0)) // at start | |
assertEquals(3, search.indexOf("acad")) // in the middle | |
assertEquals(7, search.indexOf("abra", 1)) // later occurrence using fromIndex | |
} | |
/** Whole string search (pattern == text). */ | |
@Test | |
fun substringEqualsText() { | |
val text = "hello" | |
val search = SubstringSearch.prepare(text) | |
assertEquals(0, search.indexOf(text)) | |
} | |
/** Pattern longer than text → no match. */ | |
@Test | |
fun substringLongerThanText() { | |
val search = SubstringSearch.prepare("hi") | |
assertEquals(-1, search.indexOf("hello")) | |
} | |
/** Empty pattern is defined to match immediately at *fromIndex*. */ | |
@Test | |
fun emptySubstring() { | |
val text = "abc" | |
val search = SubstringSearch.prepare(text) | |
assertEquals(0, search.indexOf("")) | |
assertEquals(2, search.indexOf("", 2)) | |
} | |
/** Pattern not present. */ | |
@Test | |
fun notFound() { | |
val search = SubstringSearch.prepare("kotlin") | |
assertEquals(-1, search.indexOf("java")) | |
} | |
/** Behaviour on empty text input. */ | |
@Test | |
fun textEmpty() { | |
val search = SubstringSearch.prepare("") | |
assertEquals(0, search.indexOf("")) // empty pattern trivially matches | |
assertEquals(-1, search.indexOf("x")) | |
} | |
/** Verify that *fromIndex* skips earlier occurrences. */ | |
@Test | |
fun fromIndexSkipsEarlierOccurrence() { | |
val search = SubstringSearch.prepare("aaaaa") | |
assertEquals(0, search.indexOf("aa")) | |
assertEquals(1, search.indexOf("aa", 1)) | |
assertEquals(2, search.indexOf("aa", 2)) | |
} | |
/** | |
* Randomised, property-based stress-test: compare against JDK reference implementation. | |
* Repeated 300× with up to 200-char random ASCII strings. | |
*/ | |
@RepeatedTest(300) | |
fun randomizedMatches() { | |
val rand = Random.Default | |
val len = rand.nextInt(16000, 64000) | |
val text = buildString { | |
repeat(len) { append((rand.nextInt(32, 128)).toChar()) } | |
} | |
val searchObj = SubstringSearch.prepare(text) | |
val subStart = if (text.isEmpty()) 0 else rand.nextInt(0, text.length) | |
val subLen = rand.nextInt(0, text.length - subStart + 1) | |
val substring = text.substring(subStart, subStart + subLen) | |
val fromIndex = if (text.isEmpty()) 0 else rand.nextInt(0, text.length / 10 + 1) | |
// Measure built-in | |
var expected: Int = -1 | |
val indexOfTime = measureNanoTime { | |
repeat(10) { | |
expected = text.indexOf(substring, fromIndex) | |
} | |
} | |
// Measure custom | |
var actual: Int = -1 | |
val customTime = measureNanoTime { | |
repeat(10) { | |
actual = searchObj.indexOf(substring, fromIndex) | |
} | |
} | |
// Always check for correctness | |
assertEquals( | |
expected, actual, | |
"Failed on text=\"$text\", substring=\"$substring\", fromIndex=$fromIndex" | |
) | |
if (indexOfTime > customTime) { | |
println( | |
"text.len=${text.length}, substring.len=${substring.length}, " + | |
"fromIndex=$fromIndex | " + | |
"indexOf=${indexOfTime}ns, custom=${customTime}ns" | |
) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment