Created
January 8, 2019 11:45
-
-
Save Jire/53628c459040bba73e74034ffa16a15c to your computer and use it in GitHub Desktop.
Incredibly fast zero-allocation chat filter written in Kotlin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ps.eden.server | |
import it.unimi.dsi.fastutil.objects.ObjectArrayList | |
import it.unimi.dsi.fastutil.objects.ObjectList | |
import java.util.concurrent.ForkJoinPool | |
/**
 * Chat filter that reports whether a message contains a blacklisted word.
 *
 * - Checking a message reuses per-thread [StringBuilder] buffers instead of
 *   creating new strings, so [isFiltered] is written to avoid allocations
 *   once the word list has been built.
 * - Configurable white (allowed) and black (blocked) lists.
 * - Leetspeak variants of every blocked word are generated up front
 *   (e.g. "idiot" is also matched as "1diot", "1d10t", "id1o+", etc.).
 * - Configurable l33tspeak character mapping.
 * - Configurable filtered out (ignored) characters.
 *
 * Threading: [isFiltered] only touches thread-local buffers and reads the
 * blocked-word list. [load] and [flag] fill that list asynchronously on the
 * common [ForkJoinPool]; writes are synchronized on the list, reads are not.
 * NOTE(review): callers presumably need to let loading finish before relying
 * on [isFiltered] results - confirm against the start-up sequence.
 *
 * @author Thomas G. P. Nappo (Jire)
 */
object ChatFilter {

    /** Words that are removed from a message before it is scanned, so they never trigger the filter. */
    private val whitelist = arrayOf(
        ServerConstants.GAME_WEBSITE_SHORT,
        "runescape",
        "landscape",
        "machinescape",
        "fashionscape",
        "07scape",
        "2007scape",
        "osrscape",
        "osrsscape",
        "moparscape",
        "didyscape"
    )

    /**
     * Base words that cause a message to be filtered. Messages are lower-cased
     * before matching, so entries are expected to be lower case.
     */
    private val blacklist = arrayOf(
        /* Bad words */
        "nigger",
        /* Advertisement basics */
        "www",
        ".com",
        ".org",
        ".net",
        ".io",
        ".ps",
        ".tk",
        "dotcom",
        "dotorg",
        "dotnet",
        "dottk",
        /* General server coverage */
        "scape",
        /* Individual server names */
        "kratos",
        "atlas",
        "osscape",
        "alora",
        "elkoy",
        "osrune",
        "guthixp",
        "dawntained",
        "locopk",
        "imagineps",
        "nearreal",
        "pkhonor",
        "dreamsc",
        "manicps",
        "draganoth",
        "alosps",
        "rsps2",
        "lostisle",
        "necrotic",
        "redrune",
        "deathwish",
        "pkowned",
        "osbase",
        "beastpk",
        "roatpk",
        "rsgenesis",
        "trinityps",
        "boxrune",
        "runique",
        "furiousp",
        "novus",
        "ikov",
        "joinmy",
        "atarax",
        "nardahp",
        "illerai",
        "letspk",
        "ratedpixel",
        "cloudnine",
        "viceos",
        "deprivedr",
        "exoria",
        "simplicityp",
        "cruxp",
        "ospkz",
        "scapewar",
        "amberp",
        "diviner",
        "osunity",
        "amulius",
        "zenyteps",
        "zenyteosrs"
    )

    /**
     * Flags every entry of the built-in blacklist.
     *
     * Each word is handed to [flag], which does its work asynchronously, so
     * this function returns before the blocked-word list is complete.
     */
    @JvmStatic
    fun load() {
        for (blacklistedWord in blacklist) {
            flag(blacklistedWord)
        }
    }

    /**
     * Checks whether [message] contains a blocked word.
     *
     * The message is lower-cased, whitelisted words are cut out, and the
     * result is scanned twice: once as-is and once with the ignored
     * characters (spaces, dashes, ...) removed.
     *
     * @param message the raw chat message.
     * @return `true` if any blocked word occurs in the message, `false`
     * otherwise (always `false` for an empty message).
     */
    @JvmStatic
    fun isFiltered(message: String): Boolean {
        if (message.isEmpty()) {
            return false
        }

        val lowered = messageSB.get()
        lowered.setLength(0)
        for (i in 0..message.lastIndex) {
            lowered.append(Character.toLowerCase(message[i]))
        }

        stripWhitelisted(lowered, messageSB_Second.get())
        if (containsBlockedWord(lowered)) {
            return true
        }

        /* Second pass: drop the ignored characters so "w w w" is seen as "www". */
        val compacted = filteredSB.get()
        compacted.setLength(0)
        for (i in 0..lowered.lastIndex) {
            val c = lowered[i]
            if (!filteredOutChars.contains(c)) {
                compacted.append(c)
            }
        }
        return containsBlockedWord(compacted)
    }

    /**
     * Adds [plainWord] and all of its leetspeak variants to the blocked words.
     *
     * The work runs on the common [ForkJoinPool]. For every character that
     * has a leetspeak replacement a variant with that single character
     * swapped is produced; each newly added variant is flagged again, which
     * eventually yields every combination of replacements.
     *
     * @param plainWord the word to block; empty input is ignored because an
     * empty blocked word would match every message.
     */
    @JvmStatic
    fun flag(plainWord: CharSequence) {
        if (plainWord.isEmpty()) {
            return
        }
        /* Snapshot now: the caller may pass a mutable sequence and the task runs later. */
        val word = plainWord.toString()
        ForkJoinPool.commonPool().execute {
            blockedWords.put(word)
            val sb = flagSB.get()
            for (i in 0..word.lastIndex) {
                val c = word[i]
                for ((normal, leetspeak) in normalToLeetspeak) {
                    if (c != normal) {
                        continue
                    }
                    /* Rebuild the word with only position i replaced. */
                    sb.setLength(0)
                    sb.append(word, 0, i)
                    sb.append(leetspeak)
                    sb.append(word, i + 1, word.length)
                    val blockedWord = sb.toString()
                    if (blockedWords.put(blockedWord)) {
                        /* If we added it, recursively do it again. */
                        flag(blockedWord)
                    }
                }
            }
        }
    }

    /** Every blocked word, including generated leetspeak variants. */
    private val blockedWords: ObjectList<CharSequence> = ObjectArrayList()

    /* Per-thread scratch buffers so concurrent callers never share a builder. */
    private val flagSB = ThreadLocal.withInitial { StringBuilder() }
    private val filteredSB = ThreadLocal.withInitial { StringBuilder() }
    private val messageSB = ThreadLocal.withInitial { StringBuilder() }
    private val messageSB_Second = ThreadLocal.withInitial { StringBuilder() }

    /** Pairs of (plain character, leetspeak replacement) used by [flag]. */
    private val normalToLeetspeak = arrayOf(
        'o' to '0',
        'i' to '1',
        'l' to '1',
        't' to '+',
        'e' to '3',
        'i' to '!',
        'l' to '!',
        's' to '$',
        'a' to '&',
        'a' to '@',
        'c' to '(',
        'd' to ')',
        'c' to '{',
        'd' to '}',
        'c' to '[',
        'd' to ']',
        'd' to '0',
        'g' to '6',
        't' to '7',
        'g' to '9',
        's' to '5',
        'a' to '4'
    )

    /** Characters that are removed from a message for the second scan in [isFiltered]. */
    private val filteredOutChars = charArrayOf(
        ' ', '-', '_', ',', '=', '<', '>', '?', '|', ';', '#', '\\', '/'
    )

    /**
     * Removes every occurrence of every whitelisted word from [text] in place.
     *
     * @param text the lower-cased message; modified by this call.
     * @param scratch a temporary buffer whose previous content is discarded.
     */
    private fun stripWhitelisted(text: StringBuilder, scratch: StringBuilder) {
        for (whitelistedWord in whitelist) {
            /* An empty entry would be "found" at every index and never advance. */
            if (whitelistedWord.isEmpty()) {
                continue
            }
            var end = text.indexOf(whitelistedWord)
            if (end == -1) {
                continue
            }
            scratch.setLength(0)
            var start = 0
            while (end != -1) {
                /* Copy from the working text, whose offsets match `end`. */
                scratch.append(text, start, end)
                start = end + whitelistedWord.length
                end = text.indexOf(whitelistedWord, start)
            }
            scratch.append(text, start, text.length)
            text.setLength(0)
            text.append(scratch)
        }
    }

    /**
     * Returns `true` if any blocked word occurs anywhere in [text].
     *
     * `fastIterate` is declared outside this file; it is used here exactly as
     * an inline per-element loop with a non-local return.
     */
    private fun containsBlockedWord(text: CharSequence): Boolean {
        blockedWords.fastIterate {
            /* A match at index 0 counts too, hence >= rather than >. */
            if (text.indexOfFast(it) >= 0) {
                return true
            }
        }
        return false
    }

    /**
     * Adds [blockedWord] to the list unless it is already present.
     *
     * @return `true` if the word was added, `false` if it was already listed.
     */
    private fun ObjectList<CharSequence>.put(blockedWord: CharSequence): Boolean {
        synchronized(this) {
            if (contains(blockedWord)) {
                return false
            }
            add(blockedWord)
            return true
        }
    }

    /**
     * Finds the first occurrence of [source] inside the receiver.
     *
     * @param source the sequence to look for.
     * @return the index of the first match in the receiver, `0` when [source]
     * is empty, or `-1` when there is no match.
     */
    private fun CharSequence.indexOfFast(source: CharSequence): Int {
        val needleLength = source.length
        if (needleLength == 0) {
            return 0
        }
        /* Last receiver index at which a full match could still start. */
        val lastStart = length - needleLength
        val first = source[0]
        for (i in 0..lastStart) {
            if (this[i] != first) {
                continue
            }
            var k = 1
            while (k < needleLength && this[i + k] == source[k]) {
                k++
            }
            if (k == needleLength) {
                return i
            }
        }
        return -1
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment