Created
February 4, 2025 08:38
-
-
Save takapi327/233176b326b8019e55bf6bb1c289987c to your computer and use it in GitHub Desktop.
Regarding scodec performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//> using scala 3.6.3 | |
//> using dep org.scodec::scodec-bits:1.2.1 | |
//> using dep org.scodec::scodec-core:2.3.2 | |
import scodec.bits.* | |
import scodec.codecs.* | |
// Number of timed iterations per benchmark.
val iterations = 10000
val str = "Hello World!"
// Encode the payload first: the length prefix must count encoded UTF-8 bytes,
// which differs from `str.length` (UTF-16 code units) for non-ASCII text.
val stringBits = BitVector.encodeUtf8(str) match
  case Right(encoded) => encoded
  case Left(error)    => throw error // surface the real encoding failure
val payloadByteCount = (stringBits.size / 8).toInt
// The prefix is a single byte, so larger payloads cannot be framed correctly.
Predef.require(
  payloadByteCount <= 255,
  s"payload is $payloadByteCount bytes; a one-byte length prefix allows at most 255"
)
val lengthBits = BitVector(payloadByteCount)
// Framed message: one-byte length prefix followed by the UTF-8 payload.
val bits = lengthBits ++ stringBits
// Warm-up pass: run both decoders before any timing is taken.
(0 until 10000).foreach { _ =>
  decodeByCodecs()
  decodeBySplitAt()
}

// Benchmark 1: codec-based decoding.
// The decoded strings are discarded; only elapsed time is recorded.
// Note that `benchmark1End` holds the elapsed nanoseconds, not an end timestamp.
val benchmark1Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeByCodecs())
val benchmark1End = System.nanoTime() - benchmark1Start

// Benchmark 2: manual splitAt-based decoding (same measurement scheme).
val benchmark2Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeBySplitAt())
val benchmark2End = System.nanoTime() - benchmark2Start

// Report whole milliseconds (integer division truncates).
// Figures noted by the original author: benchmark1 = 5 ms, benchmark2 = 2 ms.
Seq("benchmark1" -> benchmark1End, "benchmark2" -> benchmark2End).foreach { (label, nanos) =>
  println(s"$label: ${nanos / 1000000} ms")
}
/** Decodes the framed message with scodec combinators: a `uint8` length followed by
  * that many bytes, which are turned into a UTF-8 string.
  */
def decodeByCodecs(): String =
  // Explicit flatMap/map chain; this is what the equivalent for-comprehension expands to.
  val decoder = uint8.flatMap { size =>
    bytes(size).map(payload => new String(payload.toArray, "UTF-8"))
  }
  decoder.decode(bits).require.value
/** Decodes the framed message by slicing the bit vector by hand instead of using codecs. */
def decodeBySplitAt(): String =
  // The first 8 bits are the length prefix; everything after is the payload.
  val (header, rest) = bits.splitAt(8)
  val byteCount = header.toInt(signed = false)
  // The prefix counts bytes, while BitVector offsets are in bits.
  val payload = rest.take(byteCount * 8L)
  new String(payload.toByteArray, "UTF-8")
I also measured decoding directly from an Array[Byte] and from an fs2 Chunk.
// Materialise the framed message once as a raw byte array ...
val byteArray = bits.toByteArray
// ... and wrap the same bytes in an fs2 Chunk for the Chunk-based decoder.
val chunk = fs2.Chunk(byteArray*)
/** Decodes the framed message straight from the raw byte array. */
def decodeByStandard(): String =
  // Byte 0 is the length prefix; read it as unsigned so values above 127 stay positive.
  val payloadSize = java.lang.Byte.toUnsignedInt(byteArray(0))
  // The payload starts right after the one-byte prefix.
  new String(byteArray, 1, payloadSize, "UTF-8")
/** Decodes the framed message from the fs2 Chunk. */
def decodeByChunk(): String =
  // Element 0 is the length prefix; read it as unsigned so values above 127 stay positive.
  val payloadSize = java.lang.Byte.toUnsignedInt(chunk(0))
  // Skip the prefix, keep exactly `payloadSize` bytes, and copy them out for String.
  val payload = chunk.drop(1).take(payloadSize).toArray
  new String(payload, "UTF-8")
The results were as follows, with the direct decoding of Chunk being the fastest.
codecs: 9 ms
splitAt: 3 ms
chunk: 1 ms
array: 4 ms
Full code
//> using scala 3.6.3
//> using dep co.fs2::fs2-core:3.11.0
//> using dep org.scodec::scodec-bits:1.2.1
//> using dep org.scodec::scodec-core:2.3.2
import scodec.bits.*
import scodec.codecs.*
// Number of timed iterations per benchmark.
val iterations = 10000
val str = "Hello World!"
// Encode the payload first: the length prefix must count encoded UTF-8 bytes,
// which differs from `str.length` (UTF-16 code units) for non-ASCII text.
val stringBits = BitVector.encodeUtf8(str) match
  case Right(encoded) => encoded
  case Left(error)    => throw error // surface the real encoding failure
val payloadByteCount = (stringBits.size / 8).toInt
// The prefix is a single byte, so larger payloads cannot be framed correctly.
Predef.require(
  payloadByteCount <= 255,
  s"payload is $payloadByteCount bytes; a one-byte length prefix allows at most 255"
)
val lengthBits = BitVector(payloadByteCount)
// Framed message: one-byte length prefix followed by the UTF-8 payload.
val bits = lengthBits ++ stringBits
// The same framed message as a raw byte array and as an fs2 Chunk.
val byteArray = bits.toByteArray
val chunk = fs2.Chunk(byteArray*)
// Warm-up pass: run all four decoders before any timing is taken.
(0 until 100000000).foreach { _ =>
  decodeByCodecs()
  decodeBySplitAt()
  decodeByChunk()
  decodeByStandard()
}

// Each benchmark runs one decoder `iterations` times. The decoded strings are
// discarded; only elapsed time is recorded. Note that each `benchmarkNEnd` holds
// the elapsed nanoseconds, not an end timestamp.

// Benchmark 1: scodec codecs.
val benchmark1Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeByCodecs())
val benchmark1End = System.nanoTime() - benchmark1Start

// Benchmark 2: manual BitVector.splitAt.
val benchmark2Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeBySplitAt())
val benchmark2End = System.nanoTime() - benchmark2Start

// Benchmark 3: fs2 Chunk.
val benchmark3Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeByChunk())
val benchmark3End = System.nanoTime() - benchmark3Start

// Benchmark 4: plain byte array.
val benchmark4Start = System.nanoTime()
(0 until iterations).foreach(_ => decodeByStandard())
val benchmark4End = System.nanoTime() - benchmark4Start

// Report whole milliseconds (integer division truncates).
// Figures noted by the original author: 9 ms, 3 ms, 1 ms and 4 ms respectively.
Seq(
  "benchmark1" -> benchmark1End,
  "benchmark2" -> benchmark2End,
  "benchmark3" -> benchmark3End,
  "benchmark4" -> benchmark4End
).foreach { (label, nanos) =>
  println(s"$label: ${nanos / 1000000} ms")
}
/** Decodes the framed message with scodec codecs in two explicit steps, without
  * combining them through flatMap/map.
  */
def decodeByCodecs(): String =
  // Step 1: read the uint8 length prefix and keep the remaining bits.
  val header = uint8.decode(bits).require
  // Step 2: read exactly that many bytes from what follows the prefix.
  val payload = bytes(header.value).decode(header.remainder).require.value
  new String(payload.toArray, "UTF-8")
/** Decodes the framed message by slicing the bit vector by hand instead of using codecs. */
def decodeBySplitAt(): String =
  // The first 8 bits are the length prefix; everything after is the payload.
  val (header, rest) = bits.splitAt(8)
  val byteCount = header.toInt(signed = false)
  // The prefix counts bytes, while BitVector offsets are in bits.
  new String(rest.take(byteCount * 8L).toByteArray, "UTF-8")
/** Decodes the framed message straight from the raw byte array. */
def decodeByStandard(): String =
  // Byte 0 is the length prefix; masking with 0xFF reads it as an unsigned value.
  val payloadSize = byteArray(0) & 0xFF
  // The payload starts right after the one-byte prefix.
  new String(byteArray, 1, payloadSize, "UTF-8")
/** Decodes the framed message from the fs2 Chunk. */
def decodeByChunk(): String =
  // Element 0 is the length prefix; masking with 0xFF reads it as an unsigned value.
  val payloadSize = chunk(0) & 0xFF
  // Skip the prefix, keep exactly `payloadSize` bytes, and copy them out for String.
  val payload = chunk.drop(1).take(payloadSize).toArray
  new String(payload, "UTF-8")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
As a test, I rewrote the decoder without flatMap/map; the results are shown below.
Here is the modified code. Is there anything wrong with my usage?