|
@file:OptIn(ExperimentalUnsignedTypes::class) |
|
|
|
import java.io.File |
|
import java.util.* |
|
|
|
data class Node( |
|
val left: Node? = null, |
|
val right: Node? = null, |
|
val symbol: UByte? = null, |
|
val value: Int |
|
) |
|
|
|
data class TableEntry(val huffmanCode: Int, val huffmanCodeLength: Int) { |
|
override fun toString() = "TableEntry(huffmanCode=${huffmanCode.toString(2)}, huffmanCodeLength=$huffmanCodeLength)" |
|
} |
|
|
|
data class CompressionInfo( |
|
val signature: String = "LUCAS", |
|
val table: MutableMap<UByte, TableEntry>, |
|
val compressedBytes: List<UByte> |
|
) |
|
|
|
object HuffmanCompressor { |
|
|
|
fun frequencies(input: UByteArray): IntArray { |
|
val frequencies = IntArray(256) { 0 } |
|
input.forEach { frequencies[it.toInt()]++ } |
|
return frequencies |
|
} |
|
|
|
fun tree(frequencies: IntArray): Node? { |
|
val queue = PriorityQueue<Node>(compareBy { it.value }) |
|
frequencies.forEachIndexed { index, frequency -> |
|
if (frequency > 0) { |
|
queue.add(Node(symbol = index.toUByte(), value = frequency)) |
|
} |
|
} |
|
while (queue.size > 1) { |
|
val first = queue.poll() |
|
val second = queue.poll() |
|
val sum = Node(left = first, right = second, value = first.value + second.value) |
|
queue.add(sum) |
|
} |
|
|
|
return queue.poll() |
|
} |
|
|
|
fun table( |
|
node: Node, |
|
currentCode: Int = 0b0, |
|
currentCodeLength: Int = 0, |
|
table: MutableMap<UByte, TableEntry> = mutableMapOf() |
|
): MutableMap<UByte, TableEntry> { |
|
if (node.symbol != null) { |
|
table[node.symbol] = TableEntry(currentCode, currentCodeLength) |
|
} else { |
|
node.left?.let { |
|
table( |
|
node = node.left, |
|
currentCode = (currentCode shl 1) or 0b0, |
|
currentCodeLength = currentCodeLength + 1, |
|
table = table |
|
) |
|
} |
|
|
|
node.right?.let { |
|
table( |
|
node = node.right, |
|
currentCode = (currentCode shl 1) or 0b1, |
|
currentCodeLength = currentCodeLength + 1, |
|
table = table |
|
) |
|
} |
|
} |
|
|
|
return table |
|
} |
|
|
|
fun compression(input: UByteArray, table: MutableMap<UByte, TableEntry>): MutableList<UByte> { |
|
val uBytes = mutableListOf<UByte>() |
|
var nextByte = 0 |
|
var accumulatedLength = 0 |
|
|
|
input.forEach { uByte -> |
|
val entry = table[uByte] ?: error("missing something 💀") |
|
val currCode = entry.huffmanCode |
|
val currLength = entry.huffmanCodeLength |
|
|
|
for (i in currLength - 1 downTo 0) { |
|
val extractedBit = (currCode shr i) and 1 |
|
nextByte = (nextByte shl 1) or extractedBit |
|
accumulatedLength++ |
|
if (accumulatedLength == 8) { |
|
uBytes += nextByte.toUByte() |
|
nextByte = 0 |
|
accumulatedLength = 0 |
|
} |
|
} |
|
} |
|
|
|
if (accumulatedLength > 0) uBytes += nextByte.shl(8 - accumulatedLength).toUByte() |
|
|
|
return uBytes |
|
} |
|
} |
|
|
|
object HuffmanWriter { |
|
|
|
private fun getCompressionInfoFor(input: UByteArray): CompressionInfo { |
|
val frequencies = HuffmanCompressor.frequencies(input) |
|
val tree = HuffmanCompressor.tree(frequencies)!! |
|
val table = HuffmanCompressor.table(tree) |
|
val compression = HuffmanCompressor.compression(input, table) |
|
return CompressionInfo(table = table, compressedBytes = compression) |
|
} |
|
|
|
private fun encodeCompressionInfo(input: UByteArray): MutableList<UByte> { |
|
val info = getCompressionInfoFor(input) |
|
val bytes = mutableListOf<UByte>() |
|
|
|
// put signature |
|
bytes.addAll(info.signature.toByteArray().toUByteArray()) |
|
|
|
// put table length |
|
bytes += info.table.size.toUByte() |
|
|
|
// put table info |
|
info.table.forEach { (symbol, entry) -> |
|
bytes += symbol |
|
bytes += entry.huffmanCodeLength.toUByte() |
|
} |
|
|
|
// put compression bytes |
|
bytes.addAll(info.compressedBytes) |
|
return bytes |
|
} |
|
|
|
fun compressToFile(input: UByteArray, outputPath: String) { |
|
val file = File(outputPath) |
|
if (file.exists()) error("File of path [$outputPath] already exists!") |
|
val encodedCompression = encodeCompressionInfo(input) |
|
|
|
// the final file compression bytes |
|
// println(encodedCompression.map { it.toString(16).padStart(2, '0').uppercase() }) |
|
|
|
file.writeBytes(encodedCompression.toUByteArray().toByteArray()) |
|
} |
|
} |
|
|
|
fun main() { |
|
val input = "eu meu Noah".toByteArray().toUByteArray() |
|
HuffmanWriter.compressToFile(input, "kkkk.huff") |
|
} |
não sei se funciona, ainda não fiz decode dos dados pra saber 💀
Se um dia funcionar, eu comento aqui