Skip to content

Instantly share code, notes, and snippets.

@aSemy
Last active March 7, 2024 10:43
Show Gist options
  • Save aSemy/c076a8b23c509a318f46d6ff5a1c954a to your computer and use it in GitHub Desktop.
Save aSemy/c076a8b23c509a318f46d6ff5a1c954a to your computer and use it in GitHub Desktop.
Okio convert UTF 8/16/32

Untested

import okio.*
import okio.ByteString.Companion.encodeUtf8
import org.snakeyaml.engine.v2.api.YamlUnicodeReader.CharEncoding
import org.snakeyaml.engine.v2.api.YamlUnicodeReader.CharEncoding.*
import kotlin.jvm.JvmName

/**
 * Creates a [BufferedSource] backed by an in-memory [Buffer] that holds this [String].
 *
 * The characters are encoded as UTF-8 before being written into the buffer, so the
 * bytes read back from the returned source are always UTF-8.
 */
// https://github.com/square/okio/issues/774#issuecomment-703315013
internal fun String.bufferedSource(): BufferedSource {
    val utf8Bytes = encodeUtf8()
    return Buffer().write(utf8Bytes)
}


/**
 * Returns a [Source] that presents the content of this source as UTF-8.
 *
 * This is the extension-function entry point; the actual work is delegated to the
 * file-private `convertToUtf8(source, sourceEncoding)` function.
 *
 * @param sourceEncoding the encoding of the bytes produced by the receiver.
 */
internal fun Source.convertToUtf8(sourceEncoding: CharEncoding): Source {
    return convertToUtf8(source = this, sourceEncoding = sourceEncoding)
}


/**
 * Wraps [source] so that the bytes read from the result are UTF-8.
 *
 * - [UTF_8] input is returned as-is, without any wrapping.
 * - UTF-16 input is decoded one 16-bit code unit at a time; a high surrogate that is
 *   immediately followed by a low surrogate is combined into a single supplementary
 *   code point.
 * - UTF-32 input is decoded one 32-bit value at a time; values outside the Unicode range
 *   are replaced with U+FFFD.
 *
 * No byte-order-mark handling is performed here.
 *
 * @param source the source producing bytes in [sourceEncoding].
 * @param sourceEncoding the encoding of [source].
 * @return [source] itself for UTF-8, otherwise a transcoding [Source] that owns [source].
 */
@JvmName("_convertToUtf8")
private fun convertToUtf8(
    source: Source,
    sourceEncoding: CharEncoding
): Source {

    val charReader: BufferedSourceCharReader =
        when (sourceEncoding) {
            UTF_8    -> return source // no conversion necessary for UTF-8

            // UTF-16: read big-endian 16-bit code units, pairing surrogates
            UTF_16BE -> BufferedSourceCharReader { readUtf16CodePoint(it) { readShort() } }

            // UTF-16LE: same, but each code unit is stored least-significant byte first
            UTF_16LE -> BufferedSourceCharReader { readUtf16CodePoint(it) { readShortLe() } }

            // UTF-32: each big-endian Int is a complete code point
            UTF_32BE -> BufferedSourceCharReader { sanitizeCodePoint(it.readInt()) }

            // UTF-32LE: same, but each Int is stored least-significant byte first
            UTF_32LE -> BufferedSourceCharReader { sanitizeCodePoint(it.readIntLe()) }
        }

    return UnicodeSource(
        src = source.buffer(),
        charReader = charReader,
    )
}

/** Code point substituted for UTF-32 values that are not valid Unicode code points. */
private const val REPLACEMENT_CODE_POINT: Int = 0xFFFD

/**
 * Reads one code point from UTF-16 encoded [source].
 *
 * [readUnit] reads a single 16-bit code unit in the desired byte order. When the unit is a
 * high surrogate and the following unit is a low surrogate, both are consumed and combined.
 * An unpaired surrogate is returned unchanged (the following unit is *not* consumed), leaving
 * it to the writer to decide how to represent it.
 */
private inline fun readUtf16CodePoint(
    source: BufferedSource,
    readUnit: BufferedSource.() -> Short,
): Int {
    val first = source.readUnit().toInt().toChar()
    // Only a high surrogate can start a pair, and only if another full code unit is available.
    if (!first.isHighSurrogate() || !source.request(2L)) return first.code

    // Look at the next unit without consuming it, so a non-matching unit is decoded normally later.
    val second = source.peek().readUnit().toInt().toChar()
    if (!second.isLowSurrogate()) return first.code

    source.skip(2L)
    return 0x10000 + ((first.code - 0xD800) shl 10) + (second.code - 0xDC00)
}

/** Returns [codePoint] if it lies in `0..0x10FFFF`, otherwise [REPLACEMENT_CODE_POINT]. */
private fun sanitizeCodePoint(codePoint: Int): Int =
    if (codePoint in 0..0x10FFFF) codePoint else REPLACEMENT_CODE_POINT

/**
 * A [Source] that decodes code points from [src] using [charReader] and serves them as UTF-8.
 *
 * Decoded bytes are staged in an internal buffer so that [read] never hands out more than the
 * requested number of bytes, even though a single code point may expand to up to four UTF-8 bytes.
 *
 * If the upstream ends in the middle of a code unit, the underlying read throws
 * [EOFException], which is propagated to the caller.
 */
private class UnicodeSource(
    private val src: BufferedSource,
    private val charReader: BufferedSourceCharReader,
) : Source {

    /** UTF-8 bytes that have been decoded but not yet handed to a caller. */
    private val decoded = Buffer()

    /**
     * Moves up to [byteCount] UTF-8 bytes into [sink].
     *
     * @return the number of bytes written to [sink], or `-1` once the upstream is exhausted
     *   and no decoded bytes remain.
     * @throws IllegalArgumentException if [byteCount] is negative.
     */
    override fun read(sink: Buffer, byteCount: Long): Long {
        require(byteCount >= 0L) { "byteCount < 0: $byteCount" }
        if (byteCount == 0L) return 0L

        // Decode in bounded chunks; returning fewer bytes than requested is allowed.
        val target = minOf(byteCount, MAX_DECODE_CHUNK)
        while (decoded.size < target && !src.exhausted()) {
            decoded.writeUtf8CodePoint(charReader(src))
        }

        // Buffer.read returns -1 when `decoded` is empty, i.e. the upstream had nothing left.
        return decoded.read(sink, byteCount)
    }

    override fun close() {
        decoded.clear()
        src.close()
    }

    override fun timeout(): Timeout = src.timeout()

    private companion object {
        /** Upper bound on how many UTF-8 bytes a single [read] call tries to stage. */
        const val MAX_DECODE_CHUNK: Long = 8192L
    }
}

/**
 * Reads the next Unicode code point from a [BufferedSource].
 *
 * Implementations consume exactly the bytes that make up the returned code point.
 */
private fun interface BufferedSourceCharReader {
    /** Returns the next code point read from [source]. */
    operator fun invoke(source: BufferedSource): Int
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment