Created
October 30, 2018 01:07
-
-
Save guersam/867f43c8728446af8db552429a405bed to your computer and use it in GitHub Desktop.
Naively decode an arbitrary URI-encoded string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env amm | |
import $ivy.`org.typelevel::cats-core:1.4.0` | |
import cats.implicits._ | |
import java.net.{URLEncoder, URLDecoder} | |
import scala.util.{Try, Success} | |
// Candidate charset names, tried in this order when decoding; the first one that round-trips wins.
val charsets: List[String] = List("utf-8", "x-windows-949")
/**
 * Decodes `str` as a percent-encoded string using `charsetName` and returns the decoded
 * text only when the decoding is lossless, i.e. the decoded text maps back to the input.
 *
 * Two acceptance checks are applied; passing either one is enough:
 *  - re-encoding the decoded text with `URLEncoder` reproduces `str` exactly, or
 *  - `str` is pure ASCII and the bytes denoted by its escapes equal the bytes of the
 *    decoded text in `charsetName`. This admits valid but non-canonical input that
 *    `URLEncoder` never produces, such as lowercase hex digits, `%20` for a space, or
 *    literal `~`.
 *
 * Note that not all charsets can round-trip for every character.
 *
 * @param str         the percent-encoded input
 * @param charsetName name of the charset to decode with
 * @return `Some(decoded)` when the decoding is lossless; `None` when `str` is malformed,
 *         the charset is unsupported, or the decoded text does not map back to `str`
 */
def roundtrip(str: String)(charsetName: String): Option[String] = {
  // ISO-8859-1 maps every byte to the code point of the same value, so decoding with it
  // recovers the raw bytes that the escapes in `str` denote.
  val byteCharset = "ISO-8859-1"

  // Exact textual round trip through URLEncoder.
  def reencodesExactly(decoded: String): Boolean =
    Try(URLEncoder.encode(decoded, charsetName)).toOption.contains(str)

  // Byte-level round trip. Restricted to ASCII-only input so that unescaped non-ASCII
  // characters can never be accepted through lossy '?' substitution.
  def sameBytes(decoded: String): Boolean =
    str.forall(_ < 0x80) && Try {
      val raw = URLDecoder.decode(str, byteCharset).getBytes(byteCharset)
      java.util.Arrays.equals(raw, decoded.getBytes(charsetName))
    }.getOrElse(false)

  Try(URLDecoder.decode(str, charsetName)).toOption
    .filter(decoded => reencodesExactly(decoded) || sameBytes(decoded))
}
/**
 * Tries each entry of `charsets` in order and returns the first decoding of `str`
 * that `roundtrip` accepts, or `None` when no charset yields one.
 */
def naiveDecodeUriComponent(str: String): Option[String] =
  charsets.iterator
    .map(charsetName => roundtrip(str)(charsetName))
    .collectFirst { case Some(decoded) => decoded }
// Demo: print the decoding result for two sample inputs; expected outputs are noted inline.
List(
  "%ED%85%8C%EC%8A%A4%ED%8A%B8", // Some("테스트")
  "%C5%D7%BD%BA%C6%AE" // Some("테스트")
).foreach(sample => println(naiveDecodeUriComponent(sample)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment