-
-
Save muink/b7f506e4f210633d466c5c8e48440384 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# urid: percent-decode (URI-decode) the input string.
#
# Input:  a string that may contain runs of %XX escapes (XX = two hex digits,
#         upper or lower case).
# Output: the same string with every run of escapes replaced by the text its
#         bytes spell when read as UTF-8. Text outside the escapes is left
#         untouched: "+" is not turned into a space, and a "%" that is not
#         followed by two hex digits is kept literally.
#
# Malformed UTF-8 (truncated sequence, stray continuation byte, overlong
# form, surrogate, value above U+10FFFF, lead byte 0xF5-0xFF) never raises an
# error: every byte that cannot be decoded becomes U+FFFD (the replacement
# character) and decoding resumes at the following byte.
def urid:
  # Numeric value (0-15) of a single hex digit given as its codepoint.
  # The regex in the gsub call guarantees only hex digits reach this filter.
  def uni2num:
    if 48 <= . and . <= 57 then . - 48      # "0".."9" (48..57)  -> 0..9
    elif 65 <= . and . <= 70 then . - 55    # "A".."F" (65..70)  -> 10..15
    else . - 87                             # "a".."f" (97..102) -> 10..15
    end;

  # Turn a run of escapes into its byte values: "%E4%B8%AD" -> [228,184,173].
  # Each escape is three characters wide: "%", high nibble, low nibble.
  def decode:
    explode as $chars
    | [ range(0; $chars | length; 3)
        | 16 * ($chars[. + 1] | uni2num) + ($chars[. + 2] | uni2num) ];

  # Reassemble an array of UTF-8 bytes into an array of codepoints.
  # Ref: https://en.wikipedia.org/wiki/UTF-8#Encoding
  #   lead 0xF0-0xF4  4 bytes  U+10000 - U+10FFFF
  #   lead 0xE0-0xEF  3 bytes  U+0800  - U+FFFF (minus surrogates)
  #   lead 0xC2-0xDF  2 bytes  U+0080  - U+07FF
  #   0x00-0x7F       1 byte   U+0000  - U+007F
  # Each continuation byte carries 6 payload bits, hence the weights
  # 64 (1 << 6), 4096 (1 << 12) and 262144 (1 << 18).
  def utf82uni:
    . as $bytes
    | length as $n
    # True when byte $k exists and has the continuation form 10xxxxxx.
    | def cont($k): $k < $n and $bytes[$k] >= 128 and $bytes[$k] <= 191;
      def scan($i):
        if $i >= $n then empty
        else
          $bytes[$i] as $lead
          | if $lead < 128 then
              $lead, scan($i + 1)
            elif $lead >= 240 and $lead <= 244
                 and cont($i + 1) and cont($i + 2) and cont($i + 3) then
              ( ($bytes[$i + 3] - 128)
                + 64 * ($bytes[$i + 2] - 128)
                + 4096 * ($bytes[$i + 1] - 128)
                + 262144 * ($lead - 240) ) as $cp
              # Reject overlong forms and values beyond the Unicode range.
              | if $cp >= 65536 and $cp <= 1114111
                then $cp, scan($i + 4)
                else 65533, scan($i + 1)
                end
            elif $lead >= 224 and $lead <= 239
                 and cont($i + 1) and cont($i + 2) then
              ( ($bytes[$i + 2] - 128)
                + 64 * ($bytes[$i + 1] - 128)
                + 4096 * ($lead - 224) ) as $cp
              # Reject overlong forms and UTF-16 surrogates (U+D800-U+DFFF).
              | if $cp >= 2048 and ($cp < 55296 or $cp > 57343)
                then $cp, scan($i + 3)
                else 65533, scan($i + 1)
                end
            elif $lead >= 194 and $lead <= 223 and cont($i + 1) then
              # Leads 0xC0/0xC1 are excluded: they can only be overlong.
              ($bytes[$i + 1] - 128) + 64 * ($lead - 192), scan($i + 2)
            else
              # Stray continuation byte, invalid lead, or truncated sequence.
              65533, scan($i + 1)
            end
        end;
      [scan(0)];

  # URL-encoding means percent-encoded UTF-8 octets, so each maximal run of
  # escapes is converted to bytes and then reassembled into codepoints for
  # implode.
  gsub("(?<m>(?:%[[:xdigit:]]{2})+)"; .m | decode | utf82uni | implode);
# Main program: run the urid filter defined above on the input.
urid
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment