Last active
December 15, 2015 20:40
-
-
Save ladislav/5319674 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
make object! [
    ; Private Use Area range used inside url! values to carry percent-encoded
    ; delimiters and raw octets: code point (url-pua-start + N) stands for octet N.
    url-pua-start: #"^(e000)"
    url-pua-end: #"^(e0ff)"

    ; Characters that, when percent-encoded in the source, are kept distinct
    ; from their literal form by mapping them into the PUA range.
    delimiter: charset ":/?#[]@!$&'()*+,;="
    hex-digit: charset [#"0" - #"9" #"a" - #"f" #"A" - #"F"]

    ; UTF-8 octet classes, matched against the binary octet group.
    ascii: charset [#"^(00)" - #"^(7f)"]
    two-byte-start: charset [#"^(c0)" - #"^(df)"]
    three-byte-start: charset [#"^(e0)" - #"^(ef)"]
    four-byte-start: charset [#"^(f0)" - #"^(f7)"]
    continuation: charset [#"^(80)" - #"^(bf)"]

    ; One UTF-8 sequence of one, two or three octets.
    bmp-utf-8: [
        ascii
        |
        two-byte-start continuation
        |
        three-byte-start 2 continuation
    ]

    ; Appends "%XX" for a single octet to TARGET. The hex digits are produced
    ; from a one-byte binary so the octet is never re-encoded as UTF-8 text.
    append-octet: func [
        target [string!]
        octet [integer!]
    ] [
        append target #"%"
        append target enbase/base append copy #{} octet 16
    ]

    set 'load-url func [
        "Converts a percent-encoded URL string to a url! value."
        source [string!] "URL text, possibly containing %XX escapes"
        /local result percent-group octet octet-group success character
    ] [
        result: make url! 0
        ; Octets of consecutive %XX escapes, decoded together once the run ends.
        octet-group: copy #{}
        parse source [
            any [
                ; percent encoding: collect the octet, decode it later
                copy percent-group
                [
                    #"%"
                    [
                        2 hex-digit
                        |
                        (do make error! "Invalid percent encoding")
                    ]
                ]
                (
                    octet: to integer! first dehex percent-group
                    append octet-group octet
                )
                |
                ; Process the pending octet group. The paren is evaluated
                ; before the following SKIP is tried, so the group is also
                ; flushed when the input ends right after an escape.
                (
                    parse octet-group [
                        any [
                            ; escaped delimiter? keep it apart from a literal one
                            set character delimiter
                            (append result add url-pua-start character)
                            |
                            ; UTF-8 sequence of up to three octets?
                            copy character bmp-utf-8
                            (
                                character: first to string! character
                                success: either all [
                                    url-pua-start <= character
                                    url-pua-end >= character
                                ] [
                                    ; Decodes into the reserved PUA range:
                                    ; [end skip] can never match, so this
                                    ; alternative fails and the octets are
                                    ; handled one by one below.
                                    [end skip]
                                ] [
                                    ; public Unicode code point
                                    append result character
                                    none
                                ]
                            )
                            success
                            |
                            ; just an octet
                            set character skip
                            (append result add url-pua-start character)
                        ]
                    ]
                    octet-group: copy #{}
                )
                ; literal (unescaped) character
                set character skip
                (
                    either all [
                        url-pua-start <= character
                        url-pua-end >= character
                    ] [
                        ; A literal character from the reserved PUA range is
                        ; stored as the octets of its UTF-8 encoding.
                        foreach octet to binary! character [
                            append result add url-pua-start octet
                        ]
                    ] [
                        append result character
                    ]
                )
            ]
        ]
        result
    ]

    ; Public characters that mold-url always percent-encodes.
    encodable-chars: charset [
        ; control characters
        #"^(00)" - #"^(1f)" #"^(7f)"
        ; spaces
        #" " #"^(a0)"
        ; percent
        #"%"
    ]

    set 'mold-url func [
        "Converts a url! value to its percent-encoded string form."
        value [url!]
        /local result character
    ] [
        result: copy ""
        parse value [
            any [
                set character skip
                (
                    case [
                        all [
                            url-pua-start <= character
                            url-pua-end >= character
                        ] [
                            ; an octet stored in the PUA range
                            append-octet result to integer! character - url-pua-start
                        ]
                        find encodable-chars character [
                            ; Encode every octet of the UTF-8 form separately,
                            ; so U+00A0 becomes "%C2%A0".
                            foreach octet to binary! character [
                                append-octet result octet
                            ]
                        ]
                        true [
                            append result character
                        ]
                    ]
                )
            ]
        ]
        result
    ]
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment