Created
August 23, 2014 18:20
-
-
Save clojens/6675d5a68300c909c9b8 to your computer and use it in GitHub Desktop.
RFC 3986 Uniform Resource Identifier (URI): Generic Syntax
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;
;; Grammar copied from the IETF RFC 3986 document, swapping the quoting
;; (double -> single) for the benefit of inline LightTable and Instaparse
;; operability.
;;
;; http://tools.ietf.org/html/rfc3986[RFC 3986 URI Generic Syntax 2005]
;;
;; Load the namespaces used below: refer `distill*` and `distill` from
;; alembic.still, and alias instaparse.core as `insta` and
;; instaparse.combinators as `ipcombo` (prefix-list form).
(require '[alembic.still :refer [distill* distill]]
         '(instaparse [core :as insta]
                      [combinators :as ipcombo]))
(def uri
  "Instaparse parser for the generic URI syntax of RFC 3986.

  The grammar string is the ABNF from the RFC with double-quoted terminals
  rewritten as single-quoted ones so the whole grammar fits inside one
  Clojure string; the only escaped double quotes are around the apostrophe
  terminal in `sub-delims`. The grammar is read with `:input-format :abnf`.
  `URI` is the first rule in the grammar. Core rules such as ALPHA, DIGIT
  and HEXDIG are not defined here and are expected to come from instaparse's
  ABNF support."
  (insta/parser
   "URI           = scheme ':' hier-part [ '?' query ] [ '#' fragment ]
    hier-part     = '//' authority path-abempty
                  / path-absolute
                  / path-rootless
                  / path-empty
    URI-reference = URI / relative-ref
    absolute-URI  = scheme ':' hier-part [ '?' query ]
    relative-ref  = relative-part [ '?' query ] [ '#' fragment ]
    relative-part = '//' authority path-abempty
                  / path-absolute
                  / path-noscheme
                  / path-empty
    scheme        = ALPHA *( ALPHA / DIGIT / '+' / '-' / '.' )
    authority     = [ userinfo '@' ] host [ ':' port ]
    userinfo      = *( unreserved / pct-encoded / sub-delims / ':' )
    host          = IP-literal / IPv4address / reg-name
    port          = *DIGIT
    IP-literal    = '[' ( IPv6address / IPvFuture ) ']'
    IPvFuture     = 'v' 1*HEXDIG '.' 1*( unreserved / sub-delims / ':' )
    IPv6address   = 6( h16 ':' ) ls32
                  / '::' 5( h16 ':' ) ls32
                  / [ h16 ] '::' 4( h16 ':' ) ls32
                  / [ *1( h16 ':' ) h16 ] '::' 3( h16 ':' ) ls32
                  / [ *2( h16 ':' ) h16 ] '::' 2( h16 ':' ) ls32
                  / [ *3( h16 ':' ) h16 ] '::' h16 ':' ls32
                  / [ *4( h16 ':' ) h16 ] '::' ls32
                  / [ *5( h16 ':' ) h16 ] '::' h16
                  / [ *6( h16 ':' ) h16 ] '::'
    h16           = 1*4HEXDIG
    ls32          = ( h16 ':' h16 ) / IPv4address
    IPv4address   = dec-octet '.' dec-octet '.' dec-octet '.' dec-octet
    dec-octet     = DIGIT                 ; 0-9
                  / %x31-39 DIGIT         ; 10-99
                  / '1' 2DIGIT            ; 100-199
                  / '2' %x30-34 DIGIT     ; 200-249
                  / '25' %x30-35          ; 250-255
    reg-name      = *( unreserved / pct-encoded / sub-delims )
    path          = path-abempty    ; begins with '/' or is empty
                  / path-absolute   ; begins with '/' but not '//'
                  / path-noscheme   ; begins with a non-colon segment
                  / path-rootless   ; begins with a segment
                  / path-empty      ; zero characters
    path-abempty  = *( '/' segment )
    path-absolute = '/' [ segment-nz *( '/' segment ) ]
    path-noscheme = segment-nz-nc *( '/' segment )
    path-rootless = segment-nz *( '/' segment )
    path-empty    = 0<pchar>
    segment       = *pchar
    segment-nz    = 1*pchar
    segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / '@' )
                  ; non-zero-length segment without any colon ':'
    pchar         = unreserved / pct-encoded / sub-delims / ':' / '@'
    query         = *( pchar / '/' / '?' )
    fragment      = *( pchar / '/' / '?' )
    pct-encoded   = '%' HEXDIG HEXDIG
    unreserved    = ALPHA / DIGIT / '-' / '.' / '_' / '~'
    reserved      = gen-delims / sub-delims
    gen-delims    = ':' / '/' / '?' / '#' / '[' / ']' / '@'
    sub-delims    = '!' / '$' / '&' / \"'\" / '(' / ')'
                  / '*' / '+' / ',' / ';' / '='
    "
   :input-format :abnf))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment