Last active
August 29, 2015 14:04
-
-
Save ttahmouch/5300542abd760ba74f4e to your computer and use it in GitHub Desktop.
RFC 3986 URI En/Decoding in JavaScript (Node and Browser)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A Uniform Resource Identifier (URI) is a compact sequence of characters that identifies an abstract or physical
 * resource.
 * @example "http://www.google.com:80/search?query=text#result"
 *
 * The constructor stores no state of its own; it serves as the namespace that the static helpers
 * (`Uri.encode`, `Uri.decode`) are attached to.
 *
 * It may be invoked with or without `new`; both forms yield a `Uri` instance.
 *
 * @return {Uri}
 * @constructor
 */
function Uri() {
    // Without `new`, `this` is `undefined` in strict/module code and the global object in sloppy code,
    // so honour the documented return type by constructing an instance explicitly.
    if (!(this instanceof Uri)) {
        return new Uri();
    }
    return this;
}
/**
 * 5.3. Component Recomposition
 *
 * Parsed URI components can be recomposed to obtain the corresponding URI reference string.
 * Using pseudocode, this would be:
 *
 *    result = ""
 *
 *    if defined(scheme) then
 *       append scheme to result;
 *       append ":" to result;
 *    endif;
 *
 *    if defined(authority) then
 *       append "//" to result;
 *       append authority to result;
 *    endif;
 *
 *    append path to result;
 *
 *    if defined(query) then
 *       append "?" to result;
 *       append query to result;
 *    endif;
 *
 *    if defined(fragment) then
 *       append "#" to result;
 *       append fragment to result;
 *    endif;
 *
 *    return result;
 *
 * Note that we are careful to preserve the distinction between a component that is undefined,
 * meaning that its separator was not present in the reference, and a component that is empty,
 * meaning that the separator was present and was immediately followed by the next component
 * separator or the end of the reference.
 * @see http://tools.ietf.org/html/rfc3986#section-5.3
 *
 * Implementation notes:
 *  - Unlike the RFC pseudocode above, this implementation treats every falsy component (including the
 *    empty string) as "not defined", so an empty query or fragment does not emit a bare "?" or "#".
 *  - When an authority is present and the path is non-empty but does not begin with "/", a "/" is
 *    inserted between them. RFC 3986 section 3.3 requires such a path to be empty or to begin with "/";
 *    without the separator the path would be absorbed into the authority.
 *  - Only the `scheme`, `authority`, `path`, `query` and `fragment` properties are read; any other
 *    property of the argument is ignored.
 *
 * @param uri {Object} representing the Uniform Resource Identifier (URI). Any non-object (or null) value is
 * treated as an object with no components.
 * @example
 * {
 *     "uri": "http://www.google.com:80/search?query=text#result",
 *     "scheme": "http",
 *     "authority": "www.google.com:80",
 *     "path": "/search",
 *     "query": "query=text",
 *     "fragment": "result"
 * }
 *
 * @return {string} representing the Uniform Resource Identifier (URI); the empty string when no component is set.
 * @example "http://www.google.com:80/search?query=text#result"
 */
Uri.encode = function (uri) {
    var parts = !!uri && typeof uri === 'object' ? uri : {},
        scheme = parts.scheme || '',
        authority = parts.authority || '',
        path = parts.path || '',
        query = parts.query || '',
        fragment = parts.fragment || '',
        result = '';
    if (scheme) {
        result += scheme + ':';
    }
    if (authority) {
        result += '//' + authority;
        // Keep the path from being read back as part of the authority (e.g. "//example.comsearch").
        if (path && String(path).charAt(0) !== '/') {
            result += '/';
        }
    }
    result += path;
    if (query) {
        result += '?' + query;
    }
    if (fragment) {
        result += '#' + fragment;
    }
    return result;
};
/**
 * Appendix B. Parsing a URI Reference with a Regular Expression
 *
 * As the "first-match-wins" algorithm is identical to the "greedy" disambiguation method used by POSIX regular
 * expressions, it is natural and commonplace to use a regular expression for parsing the potential five components
 * of a URI reference.
 *
 * The following line is the regular expression for breaking-down a well-formed URI reference into its components.
 *
 *    ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 *     12            3  4          5       6  7        8 9
 *
 * The numbers in the second line above are only to assist readability; they indicate the reference points for each
 * subexpression (i.e., each paired parenthesis). We refer to the value matched for subexpression <n> as $<n>.
 * For example, matching the above expression to
 *
 *    http://www.ics.uci.edu/pub/ietf/uri/#Related
 *
 * results in the following subexpression matches:
 *
 *    $1 = http:
 *    $2 = http
 *    $3 = //www.ics.uci.edu
 *    $4 = www.ics.uci.edu
 *    $5 = /pub/ietf/uri/
 *    $6 = <undefined>
 *    $7 = <undefined>
 *    $8 = #Related
 *    $9 = Related
 *
 * where <undefined> indicates that the component is not present, as is the case for the query component in the
 * above example. Therefore, we can determine the value of the five components as
 *
 *    scheme    = $2
 *    authority = $4
 *    path      = $5
 *    query     = $7
 *    fragment  = $9
 *
 * Going in the opposite direction, we can recreate a URI reference from its components by using the algorithm of
 * Section 5.3.
 * @see http://tools.ietf.org/html/rfc3986#appendix-B
 *
 * Implementation notes:
 *  - Every property of the result is a string; a component that is absent from the input is reported as ''
 *    (not `undefined`), so an absent component and an empty one are indistinguishable in the result.
 *  - The authority is further split into userinfo, host name and port. A bracketed IP-literal host such as
 *    "[2001:db8::1]" is kept intact (brackets included) instead of being split at its first ":".
 *  - No percent-decoding or validation is performed; the components are returned exactly as they appear.
 *
 * @param uri {string} representing the Uniform Resource Identifier (URI). Any non-string value is treated as ''.
 * @example "http://www.google.com:80/search?query=text#result"
 *
 * @return {Object} representing the Uniform Resource Identifier (URI).
 * @example
 * {
 *     "uri": "http://www.google.com:80/search?query=text#result",
 *     "scheme": "http",
 *     "authority": "www.google.com:80",
 *     "path": "/search",
 *     "query": "query=text",
 *     "fragment": "result"
 * }
 */
Uri.decode = function (uri) {
    var text = typeof uri === 'string' ? uri : '',
        // Every group in this expression is optional, so it matches any string and `exec` never returns null.
        parts = /^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/.exec(text),
        /**
         * authority = [ userinfo "@" ] host [ ":" port ]
         * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
         * host = IP-literal / IPv4address / reg-name, where IP-literal = "[" ... "]"
         *
         * $2 = userinfo, $3 = host with optional port, $4 = host name, $6 = port.
         * The bracketed alternative is tried first so the colons inside an IP-literal are not taken for the
         * port separator; userinfo may be empty ("@host").
         */
        authority = /^(([^@]*)@)?((\[[^\]]*\]|[^:]*)(:(.*))?)/.exec(parts[4] || ''),
        // userinfo is split at its first ":" into a user name ($1) and a password ($3).
        userinfo = /^([^:]*)(:(.*))?/.exec(authority[2] || '');
    return {
        uri: parts[0] || '',
        scheme: parts[2] || '',
        authority: parts[4] || '',
        path: parts[5] || '',
        query: parts[7] || '',
        fragment: parts[9] || '',
        userinfo: authority[2] || '',
        /**
         * Compatibility with URLUtils and Node.JS.
         * @see https://developer.mozilla.org/en-US/docs/Web/API/URLUtils
         * @see http://nodejs.org/api/url.html
         */
        href: parts[0] || '',
        protocol: parts[1] || '',
        pathname: parts[5] || '',
        search: parts[6] || '',
        hash: parts[8] || '',
        auth: authority[2] || '',
        host: authority[3] || '',
        hostname: authority[4] || '',
        port: authority[6] || '',
        username: userinfo[1] || '',
        password: userinfo[3] || '',
        // An origin is only reported when both a scheme and a host are present.
        origin: parts[2] && authority[3] ? parts[2] + '://' + authority[3] : ''
    };
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment