Created
December 4, 2013 20:23
-
-
Save SimonSapin/7794849 to your computer and use it in GitHub Desktop.
WIP URL parser for Rust, by @jgraham
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::str; | |
use std::iterator::{Iterator}; | |
use std::str::{eq, eq_slice, ByteIterator}; | |
use std::vec; | |
use std::ops; | |
use std::char; | |
// An IPv6 address held as a fixed-size array of eight 16-bit pieces.
struct IPv6Address { | |
// The eight u16 pieces of the address.
data: [u16, ..8] | |
} | |
impl Eq for IPv6Address { | |
fn eq(&self, other: &IPv6Address) -> bool { | |
for i in range(0,8) { | |
let j = i as uint; | |
if self.data[j] != other.data[j] { | |
return false; | |
} | |
}; | |
true | |
} | |
} | |
impl Clone for IPv6Address { | |
fn clone(&self) -> IPv6Address { | |
let mut new = IPv6Address::new(); | |
for i in range(0,8) { | |
let j = i as uint; | |
new.data[j] = self.data[j] | |
}; | |
new | |
} | |
} | |
// Host component of a ParsedURL; equality and cloning are derived.
#[deriving(Eq, Clone)] | |
enum Host { | |
// No host set; this is the value ParsedURL::parse starts a fresh URL with.
NoHost, | |
// A host carried as an owned string.
DomainHost(~str), | |
// A host carried as an IPv6Address (what host_parse builds for bracketed input).
IPv6Host(IPv6Address), | |
} | |
// URL scheme. The named variants are the ones Scheme::from_str recognises
// and Scheme::is_relative reports as relative; any other scheme name is
// carried verbatim in OtherScheme.
#[deriving(Eq, Clone)] | |
enum Scheme { | |
EmptyScheme, //Initial value | |
FTPScheme, | |
FileScheme, | |
GopherScheme, | |
HTTPScheme, | |
HTTPSScheme, | |
WSScheme, | |
WSSScheme, | |
// Any scheme name not matched by Scheme::from_str, stored as given.
OtherScheme(~str) | |
} | |
// The components of a URL as filled in by ParsedURL::parse.
pub struct ParsedURL { | |
// Scheme; EmptyScheme until the parser has read one.
scheme: Scheme, | |
// Everything after "scheme:" for schemes that are not relative.
scheme_data: ~str, | |
username: ~str, | |
// None until a ':' is seen in the userinfo part.
password: Option<~str>, | |
host: Host, | |
// Port kept as a string; the parser stores "" when it equals the scheme default.
port: ~str, | |
// Path segments, one string per segment.
path: ~[~str], | |
// None when the URL has no '?' part.
query: Option<~str>, | |
// None when the URL has no '#' part.
fragment: Option<~str>, | |
// Set by the parser when the scheme reports is_relative(), or when the
// URL is resolved through the relative state.
relative: bool | |
} | |
impl Scheme { | |
pub fn from_str(string: &str) -> Scheme { | |
match string { | |
"ftp" => FTPScheme, | |
"file" => FileScheme, | |
"gopher" => GopherScheme, | |
"http" => HTTPScheme, | |
"https" => HTTPSScheme, | |
"ws" => WSSScheme, | |
"wss" => WSSScheme, | |
_ => OtherScheme(string.to_owned()) | |
} | |
} | |
pub fn is_relative(&self) -> bool { | |
match *self { | |
FTPScheme | FileScheme | GopherScheme | HTTPScheme | HTTPSScheme | WSScheme | WSSScheme => true, | |
_ => false | |
} | |
} | |
pub fn default_port(&self) -> Option<~str> { | |
match *self { | |
FTPScheme => Some(~"21"), | |
GopherScheme => Some(~"70"), | |
HTTPScheme => Some(~"80"), | |
HTTPSScheme => Some(~"443"), | |
WSScheme => Some(~"80"), | |
WSSScheme => Some(~"443"), | |
_ => None | |
} | |
} | |
} | |
// States of the state machine driven by ParsedURL::parse. A caller may pass
// one of these as state_override to start parsing in that state.
#[deriving(Eq)] | |
enum ParserState { | |
SchemeStartState, | |
SchemeState, | |
SchemeDataState, | |
NoSchemeState, | |
RelativeOrAuthorityState, | |
RelativeState, | |
RelativeSlashState, | |
AuthorityFirstSlashState, | |
AuthoritySecondSlashState, | |
AuthorityIgnoreSlashesState, | |
AuthorityState, | |
FileHostState, | |
// HostState and HostnameState are handled by the same arm in parse.
HostState, | |
HostnameState, | |
PortState, | |
RelativePathStartState, | |
RelativePathState, | |
QueryState, | |
FragmentState | |
} | |
// A cursor over an owned string that can also look ahead, jump to an
// absolute position and step backwards.
pub struct SeekableCharIterator { | |
// The text being iterated.
priv string: ~str, | |
// Length of string as reported by string.len() when the iterator was built.
priv len: uint, | |
// Index of the next element next() will return.
priv pos: uint | |
} | |
impl Iterator<char> for SeekableCharIterator { | |
fn next(&mut self) -> Option<char> { | |
if self.pos >= self.len { | |
None | |
} else { | |
let rv = Some(self.string[self.pos] as char); | |
self.pos += 1; | |
rv | |
} | |
} | |
fn size_hint(&self) -> (uint, Option<uint>) { | |
(self.len - self.pos, Some(self.len - self.pos)) | |
} | |
} | |
impl SeekableCharIterator { | |
pub fn new(string: ~str) -> SeekableCharIterator { | |
SeekableCharIterator { | |
len: string.len(), | |
string: string, | |
pos: 0 | |
} | |
} | |
pub fn peek(&self, chars: uint) -> Option<char> { | |
if chars + self.pos - 1> self.len { | |
None | |
} else { | |
Some(self.string[chars + self.pos - 1] as char) | |
} | |
} | |
pub fn seek(&mut self, pos: uint) { | |
if pos >= self.len { | |
self.pos = self.len; | |
} else { | |
self.pos = pos; | |
} | |
} | |
pub fn rewind(&mut self, n: uint) { | |
if (self.pos > 0) { | |
self.pos -= n | |
} | |
} | |
} | |
// Error type returned by ParsedURL::parse.
enum URLParseError { | |
// The input could not be parsed as a URL.
InvalidURL | |
} | |
impl ParsedURL { | |
pub fn new(scheme: Scheme, | |
scheme_data: ~str, | |
username: ~str, | |
password: Option<~str>, | |
host: Host, | |
port: ~str, | |
path: ~[~str], | |
query: Option<~str>, | |
fragment: Option<~str>, | |
relative: bool) -> ParsedURL { | |
ParsedURL {scheme: scheme, | |
scheme_data: scheme_data, | |
username: username, | |
password: password, | |
host: host, | |
port: port, | |
path: path, | |
query: query, | |
fragment: fragment, | |
relative: relative} | |
} | |
pub fn parse(raw_input: &str, base_url: Option<&ParsedURL>, encoding: Option<&str>, mut initial_url: Option<ParsedURL>, state_override: Option<ParserState>) -> Result<Option<ParsedURL>, URLParseError> { | |
let (url_, input_) = match initial_url { | |
Some(x) => (x, raw_input), | |
None => { | |
let mut new_url = ParsedURL::new(EmptyScheme, ~"", ~"", None, NoHost, ~"", ~[], None, None, false); | |
//Need to check this is the right chars | |
(new_url, raw_input.trim()) | |
} | |
}; | |
let mut url = url_; | |
let mut char_iter = SeekableCharIterator::new(input_.to_owned()); | |
let mut state = match state_override { | |
Some(state) => state, | |
None => SchemeStartState | |
}; | |
let mut encoding_override = match encoding { | |
Some(x) => x, | |
None => "utf-8" | |
}; | |
//There is possibly a better type to use here | |
let mut buf = ~""; | |
let mut at_flag = false; | |
let mut square_paren_flag = false; | |
loop { | |
let maybe_c = char_iter.next(); | |
match maybe_c { | |
Some(c) => { | |
//Normal character handling | |
match state { | |
SchemeStartState => { | |
match c { | |
'a'..'z' | 'A'..'Z' => { | |
buf.push_char(char_to_lower(c)); | |
state = SchemeState; | |
}, | |
_ => { | |
match state_override { | |
Some(state) => {return Err(InvalidURL);} | |
None => { | |
char_iter.rewind(1); | |
state = NoSchemeState; | |
} | |
} | |
} | |
} | |
}, | |
SchemeState => | |
match c { | |
'a'..'z' | 'A'..'Z' | '+' | '-' | '.' => { | |
buf.push_char(char_to_lower(c)); | |
}, | |
':' => { | |
let scheme = Scheme::from_str(buf); | |
buf = ~""; | |
if state_override.is_some() { | |
return Ok(None); | |
} | |
if scheme.is_relative() { | |
url.relative = true; | |
} | |
if scheme == FileScheme { | |
state = RelativeState; | |
} else if (url.relative) { | |
if (base_url.is_some() && | |
base_url.unwrap().scheme == scheme) { | |
state = RelativeOrAuthorityState; | |
} else { | |
state = AuthorityFirstSlashState; | |
} | |
} else { | |
state = SchemeDataState; | |
} | |
url.scheme = scheme; | |
}, | |
_ => { | |
match state_override { | |
Some(x) => { | |
//break if we have EOF, but not sure if we can get here with EOF | |
return Err(InvalidURL); | |
}, | |
None => { | |
buf = ~""; | |
state = NoSchemeState; | |
char_iter.seek(0); | |
} | |
} | |
} | |
}, | |
SchemeDataState => { | |
match c { | |
'?' => { | |
url.scheme_data = buf; | |
buf = ~""; | |
url.query = Some(~""); | |
state = QueryState; | |
}, | |
'#' => { | |
url.scheme_data = buf; | |
buf = ~""; | |
url.fragment = Some(~""); | |
state = FragmentState; | |
}, | |
_ => { | |
if c != '%' && !is_url_char(c) { | |
return Err(InvalidURL); | |
} else if (c == '%' && | |
!(unwrap_bool(is_hex_char, char_iter.peek(1)) && | |
unwrap_bool(is_hex_char, char_iter.peek(2)))) { | |
return Err(InvalidURL); | |
} else if (c == '\x09' || c == '\x0a' || c == '\x0d') { | |
//Ignore these characters | |
} else { | |
buf.push_str(utf8_percent_encode(c, SimpleEncodeSet)); | |
} | |
} | |
} | |
}, | |
NoSchemeState => { | |
if base_url.is_none() || !base_url.unwrap().relative { | |
return Err(InvalidURL); | |
} else { | |
state = RelativeState; | |
char_iter.rewind(1); | |
} | |
}, | |
RelativeOrAuthorityState => { | |
let next = char_iter.peek(1); | |
if (c == '/' && next.is_some() && next.unwrap() == '/') { | |
state = AuthorityIgnoreSlashesState; | |
char_iter.next(); | |
} else { | |
//XXX non-fatal parse error | |
char_iter.rewind(1); | |
state = RelativeState; | |
} | |
}, | |
RelativeState => { | |
let base = base_url.expect("In relative state we must have a base url"); | |
url.relative = true; | |
if url.scheme != FileScheme && base_url.is_some() { | |
url.scheme = base.scheme.clone(); | |
} | |
//Need to deal with EOF also | |
match c { | |
'\\' | '/' => state = RelativeSlashState, | |
'?' => { | |
url.host = base.host.clone(); | |
url.port = base.port.clone(); | |
url.path = base.path.clone(); | |
url.query = Some(~""); | |
state = QueryState; | |
}, | |
'#' => { | |
url.host = base.host.clone(); | |
url.port = base.port.clone(); | |
url.path = base.path.clone(); | |
url.query = base.query.clone(); | |
url.fragment = Some(~""); | |
state = FragmentState; | |
}, | |
_ => { | |
let next = char_iter.peek(1); | |
let second = char_iter.peek(2); | |
if (url.scheme != FileScheme || | |
!is_ascii_alpha(c) || | |
!(next == Some(':') || | |
next == Some('|')) || | |
!(second == Some('/') || | |
second == Some('\\') || | |
second == Some('?') || | |
second == Some('#'))) { | |
url.host = base.host.clone(); | |
url.path = base.path.clone(); | |
url.port = base.port.clone(); | |
url.path.pop(); //??? "And then pop URL's path" | |
} | |
} | |
} | |
}, | |
RelativeSlashState => { | |
let base = base_url.expect("In relative slash state we must have a base url"); | |
match c { | |
'\\' | '/' => { | |
if url.scheme == FileScheme { | |
state = FileHostState; | |
} else { | |
state = AuthorityIgnoreSlashesState; | |
} | |
}, | |
_ => { | |
if url.scheme != FileScheme { | |
url.host = base.host.clone(); | |
url.port = base.port.clone(); | |
} | |
state = RelativePathState; | |
char_iter.rewind(1); | |
} | |
} | |
}, | |
AuthorityFirstSlashState => { | |
match c { | |
'/' => state = AuthoritySecondSlashState, | |
_ => { | |
state = AuthorityIgnoreSlashesState; | |
char_iter.rewind(1); | |
} | |
} | |
}, | |
AuthoritySecondSlashState => { | |
state = AuthorityIgnoreSlashesState; | |
if c != '/' { | |
char_iter.rewind(1); | |
} | |
}, | |
AuthorityIgnoreSlashesState => { | |
if c != '/' && c != '\\' { | |
state = AuthorityState; | |
char_iter.rewind(1); | |
} | |
}, | |
AuthorityState => { | |
match c { | |
'@' => { | |
if at_flag { | |
let mut new_buf = ~"%40"; | |
new_buf.push_str(buf); | |
buf = new_buf; | |
} | |
at_flag = true; | |
let mut target = ~""; | |
for cp in buf.iter() { | |
if (cp == '\x09' || | |
cp == '\x0a' || | |
cp == '\x0d') { | |
loop; | |
} | |
if cp == ':' && url.password.is_none() { | |
url.password = Some(~""); | |
url.username.push_str(target.clone()); | |
target = ~""; | |
} else { | |
target.push_str(utf8_percent_encode(cp, DefaultEncodeSet)) | |
} | |
} | |
match url.password { | |
Some(ref mut x) => { | |
x.push_str(target); | |
}, | |
None => url.username.push_str(target) | |
} | |
}, | |
'/' | '\\' | '?' | '#' => { | |
char_iter.rewind(buf.len()); | |
buf = ~""; | |
state = HostState; | |
}, | |
_ => { | |
buf.push_char(c); | |
} | |
} | |
}, | |
FileHostState => { | |
match c { | |
'/' | '\\' | '?' | '#' => { | |
if (buf.len() == 2 && | |
is_ascii_alpha(buf[0] as char) && | |
(buf[1] as char == ':' || | |
buf[1] as char == '|')) { | |
state = RelativePathState; | |
} else if eq_slice(buf, &"") { | |
state = RelativePathStartState; | |
} else { | |
let host = host_parse(buf); | |
match host { | |
Some(x) => { | |
url.host = x; | |
state = RelativePathStartState; | |
}, | |
None => { | |
return Err(InvalidURL); | |
} | |
} | |
} | |
}, | |
'\x09' | '\x0a' | '\x0d' => { | |
//parse error | |
}, | |
_ => { | |
buf.push_char(c); | |
} | |
} | |
}, | |
HostState | HostnameState => { | |
if !square_paren_flag && c == ':' { | |
let host = host_parse(buf); | |
match host { | |
None => return Ok(None), | |
Some(x) => { | |
url.host = x; | |
buf = ~""; | |
state = PortState; | |
} | |
} | |
} else { | |
match c { | |
'/' | '\\' | '?' | '#' => { | |
char_iter.rewind(1); | |
let host = host_parse(buf); | |
match host { | |
Some(x) => { | |
url.host = x; | |
buf = ~""; | |
state = RelativePathStartState; | |
if state_override.is_some() { | |
return Ok(None); | |
} | |
}, | |
None => return Err(InvalidURL) | |
} | |
}, | |
'\x09' | '\x0A' | '\x0D' => { | |
//Do nothing | |
}, | |
_ => { | |
if c == '[' { | |
square_paren_flag = true; | |
} else if c == ']' { | |
square_paren_flag = false; | |
}; | |
buf.push_char(c); | |
} | |
} | |
} | |
}, | |
PortState => { | |
if c.is_digit_radix(10) { | |
buf.push_char(c); | |
} else if (c == '#' || c == '\\' || c == '/' || c == '?' || | |
state_override.is_some()) { | |
while buf[0] as char == '\x30' && buf.len() > 1 { | |
buf = buf.slice(1, buf.len()).to_owned(); | |
} | |
match url.scheme.default_port() { | |
Some(p) => { | |
if eq_slice(p, buf) { | |
buf = ~""; | |
} | |
}, | |
None => {} | |
} | |
url.port = buf; | |
if state_override.is_some() { | |
return Ok(None); | |
} | |
buf = ~""; | |
state = RelativePathStartState; | |
char_iter.rewind(1); | |
} else if c == '\x09' || c == '\x0A' || c == '\x0D' { | |
//Do nothing | |
} else { | |
return Ok(None); | |
} | |
}, | |
RelativePathStartState => { | |
state = RelativePathState; | |
if c != '\\' && c != '/' { | |
char_iter.rewind(1); | |
} | |
}, | |
RelativePathState => { | |
if (c == '/' || c == '\\' || | |
(state_override.is_none() && | |
c == '?' || c == '#')) { | |
if eq_slice(buf, &"%2e") { | |
buf = ~"."; | |
} else if (eq_slice(buf, &".%2e") || | |
eq_slice(buf, &"%2e%2e") || | |
eq_slice(buf, &"%2e.")) { | |
buf = ~".."; | |
} | |
if eq_slice(buf, &"..") { | |
url.path.pop_opt(); | |
if c != '\\' && c != '/' { | |
url.path.push(~""); | |
} | |
} else if !eq_slice(buf, &".") { | |
if (url.scheme == FileScheme && url.path.is_empty() && | |
buf.len() == 2 && buf[1] as char == '|') { | |
buf.pop_char(); | |
buf.push_char(':'); | |
} | |
url.path.push(buf); | |
} | |
buf = ~""; | |
if c == '?' { | |
state = QueryState; | |
url.query = Some(~""); | |
} else if c == '#' { | |
state = FragmentState; | |
url.fragment = Some(~""); | |
} | |
} else if (c == '\x09' || c == '\x0A' || c == '\x0D') { | |
//Do nothing | |
} else { | |
buf.push_str(utf8_percent_encode(c, DefaultEncodeSet)); | |
} | |
}, | |
QueryState => { | |
if state_override.is_none() && c == '#' { | |
if url.relative { | |
encoding_override = "utf-8"; | |
//TODO Now we should encode the buffer | |
for byte in buf.byte_iter() { | |
let char_str = match byte { | |
0..0x20 | 0x7F..0xFF | 0x22 | 0x23 | 0x3C | | |
0x3E | 0x60 => { | |
percent_encode(byte as char) | |
}, | |
_ => (byte as char).to_str() | |
}; | |
url.query.unwrap().push_str(char_str); | |
} | |
}; | |
let buf = ~""; | |
if c == '#' { | |
state = FragmentState; | |
url.fragment = Some(~""); | |
} | |
} else if (c == '\x09' || c == '\x0A' || c == '\x0D') { | |
//Do nothing | |
} else { | |
buf.push_char(c); | |
} | |
}, | |
FragmentState => { | |
match c { | |
'\x09' | '\x0A' | '\x0D' => {}, | |
_ => { | |
url.fragment.expect("Fragment cannot be None").push_str(utf8_percent_encode(c, SimpleEncodeSet)); | |
} | |
} | |
} | |
_ => fail!("Not implemented") | |
} | |
}, | |
None => { | |
//EOF handling | |
match state { | |
SchemeDataState => { | |
url.scheme_data = buf; | |
} | |
_ => {fail!("Not implemented")} | |
} | |
break; | |
} | |
} | |
} | |
Ok(Some(url)) | |
} | |
} | |
fn unwrap_bool<T>(f: &fn(T)->bool, x: Option<T>) -> bool { | |
match x { | |
None => false, | |
Some(v) => f(v) | |
} | |
} | |
/// Returns true when `c` is a URL code point.
///
/// These are the ASCII letters and digits, the punctuation characters
/// listed below, and the non-ASCII ranges U+00A0-U+D7FF, U+E000-U+FDCF,
/// U+FDF0-U+FFEF, U+E1000-U+EFFFD and, for every other supplementary
/// plane, everything except the last two code points of the plane.
fn is_url_char(c: char) -> bool {
    if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') {
        return true;
    }
    match c {
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' |
        ':' | ';' | '=' | '?' | '@' | '_' | '~' => return true,
        _ => {}
    }
    let cp = c as u32;
    if cp < 0x10000 {
        // Basic multilingual plane.
        (cp >= 0xA0 && cp <= 0xD7FF) ||
            (cp >= 0xE000 && cp <= 0xFDCF) ||
            (cp >= 0xFDF0 && cp <= 0xFFEF)
    } else if cp >= 0xE0000 && cp <= 0xEFFFF {
        // Plane 14 only counts from U+E1000 upwards.
        cp >= 0xE1000 && cp <= 0xEFFFD
    } else {
        // Planes 1-13, 15 and 16: all but the two trailing code points.
        cp <= 0x10FFFD && (cp & 0xFFFF) <= 0xFFFD
    }
}
/// Returns true when `c` is an ASCII letter, lower or upper case.
fn is_ascii_alpha(c: char) -> bool {
    let is_lower = c >= 'a' && c <= 'z';
    let is_upper = c >= 'A' && c <= 'Z';
    is_lower || is_upper
}
/// Returns true when `c` is an ASCII hexadecimal digit (0-9, a-f or A-F).
fn is_hex_char(c: char) -> bool {
    if c >= '0' && c <= '9' {
        return true;
    }
    (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
}
/// Converts an ASCII upper-case letter to lower case; every other character
/// is returned unchanged.
fn char_to_lower(c: char) -> char {
    if c >= 'A' && c <= 'Z' {
        // Upper and lower case ASCII letters differ only in bit 0x20.
        // (Bit 0x40 is already set in every upper-case letter, so OR-ing
        // with it changes nothing.)
        ((c as u8) | 0x20u8) as char
    } else {
        c
    }
}
// Selects which characters in_encode_set reports as needing percent
// encoding. Each set includes everything in the one listed before it.
#[deriving(Eq)] | |
enum EncodeSet { | |
// Characters below 0x20 or above 0x7e.
SimpleEncodeSet, | |
// Adds space, '"', '#', '<', '>', '?' and '`'.
DefaultEncodeSet, | |
// Adds '\\', '@' and '/'.
PasswordEncodeSet, | |
// Adds ':'.
UsernameEncodeSet | |
} | |
fn in_encode_set(c: char, set: EncodeSet) -> bool { | |
if (c < '\x20' || c > '\x7e') { | |
return true; | |
} else if (set == SimpleEncodeSet) { | |
return false; | |
} | |
if (c == '\x20' || c == '"' || c == '#' || c == '<' || c == '>' || c == '?' || c == '`') { | |
return true; | |
} else if (set == DefaultEncodeSet) { | |
return false; | |
} | |
if (c == '\\' || c == '@' || c == '/') { | |
return true; | |
} else if (set == PasswordEncodeSet) { | |
return false; | |
} | |
if (c == ':') { | |
return true; | |
} else if (set == UsernameEncodeSet) { | |
return false; | |
} | |
fail!("Unexpected encode set") | |
} | |
/// Percent-encodes a single byte value: '%' followed by exactly two
/// upper-case hexadecimal digits, e.g. "%09" or "%7F".
///
/// Callers pass a byte cast to `char`; only the low eight bits of `c` are
/// encoded.
fn percent_encode(c: char) -> ~str {
    let hex_digits = "0123456789ABCDEF";
    let byte = c as u8;
    let mut rv = ~"%";
    // Emit both nibbles so values below 0x10 keep their leading zero.
    rv.push_char(hex_digits[(byte >> 4) as uint] as char);
    rv.push_char(hex_digits[(byte & 0x0F) as uint] as char);
    rv
}
/// Returns `c` as a string, percent-encoding it when it is in `set`.
///
/// A character outside the set is returned as is. A character inside the
/// set is encoded as UTF-8 and each byte is written as '%' followed by
/// exactly two upper-case hexadecimal digits.
fn utf8_percent_encode(c: char, set: EncodeSet) -> ~str {
    let mut rv = ~"";
    if !in_encode_set(c, set) {
        rv.push_char(c);
    } else {
        let hex_digits = "0123456789ABCDEF";
        let mut buf = vec::from_elem(c.len_utf8_bytes(), 0 as u8);
        c.encode_utf8(buf);
        for b in buf.iter() {
            rv.push_char('%');
            // Emit both nibbles so bytes below 0x10 keep their leading zero.
            rv.push_char(hex_digits[(*b >> 4) as uint] as char);
            rv.push_char(hex_digits[(*b & 0x0F) as uint] as char);
        }
    }
    rv
}
fn host_parse(input: &str) -> Option<Host> { | |
if input.len() == 0 { | |
return None | |
} | |
if input[0] as char == '[' { | |
if input[input.len() - 1] as char != ']' { | |
return None | |
} | |
return match ipv6_parse(input.slice(1, input.len() - 1)) { | |
Some(x) => Some(IPv6Host(x)), | |
None => None | |
} | |
} else { | |
let decoded = percent_decode(input); | |
} | |
None | |
} | |
impl IPv6Address { | |
fn new() -> IPv6Address { | |
return IPv6Address { | |
data: [0, 0, 0, 0, 0, 0, 0, 0] | |
} | |
} | |
fn set(&mut self, i: uint, x: u16) { | |
self.data[i] = x; | |
} | |
} | |
impl ops::Index<uint, u16> for IPv6Address { | |
fn index(&self, i: &uint) -> u16 { | |
self.data[*i] | |
} | |
} | |
fn ipv6_parse(input: &str) -> Option<IPv6Address> { | |
let mut address = IPv6Address::new(); | |
let mut piece_pointer = 0 as uint; | |
let mut compress_pointer = None; | |
let mut is_ip_v4 = false; | |
let mut iter = SeekableCharIterator::new(input.to_owned()); | |
let first = input[0] as char; | |
if first == ':' { | |
if first != ':' { | |
return None | |
} else { | |
iter.next(); | |
iter.next(); | |
piece_pointer += 1; | |
compress_pointer = Some(piece_pointer); | |
} | |
} | |
loop { | |
let maybe_c = iter.next(); | |
if piece_pointer == 8 { | |
return None; | |
} | |
match maybe_c { | |
Some(c_0) => { | |
let mut c = c_0; | |
if c == ':' { | |
if compress_pointer.is_none() { | |
return None; | |
piece_pointer += 1; | |
compress_pointer = Some(piece_pointer); | |
loop; | |
} | |
} | |
let mut value = 0 as u16; | |
let mut length = 0; | |
while length < 4 { | |
if c.is_digit_radix(16) { | |
break; | |
} | |
value = value * 0x10 + c.to_digit(16).unwrap() as u16; | |
length += 1; | |
let maybe_c = iter.next(); | |
match maybe_c { | |
Some(x) => {c = x}, | |
None => break | |
} | |
} | |
match c { | |
'.' => { | |
if length == 0 { | |
return None; | |
} | |
iter.rewind(length); | |
is_ip_v4 = true; | |
break; | |
}, | |
':' => {}, | |
_ => { | |
return None; | |
}, | |
}; | |
address.set(piece_pointer, value); | |
piece_pointer += 1; | |
} | |
None => {} | |
} | |
} | |
if is_ip_v4 { | |
if piece_pointer > 6 { | |
return None; | |
} | |
let mut dots_seen = 0; | |
for c in iter { | |
let mut value = 0; | |
while c.is_digit_radix(10) { | |
value = value * 10 + c.to_digit(10).unwrap() as u16; | |
} | |
if value > 255 { | |
return None; | |
} | |
if dots_seen < 3 && c != '.' { | |
return None; | |
} else if dots_seen == 3 { | |
return None; | |
} | |
let piece = address[piece_pointer]; | |
address.set(piece_pointer, piece * 0x100 + value); | |
if dots_seen != 1 { | |
piece_pointer += 1; | |
} | |
dots_seen += 1; | |
} | |
if dots_seen < 3 { | |
return None; | |
} | |
} | |
if compress_pointer.is_some() { | |
let mut swaps = piece_pointer - compress_pointer.unwrap(); | |
piece_pointer = 7; | |
while piece_pointer != 0 && swaps != 0 { | |
let swap_pointer = compress_pointer.unwrap() + swaps - 1; | |
let piece = address[piece_pointer]; | |
let swap_piece = address[swap_pointer]; | |
address.set(piece_pointer, swap_piece); | |
address.set(swap_pointer, piece); | |
swaps -= 1; | |
piece_pointer -= 1; | |
} | |
} else { | |
if piece_pointer != 8 { | |
return None; | |
} | |
} | |
return Some(address); | |
} | |
/// Replaces every "%XX" escape (two hexadecimal digits) in `input` with the
/// byte it stands for; a '%' not followed by two hexadecimal digits is kept
/// as is.
fn percent_decode(input: &str) -> ~str {
    //XXX not multibyte character safe: the input is read one byte at a
    //time and every byte, decoded or not, is appended as its own character.
    let mut decoded = ~"";
    let len = input.len();
    let mut pos = 0u;
    while pos < len {
        let c = input[pos] as char;
        pos += 1;
        let mut out_c = c;
        // An escape needs two more bytes after the '%'.
        if c == '%' && pos + 1 < len {
            let high = (input[pos] as char).to_digit(16);
            let low = (input[pos + 1] as char).to_digit(16);
            match (high, low) {
                (Some(h), Some(l)) => {
                    out_c = (h * 16 + l) as u8 as char;
                    pos += 2;
                },
                _ => {}
            }
        }
        decoded.push_char(out_c);
    }
    decoded
}
fn main() { | |
ParsedURL::parse("http://example.org:8080/foo?bar#baz", None, None, None, None); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment