Last active
February 2, 2025 07:11
-
-
Save thomcc/e24c14639cffc5eb00de60f790d75901 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// One element of an XML document: its tag name, attributes, child elements
/// and text body.
#[derive(Clone, Debug)]
pub struct XmlNode {
    /// Element name.
    pub tag: String,
    /// Attributes as `(name, value)` pairs, kept in insertion order.
    pub attributes: Vec<(String, String)>,
    /// Child elements, in the order they were added.
    pub children: Vec<XmlNode>,
    /// Text content of the element.
    pub body: String,
}

impl XmlNode {
    /// Creates an element named `tag` with no attributes, children or body.
    ///
    /// Accepts anything convertible into a `String` (`String`, `&str`, ...).
    pub fn new(tag: impl Into<String>) -> XmlNode {
        XmlNode {
            tag: tag.into(),
            attributes: Vec::new(),
            children: Vec::new(),
            body: String::new(),
        }
    }

    /// Returns the first direct child whose tag equals `child`, if any.
    pub fn child(&self, child: &str) -> Option<&XmlNode> {
        self.children.iter().find(|node| node.tag == child)
    }

    /// Returns the value of the attribute named `attr`, if it is set.
    pub fn attr(&self, attr: &str) -> Option<&str> {
        self.attributes
            .iter()
            .find(|entry| entry.0 == attr)
            .map(|entry| entry.1.as_str())
    }

    /// Sets attribute `attr` to `value`.
    ///
    /// An existing attribute with the same name is overwritten in place (its
    /// position in `attributes` is kept); otherwise the pair is appended.
    pub fn set_attr(&mut self, attr: impl Into<String>, value: impl Into<String>) {
        let attr = attr.into();
        let value = value.into();
        if let Some(existing) = self.attributes.iter_mut().find(|entry| entry.0 == attr) {
            existing.1 = value;
            return;
        }
        self.attributes.push((attr, value));
    }

    /// Looks `name` up as an attribute first and, failing that, as a direct
    /// child element, in which case the child's body text is returned.
    pub fn child_or_attr(&self, name: &str) -> Option<&str> {
        self.attr(name)
            .or_else(|| self.child(name).map(|node| node.body.as_str()))
    }
}
/// Characters the tokenizer skips between tokens.
static WHITESPACE: &[char] = &[' ', '\t', '\n', '\r'];

/// Characters that form a one-character token on their own and that
/// terminate any name token they follow.
static DELIM: &[char] = &['<', '>', '!', '?', '=', '/'];
/// Returns `true` when `e` is one of the characters in `cs`.
#[inline]
fn is_one_of(e: char, cs: &[char]) -> bool {
    cs.contains(&e)
}
| struct XmlParser<'a> { | |
| s: core::iter::Peekable<core::iter::Enumerate<core::str::Chars<'a>>>, | |
| line: usize, | |
| } | |
| impl<'a> XmlParser<'a> { | |
| fn parse(&mut self) -> Result<XmlNode, String> { | |
| let mut token = self.next_token(); | |
| while token != "<" || is_one_of(self.s.peek().map_or(' ', |&v| v.1), &['!', '?']) { | |
| token = self.next_token(); | |
| } | |
| let mut elem = XmlNode::new(self.next_token()); | |
| while self.s.peek().is_some() && { | |
| token = self.next_token(); | |
| token != ">" && token != "/" | |
| } { | |
| let attr = token; | |
| token = self.next_token(); | |
| if token != "=" { | |
| return Err(format!( | |
| "Expected '=' between attr and value in {} node on line {} (saw {}).", | |
| elem.tag, self.line, token | |
| )); | |
| } | |
| let value = self.next_token(); | |
| elem.set_attr(attr, value); | |
| } | |
| if token == "/" { | |
| token = self.next_token(); | |
| if token != ">" { | |
| return Err(format!( | |
| "Expected '>' after '/' in {} node on line {}.", | |
| elem.tag, self.line | |
| )); | |
| } | |
| return Ok(elem); | |
| } | |
| if token != ">" { | |
| return Err(format!( | |
| "Expected '>' to close {} node on line {}.", | |
| elem.tag, self.line | |
| )); | |
| } | |
| self.eat_whitespace(); | |
| let mut l = self.line; | |
| let mut t = self.s.clone(); | |
| token = self.next_token(); | |
| while token != "<" || self.s.peek().map_or(' ', |v| v.1) != '/' { | |
| if token == "<" { | |
| self.s = t.clone(); | |
| self.line = l; | |
| elem.children.push(match self.parse() { | |
| Ok(node) => node, | |
| Err(s) => return Err(s), | |
| }); | |
| self.eat_whitespace(); | |
| } else { | |
| let end_idx = self.s.peek().map(|v| v.0).unwrap_or(0); | |
| while let Some(&(i, _)) = t.peek() { | |
| if i == end_idx { | |
| break; | |
| } | |
| elem.body.push(t.next().unwrap().1); | |
| } | |
| while let Some(&(_, c)) = self.s.peek() { | |
| if c == '<' { | |
| break; | |
| } | |
| elem.body.push(self.s.next().unwrap().1); | |
| } | |
| if self.s.peek().is_none() { | |
| return Err(format!( | |
| "Unclosed {} element body on line {}", | |
| elem.tag, self.line | |
| )); | |
| } | |
| } | |
| t = self.s.clone(); | |
| l = self.line; | |
| token = self.next_token(); | |
| } | |
| if token != "<" { | |
| Err(format!( | |
| "Unclosed {} element body on line {}", | |
| elem.tag, self.line | |
| )) | |
| } else if { | |
| token = self.next_token(); | |
| token != "/" | |
| } { | |
| Err(format!( | |
| "Expected '/' in closing tag of {} on line {}", | |
| elem.tag, self.line | |
| )) | |
| } else if { | |
| token = self.next_token(); | |
| token != elem.tag | |
| } { | |
| Err(format!( | |
| "Saw closing tag for {} on line {} when it should have been for {}", | |
| token, self.line, elem.tag | |
| )) | |
| } else if { | |
| token = self.next_token(); | |
| token != ">" | |
| } { | |
| Err(format!( | |
| "Expected '>' in closing tag of {} on line {}", | |
| elem.tag, self.line | |
| )) | |
| } else { | |
| let s = elem.body.clone(); | |
| elem.body = s.trim().to_string(); | |
| Ok(elem) | |
| } | |
| } | |
| fn eat_whitespace(&mut self) { | |
| while let Some(&(_, c)) = self.s.peek() { | |
| if !is_one_of(c, WHITESPACE) { | |
| break; | |
| } | |
| if c == '\n' { | |
| self.line += 1; | |
| } | |
| self.s.next().unwrap(); | |
| } | |
| } | |
| fn next_token(&mut self) -> String { | |
| let mut token = String::new(); | |
| while let Some(&(_, c)) = self.s.peek() { | |
| if !is_one_of(c, WHITESPACE) { | |
| break; | |
| } | |
| if c == '\n' { | |
| self.line += 1; | |
| } | |
| self.s.next().unwrap(); | |
| } | |
| let ch = match self.s.next() { | |
| Some((_, c)) => c, | |
| None => return token, | |
| }; | |
| if ch == '"' { | |
| while let Some((_, c)) = self.s.next() { | |
| if c == '"' { | |
| break; | |
| } | |
| if c == '\n' { | |
| self.line += 1; | |
| } | |
| token.push(c); | |
| } | |
| } else { | |
| token.push(ch); | |
| if !is_one_of(ch, DELIM) { | |
| while let Some(&(_, c)) = self.s.peek() { | |
| if is_one_of(c, DELIM) || is_one_of(c, WHITESPACE) { | |
| break; | |
| } | |
| token.push(self.s.next().unwrap().1); | |
| } | |
| self.eat_whitespace(); | |
| } | |
| } | |
| token | |
| } | |
| } | |
| pub fn xml_parse(s: &str) -> Result<XmlNode, String> { | |
| let mut parser = XmlParser { | |
| line: 0, | |
| s: s.chars().enumerate().peekable(), | |
| }; | |
| parser.parse() | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment