Last active
August 12, 2021 07:46
-
-
Save vbkaisetsu/2aa73ebec99cb0b71f17097571af0acb to your computer and use it in GitHub Desktop.
RustでHTMLをパースし,編集し,書き出す (html5ever)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::cell::RefCell; | |
use std::rc::Rc; | |
use html5ever::{Attribute, LocalName, QualName}; | |
use html5ever::driver::ParseOpts; | |
use html5ever::{local_name, ns, namespace_url}; | |
use html5ever::{parse_document, parse_fragment}; | |
use html5ever::rcdom::{Handle, Node, NodeData, RcDom}; | |
use html5ever::serialize; | |
use html5ever::serialize::SerializeOpts; | |
use html5ever::tendril::{TendrilSink, StrTendril}; | |
fn search_iter(node: &Handle) { | |
match node.data { | |
NodeData::Text { ref contents } => { | |
println!("{}", contents.borrow().to_string() ); | |
}, | |
NodeData::Element { ref name, ref attrs, .. } => { | |
}, | |
NodeData::Document { .. } => { | |
}, | |
NodeData::Doctype { .. } => { | |
}, | |
NodeData::Comment { .. } => { | |
}, | |
NodeData::ProcessingInstruction { .. } => { | |
}, | |
}; | |
for child in node.children.borrow().iter() { | |
search_iter(&child); | |
} | |
} | |
fn create_text_node(s: &str) -> Rc<Node> { | |
Node::new(NodeData::Text { | |
contents: RefCell::new(s.into()), | |
}) | |
} | |
fn create_element_node( | |
name: QualName, | |
attrs: Vec<Attribute>) -> Rc<Node> { | |
Node::new(NodeData::Element { | |
name: name, | |
attrs: RefCell::new(attrs), | |
template_contents: None, | |
mathml_annotation_xml_integration_point: false, | |
}) | |
} | |
fn create_tag_name(name: &str) -> QualName { | |
QualName::new(None, ns!(html), LocalName::from(name)) | |
} | |
fn create_attribute(name: &str, value: &str) -> Attribute { | |
Attribute { | |
name: QualName::new(None, ns!(), LocalName::from(name)), | |
value: StrTendril::from(value), | |
} | |
} | |
fn main() { | |
let html_data = "これは<span>テスト</span>です。"; | |
// let parser = parse_document(RcDom::default(), ParseOpts::default()); | |
// let dom = parser.one(html_data); | |
let parser = parse_fragment( | |
RcDom::default(), | |
ParseOpts::default(), | |
QualName::new(None, ns!(html), local_name!("body")), | |
vec![], | |
); | |
let dom = parser.one(html_data); | |
search_iter(&dom.document.children.borrow()[0]); | |
let html_node = &dom.document.children.borrow()[0]; | |
let span_node = &html_node.children.borrow()[1]; | |
let a_node = create_element_node(create_tag_name("a"), vec![]); | |
a_node.children.borrow_mut().push(create_text_node("アンカー")); | |
span_node.children.borrow_mut()[0] = a_node; | |
if let NodeData::Element { ref attrs, .. } = span_node.data { | |
attrs.borrow_mut().push(create_attribute("style", "color: #ff0000")); | |
} | |
let mut bytes = vec![]; | |
// serialize(&mut bytes, &dom.document, SerializeOpts::default()).unwrap(); | |
serialize(&mut bytes, &dom.document.children.borrow()[0], SerializeOpts::default()).unwrap(); | |
println!("{}", String::from_utf8(bytes).unwrap()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment