Skip to content

Instantly share code, notes, and snippets.

@oldtune
Created June 15, 2023 08:29
Show Gist options
  • Save oldtune/d3ea8c3015ac66655f3182caf3e4b4da to your computer and use it in GitHub Desktop.
Save oldtune/d3ea8c3015ac66655f3182caf3e4b4da to your computer and use it in GitHub Desktop.
crawler main.rs 3
/// Downloads one dictionary page and prints the inner HTML of the first
/// element matching the pronunciation selector.
///
/// # Errors
/// Returns an error if the HTTP client cannot be built, the request fails,
/// the server answers with a 4xx/5xx status, the body cannot be read, or the
/// HTML cannot be parsed.
async fn main() -> anyhow::Result<()> {
    let http_client = build_http_client()?;

    // The word being looked up ("access") is hard-coded in the URL.
    let html = http_client
        .get("http://tratu.coviet.vn/hoc-tieng-anh/tu-dien/lac-viet/A-V/access.html")
        .send()
        .await?
        // Fail fast on 4xx/5xx instead of scraping an error page.
        .error_for_status()?
        // "utf-8" is the default encoding handed to the body decoder.
        .text_with_charset("utf-8")
        .await?;

    let dom = get_dom(&html)?;
    let pronun = extract_pronunciation(&dom, "div.p5l.fl.cB");
    println!("{:?}", pronun);
    Ok(())
}
/// Parses `html` into a `tl` DOM using the default parser options.
///
/// The returned `VDom` borrows from `html`, so the input string must outlive it.
///
/// # Errors
/// Returns the parser's error (converted into `anyhow::Error`) when
/// `tl::parse` rejects the input.
fn get_dom(html: &str) -> anyhow::Result<VDom> {
    // `html` is already a `&str`; pass it straight through without re-borrowing.
    Ok(tl::parse(html, ParserOptions::default())?)
}
/// Returns the inner HTML of the first element matched by `query_selector`.
///
/// Matches are visited in the order the selector iterator yields them; handles
/// that do not resolve to a node, and nodes that are not tags, are skipped.
///
/// Returns `None` when the selector cannot be used for a query or when no
/// matching tag is found.
fn extract_pronunciation(dom: &VDom, query_selector: &str) -> Option<String> {
    let parser = dom.parser();

    dom.query_selector(query_selector)?.find_map(|node_handle| {
        let tag = node_handle.get(parser)?.as_tag()?;
        Some(tag.inner_html(parser))
    })
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment