Skip to content

Instantly share code, notes, and snippets.

@sguzman
Created June 20, 2018 11:44
Show Gist options
  • Save sguzman/c5112e3eaea12256fe4a544d0a488653 to your computer and use it in GitHub Desktop.
Save sguzman/c5112e3eaea12256fe4a544d0a488653 to your computer and use it in GitHub Desktop.
Extracting stuff from a page; may need it in the future.
// Scrape one book page into a JSON record, write it to `full_path`, and append
// it to `list`. `url`, `full_path`, `list`, and the `dom` / `network` helper
// modules are defined outside this snippet.
// NOTE(review): `html.select(..)` / `.text()` look like the `scraper` crate API
// (`Html`, `ElementRef`) — confirm against the `dom` module.
println!("Downloading {}", url);
let html = dom::get_html_from_path(url);
let title = dom::get_single_text(&html, "h1.post-title");

// Re-host the cover image: drop the site's origin and prepend the fixed IP.
let img = {
    let src = dom::get_single(&html, "div.book-cover img[src]", "src");
    format!("http://23.95.221.108{}", src.trim_start_matches("https://it-eb.com"))
};

// The download endpoint is keyed by the post id stored in the comment form.
let link = {
    let id = dom::get_single(&html, "input[name=\"comment_post_ID\"]", "value");
    let path = format!("/download.php?id={}", id);
    network::request_path(&path)
};

let desc = dom::get_single_text(&html, "div.entry-inner");

// First text node of every category link. Links without any text are skipped
// rather than panicking on an out-of-bounds index.
let cats: Vec<&str> = {
    let select = dom::selector("p.post-btm-cats a[href]");
    html.select(&select)
        .filter_map(|anchor| anchor.text().next())
        .collect()
};

let details = {
    // Labels come from the first text node of each `li span`; values from the
    // second text node of each `li` (presumably the text following the label
    // span — verify against the page markup).
    let label_select = dom::selector("div.book-details li span");
    let value_select = dom::selector("div.book-details li");
    let labels = html.select(&label_select).map(|span| span.text().next());
    let values = html.select(&value_select).map(|item| item.text().nth(1));

    // Pair labels and values positionally. `zip` stops at the shorter side and
    // pairs with a missing text node are dropped, so malformed markup yields
    // absent (null) fields instead of a panic.
    let fields: HashMap<String, &str> = labels
        .zip(values)
        .filter_map(|(label, value)| {
            // Normalise "ISBN-10: " style labels to "isbn-10".
            let key = label?.trim_end().trim_end_matches(':').to_lowercase();
            Some((key, value?))
        })
        .collect();

    json!({
        "isbn-10": fields.get("isbn-10"),
        "isbn-13": fields.get("isbn-13"),
        "format": fields.get("format"),
        "authors": fields.get("authors"),
        "publication date": fields.get("publication date"),
        "publisher": fields.get("publisher"),
        "pages": fields.get("pages"),
        "size": fields.get("size")
    })
};

let value = json!({
    "title": title,
    "img": img,
    "link": link,
    "desc": desc,
    "categories": cats,
    "details": details
});

// NOTE(review): a failed write aborts the whole run; consider propagating the
// error once the enclosing function's return type allows it.
std::fs::write(full_path, value.to_string()).expect("Could not write");
list.push(value);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment