Skip to content

Instantly share code, notes, and snippets.

@ringmaster
Last active December 17, 2015 03:39
Show Gist options
  • Save ringmaster/5545240 to your computer and use it in GitHub Desktop.
Save ringmaster/5545240 to your computer and use it in GitHub Desktop.
/**
 * Scrape the "consumed books" listing pages of the user mikelietz on
 * allconsuming.net and output a map of product identifier => item title.
 *
 * For every listing page, each `strong a` link whose href starts with
 * `/item/view/` is followed. On the item page, every anchor whose href
 * contains `/o/asin/<id>/` contributes one entry keyed by `<id>` (presumably
 * an Amazon ASIN -- confirm against the live markup). The value is the item
 * page's `<title>` with the "on All Consuming" suffix removed.
 *
 * Pages or items that cannot be fetched are skipped instead of being parsed
 * as an empty/false document.
 *
 * The collected array is passed to $this->output().
 */
function test_scrape_allconsuming() {
    $base_url = 'http://www.allconsuming.net';
    // Number of listing pages to walk; hard-coded to the size of the listing
    // at the time this was written.
    $last_page = 43;
    $out = array();

    for ($page = 1; $page <= $last_page; $page++) {
        $listing_html = file_get_contents($base_url . '/person/mikelietz/consumed/book?page=' . $page);
        if ($listing_html === false) {
            // Fetch failed (file_get_contents already raised a warning); move on.
            continue;
        }

        $listing = \Habari\HTMLDoc::create($listing_html);
        foreach ($listing->find('strong a') as $node) {
            // Only links to item detail pages are of interest.
            if (!preg_match('#^/item/view/#', $node->href)) {
                continue;
            }

            $item_html = file_get_contents($base_url . $node->href);
            if ($item_html === false) {
                continue;
            }

            $item = \Habari\HTMLDoc::create($item_html);
            // The title is the same for every anchor on this item page, so it
            // is resolved at most once, and only if an identifier link exists.
            $item_title = null;

            foreach ($item->find('a') as $anchor) {
                if (!preg_match('#/o/asin/([^/]+)/#', $anchor->href, $matches)) {
                    continue;
                }
                if ($item_title === null) {
                    $title_node = $item->find_one('head title');
                    $item_title = preg_replace('#on All Consuming$#', '', $title_node->inner_html());
                }
                $out[$matches[1]] = $item_title;
            }
        }
    }

    $this->output($out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment