Skip to content

Instantly share code, notes, and snippets.

@nticaric
Last active April 13, 2016 15:33
Show Gist options
  • Save nticaric/9a8136ed0b7cf9b282dcc18e349d3b8c to your computer and use it in GitHub Desktop.
Save nticaric/9a8136ed0b7cf9b282dcc18e349d3b8c to your computer and use it in GitHub Desktop.
<?php
/**
 * Mirror the PHPUnit manual into the local `resources/docs/` directory.
 *
 * Downloads the manual's index page, stores the HTML of its `.toc` element
 * as `index.html`, then follows every top-level table-of-contents link and
 * stores the HTML of that page's chapter/appendix container under the
 * linked page's file name.
 *
 * Pages whose link has no usable file name, or whose markup lacks the
 * expected container, are reported via the console and skipped so that a
 * single odd page does not abort the whole run.
 *
 * @throws \RuntimeException When the docs directory cannot be created or a
 *                           file cannot be written.
 */
public function scrapePHPUnitDe()
{
    $docsDir = base_path('resources/docs/');

    // Create the target directory up front; the second is_dir() check
    // covers the case where another process created it in the meantime.
    if (!is_dir($docsDir) && !mkdir($docsDir, 0755, true) && !is_dir($docsDir)) {
        throw new \RuntimeException('Unable to create directory: ' . $docsDir);
    }

    $client = new Client();
    $crawler = $client->request('GET', 'https://phpunit.de/manual/current/en/index.html');
    $toc = $crawler->filter('.toc');

    if (file_put_contents($docsDir . 'index.html', $toc->html()) === false) {
        throw new \RuntimeException('Unable to write: ' . $docsDir . 'index.html');
    }

    // File names already written, so links that differ only by fragment
    // (page.html#a, page.html#b) are downloaded once.
    $seen = [];

    $crawler->filter('.toc > dt a')->each(function ($node) use ($client, $docsDir, &$seen) {
        $href = $node->attr('href');

        // The href comes from a remote page: never let it choose the
        // directory. Keep only the final path segment, dropping any
        // query string, fragment or "../" components.
        $page = basename((string) parse_url((string) $href, PHP_URL_PATH));

        if ($page === '' || $page === '.' || $page === '..') {
            $this->error("Skipped (no usable file name): " . $href);
            return;
        }

        if (isset($seen[$page])) {
            return;
        }
        $seen[$page] = true;

        // NOTE(review): a relative href is presumably resolved by the client
        // against the page it requested last -- confirm for the installed
        // client version.
        $chapter = $client
            ->request('GET', $href)
            ->filter('.col-md-8 .chapter, .col-md-8 .appendix');

        // html() throws on an empty node list; report and move on instead.
        if (count($chapter) === 0) {
            $this->error("Skipped (no chapter/appendix content): " . $href);
            return;
        }

        if (file_put_contents($docsDir . $page, $chapter->html()) === false) {
            throw new \RuntimeException('Unable to write: ' . $docsDir . $page);
        }

        // Report success only once the file is actually on disk.
        $this->info("Scraped: " . $href);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment