Skip to content

Instantly share code, notes, and snippets.

@liuxd
Last active September 7, 2017 01:26
Show Gist options
  • Save liuxd/214809a542d730e634b6672a8dc4e98e to your computer and use it in GitHub Desktop.
Save liuxd/214809a542d730e634b6672a8dc4e98e to your computer and use it in GitHub Desktop.
[GIF Spider] Fetch the GIF images from the pages under http://www.ik123.com/q/tuku/gaoxiao/.
{
"require": {
"bupt1987/html-parser": "dev-master"
}
}
<?php
// Load Composer's autoloader. __DIR__ never ends with a directory separator,
// so the leading slash has to be part of the appended path.
require __DIR__ . '/vendor/autoload.php';

// Directory (with trailing slash) that downloaded images are written into.
define('IMG_FOLDER', '/tmp/img/');
/**
 * Write a message to standard output, terminated by the platform newline.
 *
 * @param mixed $msg Value to print; it is converted to a string by PHP.
 */
function show($msg)
{
    print $msg . PHP_EOL;
}
/**
 * Request a URL with cURL using browser-like headers and return the body.
 *
 * @param string $url    Address to request.
 * @param string $reffer Value sent in the Referer request header.
 *
 * @return string|false The response body, or false if the handle could not be
 *                      created or the transfer failed.
 */
function fetch($url, $reffer)
{
    // Accept-Encoding is intentionally absent from this list: it is generated
    // by CURLOPT_ENCODING below. Sending it by hand (and advertising "sdch",
    // which cURL cannot decode) makes cURL hand back the still-compressed
    // bytes, which would then be saved to disk as a broken image.
    $header = [
        'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer: ' . $reffer,
        'Accept-Language: zh-CN,en-US;q=0.8,en;q=0.6',
    ];

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_HTTPHEADER     => $header,
        CURLOPT_RETURNTRANSFER => true,
        // Empty string: advertise every encoding this cURL build supports and
        // transparently decompress the response.
        CURLOPT_ENCODING       => '',
    ]);

    $file_contents = curl_exec($ch);
    curl_close($ch);

    return $file_contents;
}
/**
 * Read a page with file_get_contents() and wrap it in an HTML parser.
 *
 * @param string $url Location of the HTML document to load.
 *
 * @return \HtmlParser\ParserDom Parser built from the downloaded markup.
 */
function getDom($url)
{
    return new \HtmlParser\ParserDom(file_get_contents($url));
}
/**
 * Save every image referenced by an <img> tag on the given page into
 * IMG_FOLDER, skipping files that are already present.
 *
 * Each image URL is printed before it is processed. The page URL is passed to
 * fetch() as the referer for the image requests.
 *
 * @param string $url Address of the page whose images should be downloaded.
 */
function download($url)
{
    // file_put_contents() does not create missing directories, so make sure
    // the target exists first. The second is_dir() covers the case where the
    // directory appeared between the first check and the mkdir() call.
    if (!is_dir(IMG_FOLDER) && !mkdir(IMG_FOLDER, 0777, true) && !is_dir(IMG_FOLDER)) {
        show('Cannot create ' . IMG_FOLDER);
        return;
    }

    $html_dom = getDom($url);

    foreach ($html_dom->find('img') as $img) {
        $img_url = $img->getAttr('src');
        // An <img> without a usable src cannot be downloaded.
        if (!is_string($img_url) || $img_url === '') {
            continue;
        }
        show($img_url);

        // Name the file after the last segment of the URL path only, so a
        // query string or fragment does not end up in the file name.
        $path = parse_url($img_url, PHP_URL_PATH);
        $name = is_string($path) ? basename($path) : '';
        if ($name === '') {
            show('No file name!');
            continue;
        }

        $filename = IMG_FOLDER . $name;
        if (file_exists($filename)) {
            show('Existed!');
            continue;
        }

        $data = fetch($img_url, $url);
        // Never write a failed or empty response: the resulting empty file
        // would be treated as "Existed!" on the next run and never retried.
        if ($data === false || $data === '') {
            show('Failed!');
            continue;
        }

        if (file_put_contents($filename, $data) === false) {
            show('Write failed!');
        }
    }
}
/**
 * Walk list pages 1 through 86 of the gallery, collect the distinct links
 * matched by the ".img a" selector on each list page, and hand every link to
 * download().
 */
function main()
{
    for ($page = 1; $page <= 86; $page++) {
        $list_dom = getDom("http://www.ik123.com/q/tuku/gaoxiao/list_14_{$page}.html");

        // Gather the href of every matched anchor on this list page.
        $links = [];
        foreach ($list_dom->find('.img a') as $anchor) {
            $links[] = $anchor->getAttr('href');
        }

        // The same target may be linked more than once on a page; visit each once.
        foreach (array_unique($links) as $link) {
            download($link);
        }
    }
}
// Script entry point: start the crawl as soon as the file is executed.
main();
# eof
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment