Last active
January 23, 2019 11:16
-
-
Save inkrement/76d01ea85098d29fb299cd138f5c3393 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
date_default_timezone_set('UTC'); | |
require 'vendor/autoload.php'; | |
use Monolog\Logger; | |
use Monolog\Handler\StreamHandler; | |
use GuzzleHttp\Pool; | |
use GuzzleHttp\Client; | |
use GuzzleHttp\Psr7\Request; | |
use GuzzleHttp\HandlerStack; | |
use GuzzleHttp\MessageFormatter; | |
use GuzzleHttp\Middleware; | |
use PHPHtmlParser\Dom; | |
use PHPHtmlParser\Exceptions\EmptyCollectionException; | |
// Logging: one Monolog channel whose only handler streams records of level
// DEBUG and above to stdout.
$log = new Logger('pondus scraper', [
    new StreamHandler('php://stdout', Logger::DEBUG),
]);

// HTTP: build the handler stack by hand (instead of using Guzzle's default
// stack) on top of the handler returned by choose_handler(), then push the
// request-logging middleware followed by the httpErrors middleware.
$stack = new HandlerStack(\GuzzleHttp\choose_handler());
$stack->push(Middleware::log($log, new MessageFormatter(), 'debug'));
$stack->push(Middleware::httpErrors());

// Every request sent through this client passes through the stack above.
$client = new Client(['handler' => $stack]);
/**
 * Build the GET request for one page of the movie overview listing.
 *
 * @param int $page Page number; it is formatted with %d into the listing URL.
 *
 * @return Request Unsent PSR-7 request for https://kat.cr/movies/<page>/.
 */
function overviewRequestFactory($page = 1)
{
    $uri = sprintf('https://kat.cr/movies/%d/', $page);

    return new Request('GET', $uri);
}
/**
 * Build the GET request for a single detail (sub) page.
 *
 * @param string $url Value appended verbatim to the site origin; presumably a
 *                    root-relative path such as "/foo.html" — verify against caller.
 *
 * @return Request Unsent PSR-7 request for https://kat.cr<url>.
 */
function subpageRequestFactory($url)
{
    $uri = 'https://kat.cr'.$url;

    return new Request('GET', $uri);
}
// Seed the crawl with the overview pages.
// NOTE(review): the loop starts at page 0 although overviewRequestFactory()
// defaults to page 1 — confirm which index the site expects.
$requests = [];
for ($i = 0; $i < 1; ++$i) {
    $requests[] = overviewRequestFactory($i);
}

// Sub-page requests discovered while handling responses are collected here and
// sent as the next wave. They are deliberately NOT appended to the iterator of
// the running Pool: once that iterator has been exhausted (which happens right
// away when there are fewer seed requests than the concurrency limit), requests
// appended afterwards are not reliably picked up and the crawl ends early.
$discovered = [];

/**
 * Handle a successful response: classify it by its <title> and, for overview
 * pages, queue one sub-page request per result row.
 *
 * @param \Psr\Http\Message\ResponseInterface $response Response delivered by the Pool.
 * @param int|string                          $index    Key of the request inside the Pool (unused).
 */
$onFulfilled = function ($response, $index) use ($log, &$discovered) {
    // Parse the HTML response; cast the body stream to a string explicitly.
    $dom = new Dom();
    $dom->load((string) $response->getBody());

    try {
        $title = $dom->find('title')->innerHTML;
    } catch (EmptyCollectionException $e) {
        // Without a <title> the page cannot be classified; skip it instead of
        // letting the exception escape from the callback.
        $log->warning('skipping response without <title>: '.$e->getMessage());

        return;
    }

    if ($title !== 'Download Movie Torrents - Kickass Torrents') {
        // this is a subpageResponse
        $log->info('received subpageResponse');

        return;
    }

    // this is an overviewResponse
    $log->info('received overviewResponse');

    try {
        $rows = $dom->find('#mainSearchTable')->find('table')->find('tr');
    } catch (EmptyCollectionException $e) {
        $log->error('overview page has no result table: '.$e->getMessage());

        return;
    }

    $rowCount = count($rows);
    for ($i = 1; $i < $rowCount; ++$i) { // skip first row (header)
        try {
            $url = $rows[$i]->find('td')->find('.markeredBlock')->find('a')->getAttribute('href');
        } catch (EmptyCollectionException $e) {
            // A single row without a detail link must not abort the rest.
            $log->warning(sprintf('row %d has no subpage link: %s', $i, $e->getMessage()));
            continue;
        }

        if ($url === null || $url === '') {
            $log->warning(sprintf('row %d has an empty subpage link', $i));
            continue;
        }

        $log->info('add subpageRequest');
        $discovered[] = subpageRequestFactory($url);
    }
};

/**
 * Handle a failed request by logging it, so that failures are visible.
 *
 * @param mixed      $reason Rejection reason handed over by the Pool.
 * @param int|string $index  Key of the request inside the Pool.
 */
$onRejected = function ($reason, $index) use ($log) {
    $detail = $reason instanceof \Exception ? $reason->getMessage() : 'no exception provided';
    $log->error(sprintf('request #%s failed: %s', $index, $detail));
};

// Crawl in waves: run a Pool over the current batch, wait for it to finish,
// then continue with whatever the callbacks discovered during that batch.
while (count($requests) > 0) {
    $discovered = [];

    $pool = new Pool($client, $requests, [
        'concurrency' => 5,
        'fulfilled' => $onFulfilled,
        'rejected' => $onRejected,
    ]);

    // Initiate the transfers and force this wave of requests to complete.
    $pool->promise()->wait();

    $requests = $discovered;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment