Skip to content

Instantly share code, notes, and snippets.

@inkrement
Last active January 23, 2019 11:16
Show Gist options
  • Save inkrement/76d01ea85098d29fb299cd138f5c3393 to your computer and use it in GitHub Desktop.
Save inkrement/76d01ea85098d29fb299cd138f5c3393 to your computer and use it in GitHub Desktop.
<?php
date_default_timezone_set('UTC');
require 'vendor/autoload.php';
use Monolog\Logger;
use Monolog\Handler\StreamHandler;
use GuzzleHttp\Pool;
use GuzzleHttp\Client;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\MessageFormatter;
use GuzzleHttp\Middleware;
use PHPHtmlParser\Dom;
use PHPHtmlParser\Exceptions\EmptyCollectionException;
// Logging: one Monolog channel whose records (DEBUG level and above) are
// written to stdout.
$log = new Logger('pondus scraper');
$log->pushHandler(new StreamHandler('php://stdout', Logger::DEBUG));

// HTTP: assemble the Guzzle handler stack by hand on top of the handler
// returned by choose_handler(), then push the request-logging middleware
// followed by the httpErrors middleware.
$stack = new HandlerStack(\GuzzleHttp\choose_handler());
$stack->push(Middleware::log($log, new MessageFormatter(), 'debug'));
$stack->push(Middleware::httpErrors());

// Every request sent through this client goes through the stack above.
$client = new Client(['handler' => $stack]);
/**
 * Build the GET request for one page of the movies overview listing.
 *
 * @param int $page Page number inserted into the URL path (formatted with %d).
 *
 * @return Request Request for https://kat.cr/movies/<page>/
 */
function overviewRequestFactory($page = 1)
{
    $uri = sprintf('https://kat.cr/movies/%d/', $page);

    return new Request('GET', $uri);
}
/**
 * Build the GET request for a subpage.
 *
 * @param string $url Value appended verbatim to the site root; callers in this
 *                    file pass the href taken from an overview row
 *                    (presumably a path starting with "/" - TODO confirm).
 *
 * @return Request Request for https://kat.cr<url>
 */
function subpageRequestFactory($url)
{
    $uri = "https://kat.cr{$url}";

    return new Request('GET', $uri);
}
// Seed the crawl with one overview request per page, for pages
// 0 .. $overviewPageCount - 1.
// Note kept from the original author: an upper bound greater than 5 is known
// to work.
$requests = [];
$overviewPageCount = 1;
for ($page = 0; $page < $overviewPageCount; $page++) {
    $requests[] = overviewRequestFactory($page);
}
// Work queue of requests that still have to be sent. It starts with the seed
// requests and grows whenever an overview page yields links to subpages.
//
// Requests discovered while a pool is running are queued here and sent by a
// follow-up pool, instead of being appended to the iterator the running pool
// is already consuming. Appending to the live iterator was only reliable when
// enough seed requests were queued up front (see the author's note next to
// the seed loop).
$queue = $requests;

$poolOptions = [
    'concurrency' => 5,
    // Called for every successful response. $index is the position of the
    // request inside the batch that is currently being processed.
    'fulfilled' => function ($response, $index) use ($log, &$queue) {
        // Parse the HTML response.
        $dom = new Dom();
        $dom->load($response->getBody());

        // Overview pages are recognised by their exact <title>; everything
        // else is treated as a subpage.
        if ($dom->find('title')->innerHTML !== 'Download Movie Torrents - Kickass Torrents') {
            $log->addInfo('received subpageResponse');

            return;
        }

        $log->addInfo('received overviewResponse');
        try {
            $rows = $dom->find('#mainSearchTable')->find('table')->find('tr');
            $rowCount = count($rows);
            // Start at 1 to skip the first row (table header).
            for ($i = 1; $i < $rowCount; ++$i) {
                $url = $rows[$i]->find('td')->find('.markeredBlock')->find('a')->getAttribute('href');
                $log->addInfo('add subpageRequest');
                // Queue the subpage; it is sent by the next pool run below.
                $queue[] = subpageRequestFactory($url);
            }
        } catch (EmptyCollectionException $e) {
            // An expected element was missing from the page. Links collected
            // before the failure stay queued; the rest of the page is skipped.
            $log->addWarning('could not extract subpage links from overviewResponse', [
                'index' => $index,
                'error' => $e->getMessage(),
            ]);
        }
    },
    // Called for every failed request. Log the failure instead of dropping it.
    'rejected' => function ($reason, $index) use ($log) {
        // NOTE(review): $reason is presumably an exception raised by Guzzle;
        // anything else is reported without further detail.
        $message = $reason instanceof \Exception ? $reason->getMessage() : 'unknown reason';
        $log->addError('request rejected', [
            'index' => $index,
            'reason' => $message,
        ]);
    },
];

// Drain the queue batch by batch: each pass sends everything queued so far and
// waits for it to finish; the callbacks may queue more work for the next pass.
while (count($queue) > 0) {
    $iterator = new ArrayIterator($queue);
    $queue = [];

    $pool = new Pool($client, $iterator, $poolOptions);
    // Initiate the transfers and create a promise.
    $promise = $pool->promise();
    // Force the pool of requests to complete.
    $promise->wait();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment