Last active
May 26, 2021 21:43
-
-
Save izdrail/a82c9ff671e3e44689b8f331342dfeda to your computer and use it in GitHub Desktop.
Guzzle Parallel Extract
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require __DIR__ . '/vendor/autoload.php';

use GuzzleHttp\Client as GuzzleClient;
use GuzzleHttp\Promise as GuzzlePromise;

// Fetch the home page of every site concurrently and record the outcome on
// each site entity: the HTML body on a 200 response, the status code on any
// other fulfilled response, or an "ERR: ..." string when the request failed.

// A per-request timeout (seconds) keeps one slow host from stalling the batch.
$client = new GuzzleClient(['timeout' => 12.0]);

// NOTE(review): `SiteEntity->getAll()` is kept verbatim from the original; it
// looks like placeholder code for "fetch all site entities" — confirm the real
// accessor (repository / static call) before running.
$sitesArray = SiteEntity->getAll(); // returns an array with objects that contain a domain

// Index sites by domain while queuing the requests, so that each settled
// result (keyed by domain) can be matched back to its entity. The list
// returned by getAll() is not assumed to be keyed by domain.
$sitesByDomain = [];
$requestPromises = [];
foreach ($sitesArray as $site) {
    $domain = $site->getDomain();
    $sitesByDomain[$domain] = $site;
    $requestPromises[$domain] = $client->getAsync('http://' . $domain);
}

// settle() resolves once every promise has either fulfilled or rejected, so a
// single failing request does not abort the others. Each entry carries a
// 'state' plus either a 'value' (the response) or a 'reason' (the failure).
// NOTE(review): newer guzzlehttp/promises releases expose this as
// GuzzlePromise\Utils::settle() — confirm against the installed version.
$results = GuzzlePromise\settle($requestPromises)->wait();

foreach ($results as $domain => $result) {
    $site = $sitesByDomain[$domain];
    $this->logger->info('Crawler FetchHomePages: domain check ' . $domain);

    if ($result['state'] === 'fulfilled') {
        $response = $result['value'];
        if ($response->getStatusCode() === 200) {
            // getBody() returns a stream object; cast it to get the actual HTML.
            $site->setHtml((string) $response->getBody());
        } else {
            $site->setHtml((string) $response->getStatusCode());
        }
    } elseif ($result['state'] === 'rejected') {
        // A failed call is reported as state "rejected" together with a reason.
        // Store only the message: concatenating an exception directly would
        // also embed its full stack trace.
        $reason = $result['reason'];
        $message = $reason instanceof \Throwable ? $reason->getMessage() : (string) $reason;
        $site->setHtml('ERR: ' . $message);
    } else {
        $site->setHtml('ERR: unknown exception ');
        $this->logger->err('Crawler FetchHomePages: unknown fetch fail domain: ' . $domain);
    }

    $this->entityManager->persist($site); // this is a call to Doctrine's entity manager
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment