Skip to content

Instantly share code, notes, and snippets.

@andreybolonin
Created January 24, 2017 15:43
Show Gist options
  • Save andreybolonin/83fa6298224c6d6534855105e61eab87 to your computer and use it in GitHub Desktop.
Save andreybolonin/83fa6298224c6d6534855105e61eab87 to your computer and use it in GitHub Desktop.
HhUaProvider.php
<?php
namespace AppBundle\Provider;
use AppBundle\Entity\JobTitle;
use AppBundle\Entity\Provider;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpFoundation\Response;
/**
 * Provider that scrapes the hh.ua "IT / Internet / Telecom" vacancy catalogue.
 *
 * parse() walks the paginated catalogue, opens every vacancy page that is not
 * already stored, extracts its fields and passes the collected rows to import().
 *
 * NOTE(review): $this->em and import() are not defined in this class; they are
 * presumably inherited from AbstractProvider — confirm.
 */
class HHUaProvider extends AbstractProvider
{
    /**
     * Safety cap on the number of catalogue pages requested in a single run.
     */
    const MAX_PAGES = 100000;

    /**
     * Text shown in the salary slot when no salary is given ("salary not specified").
     */
    const SALARY_NOT_SPECIFIED = 'з/п не указана';

    /**
     * Zero-based positions of the salary and location blocks among the
     * "div.l-paddings" elements of a vacancy page.
     */
    const SALARY_NODE_INDEX = 3;
    const LOCATION_NODE_INDEX = 4;

    /**
     * @var string
     */
    public $url = 'https://hh.ua/catalog/Informacionnye-tehnologii-Internet-Telekom';

    /**
     * {@inheritdoc}
     *
     * Paging stops at the first catalogue page that answers 404 or that
     * contains no vacancy links, or once MAX_PAGES pages have been read.
     *
     * @return $this
     */
    public function parse()
    {
        $client = new Client();
        $rows = [];
        // URLs already collected in this run; the repository lookup only knows
        // about rows that were imported earlier.
        $seen = [];

        for ($page = 0; $page < self::MAX_PAGES; $page++) {
            $listing = $this->fetchListingPage($client, $page);
            if ($listing === null) {
                break;
            }

            $vacancyUrls = $this->extractVacancyUrls($listing);
            if ($vacancyUrls === []) {
                // A page without vacancy links is treated as the end of the catalogue.
                break;
            }

            foreach ($vacancyUrls as $vacancyUrl) {
                if (isset($seen[$vacancyUrl])) {
                    continue;
                }
                $seen[$vacancyUrl] = true;

                // Skip vacancies that are already stored.
                if ($this->em->getRepository(JobTitle::class)->findByUrl($vacancyUrl)) {
                    continue;
                }

                $rows[] = $this->parseVacancy($client, $vacancyUrl);
            }
        }

        $this->import($rows);

        return $this;
    }

    /**
     * Downloads one catalogue page, requesting it exactly once.
     *
     * Guzzle throws ClientException for 4xx responses by default, so the
     * "page not found" end-of-catalogue signal arrives as an exception; any
     * other client error is re-thrown unchanged.
     *
     * @param Client $client
     * @param int    $page   Zero-based page number appended as "/page-N".
     *
     * @return Crawler|null Parsed page, or null when the page does not exist.
     */
    private function fetchListingPage(Client $client, $page)
    {
        try {
            $response = $client->request('GET', $this->url . '/page-' . $page);
        } catch (\GuzzleHttp\Exception\ClientException $e) {
            $failed = $e->getResponse();
            if ($failed !== null && $failed->getStatusCode() === Response::HTTP_NOT_FOUND) {
                return null;
            }

            throw $e;
        }

        // Still honour a 404 that is returned rather than thrown.
        if ($response->getStatusCode() === Response::HTTP_NOT_FOUND) {
            return null;
        }

        return new Crawler($response->getBody()->getContents());
    }

    /**
     * Collects the vacancy URL of every result on a catalogue page.
     *
     * The URL is read from the "href" attribute of the first child element of
     * each "div.search-result-item__head" block.
     *
     * @param Crawler $listing
     *
     * @return string[] Non-empty href values in document order.
     */
    private function extractVacancyUrls(Crawler $listing)
    {
        $urls = [];

        foreach ($listing->filter('div.search-result-item__head') as $head) {
            foreach ($head->childNodes as $child) {
                if (!$child instanceof \DOMElement) {
                    // Ignore whitespace text nodes and comments.
                    continue;
                }

                $href = $child->getAttribute('href');
                if ($href !== '') {
                    $urls[] = $href;
                }

                // Only the first child element of a head block is considered.
                break;
            }
        }

        return $urls;
    }

    /**
     * Downloads a vacancy page and extracts the row handed to import().
     *
     * Company, location and salary are null when the page does not contain
     * them; a missing "h1" title still raises the DomCrawler exception.
     *
     * @param Client $client
     * @param string $vacancyUrl
     *
     * @return array Row with keys company, jobtitle, location, url, salary, provider.
     */
    private function parseVacancy(Client $client, $vacancyUrl)
    {
        $response = $client->request('GET', $vacancyUrl);
        $page = new Crawler($response->getBody()->getContents());
        $details = $page->filter('div.l-paddings');

        $salary = $this->nodeTextOrNull($details, self::SALARY_NODE_INDEX);
        if ($salary !== null && $this->isSalaryPlaceholder($salary)) {
            $salary = null;
        }

        $company = $page->filter('div.companyname');

        return [
            'company' => $company->count() > 0 ? $company->text() : null,
            'jobtitle' => $page->filter('h1')->text(),
            'location' => $this->nodeTextOrNull($details, self::LOCATION_NODE_INDEX),
            'url' => $vacancyUrl,
            'salary' => $salary,
            'provider' => Provider::HH_UA_PROVIDER,
        ];
    }

    /**
     * Returns the text content of the node at $index, or null when the
     * selection has no node at that position.
     *
     * @param Crawler $nodes
     * @param int     $index Zero-based position within the selection.
     *
     * @return string|null
     */
    private function nodeTextOrNull(Crawler $nodes, $index)
    {
        $node = $nodes->getNode($index);

        return $node === null ? null : $node->textContent;
    }

    /**
     * Tells whether the salary slot holds the "salary not specified" placeholder.
     *
     * Surrounding whitespace, including non-breaking spaces, is ignored so the
     * check does not depend on the exact padding of the markup.
     *
     * @param string $text Raw text of the salary block.
     *
     * @return bool
     */
    private function isSalaryPlaceholder($text)
    {
        $normalised = preg_replace('/^[\s\x{00A0}]+|[\s\x{00A0}]+$/u', '', $text);
        if ($normalised === null) {
            // preg_replace() returns null on error, e.g. for invalid UTF-8 input.
            $normalised = trim($text);
        }

        return $normalised === self::SALARY_NOT_SPECIFIED;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment