Skip to content

Instantly share code, notes, and snippets.

@tarlepp
Last active September 26, 2020 16:47
Show Gist options
  • Select an option

  • Save tarlepp/0f1e3e46fc24273d1825ff9244bacd94 to your computer and use it in GitHub Desktop.

Select an option

Save tarlepp/0f1e3e46fc24273d1825ff9244bacd94 to your computer and use it in GitHub Desktop.
<?php
namespace App\Crawler;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Panther\Client;
class DiaEsCrawler
{
    /**
     * Scrapes the navigation menu of the dia.es online shop, up to three levels deep.
     *
     * A headless Chrome client loads the page, waits for the
     * `#nav-submenu-container` element and then walks its `<li>` items.
     *
     * Result shape: each top-level entry is stored under the index of its `<li>`
     * among the matched items (so indexes of skipped entries are missing) and
     * holds `text`, `link` and, when sub-menus exist, a nested `child` array.
     * Second-level children are keyed the same way; third-level children are
     * appended as a plain list.
     *
     * @return array the collected menu tree
     */
    public function foo(): array
    {
        $output = [];
        $exclude = ['Novedades', 'Ver marcas', 'Soy solidario', 'Ofertas'];

        $client = Client::createChromeClient();

        try {
            $client->request('GET', 'https://www.dia.es/compra-online/');
            $crawler = $client->waitFor('#nav-submenu-container');

            $crawler
                ->filter('#nav-submenu-container > li')
                ->each($this->getClosure($output, $exclude));
        } finally {
            // Always shut the browser down, even when loading or parsing throws,
            // otherwise the Chrome process is left running.
            $client->quit();
        }

        return $output;
    }

    /**
     * Builds the visitor for first-level menu items.
     *
     * $output is taken by reference on purpose: the returned closure writes the
     * entries it finds straight into the caller's array.
     *
     * @param array $output  result tree, filled in place
     * @param array $exclude link texts to skip
     *
     * @return \Closure callback suitable for Crawler::each()
     */
    private function getClosure(array &$output, array $exclude): \Closure
    {
        return function (Crawler $node, $i) use (&$output, $exclude) {
            $entry = self::extractEntry($node, $exclude);

            if ($entry === null) {
                return;
            }

            $output[$i] = $entry;

            if ($node->children('ul')->count() > 0) {
                $node
                    ->children('ul > li')
                    ->each($this->getClosure2($output, $exclude, $i));
            }
        };
    }

    /**
     * Builds the visitor for second-level menu items of top-level entry $i.
     *
     * @param array $output  result tree, filled in place
     * @param array $exclude link texts to skip
     * @param mixed $i       key of the parent entry inside $output
     *
     * @return \Closure callback suitable for Crawler::each()
     */
    private function getClosure2(array &$output, array $exclude, $i): \Closure
    {
        return function (Crawler $node, $x) use (&$output, $exclude, $i) {
            $entry = self::extractEntry($node, $exclude);

            if ($entry === null) {
                return;
            }

            $output[$i]['child'][$x] = $entry;

            if ($node->children('ul')->count() > 0) {
                $node
                    ->children('ul > li')
                    ->each($this->getClosure3($output, $exclude, $i, $x));
            }
        };
    }

    /**
     * Builds the visitor for third-level menu items below entry $i / child $x.
     *
     * @param array $output  result tree, filled in place
     * @param array $exclude link texts to skip
     * @param mixed $i       key of the top-level entry inside $output
     * @param mixed $x       key of the second-level entry inside $output[$i]['child']
     *
     * @return \Closure callback suitable for Crawler::each()
     */
    private function getClosure3(array &$output, array $exclude, $i, $x): \Closure
    {
        return static function (Crawler $node) use (&$output, $exclude, $i, $x) {
            $entry = self::extractEntry($node, $exclude);

            if ($entry !== null) {
                $output[$i]['child'][$x]['child'][] = $entry;
            }
        };
    }

    /**
     * Reads the first anchor found inside a menu item.
     *
     * @param Crawler $node    the menu item to inspect
     * @param array   $exclude link texts to skip
     *
     * @return array|null `['text' => ..., 'link' => ...]`, or null when the item
     *                    contains no anchor or its text is excluded
     */
    private static function extractEntry(Crawler $node, array $exclude): ?array
    {
        $nodeLink = $node->filter('a');

        // html() and attr() throw on an empty node list, so bail out first.
        if ($nodeLink->count() === 0) {
            return null;
        }

        $text = trim(strip_tags($nodeLink->html()));

        if (in_array($text, $exclude, true)) {
            return null;
        }

        return [
            'text' => $text,
            'link' => $nodeLink->attr('href'),
        ];
    }
}
<?php
declare(strict_types = 1);
namespace App\Controller;
use App\Crawler\DiaEsCrawler;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\Routing\Annotation\Route;
class IndexController
{
    /**
     * Runs the dia.es crawler and serves whatever it returns as a JSON response.
     *
     * @Route(path="/", name="index")
     */
    public function __invoke(DiaEsCrawler $diaEsCrawler): JsonResponse
    {
        $payload = $diaEsCrawler->foo();

        return new JsonResponse($payload);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment