Skip to content

Instantly share code, notes, and snippets.

@ermst4r
Created October 5, 2016 15:49
Show Gist options
  • Select an option

  • Save ermst4r/a807a92bd820d2bc89f46c03e6d0b21a to your computer and use it in GitHub Desktop.

Select an option

Save ermst4r/a807a92bd820d2bc89f46c03e6d0b21a to your computer and use it in GitHub Desktop.
zalando spider
<?php
/**
* Created by PhpStorm.
* User: erm
* Date: 24-04-15
* Time: 14:03
*/
namespace Cronjob\Spider;
use \Zend\Dom\Query;
use Application\Model\Image;
use Application\Model\Remotefile;
use Zend\Http\Client as HttpClient;
/**
 * Spider that collects the images found in the `.articleMedia` section of a page.
 *
 * The page is fetched over HTTP, every element matching `.articleMedia img`
 * is selected, and each image URL is handed to Remotefile::downloadExtraImages().
 */
class zalando
{
    /**
     * Fetch a page and download every image referenced inside `.articleMedia`.
     *
     * @param mixed $rawUrl URL of the page to fetch; passed to HttpClient::setUri().
     * @param mixed $config Passed through unchanged to Remotefile::downloadExtraImages().
     * @param mixed $primId Identifier passed to the downloader; it also prefixes the
     *                      per-image name, which has the form "<primId>_<n>".
     *
     * @return bool|int Number of images handed to the downloader, or false when the
     *                  request did not succeed or any step threw an exception.
     */
    public static function parseData($rawUrl, $config, $primId)
    {
        $counter = 0;

        try {
            $client = new HttpClient();
            $client->setAdapter('Zend\Http\Client\Adapter\Curl');
            $client->setUri($rawUrl);
            $response = $client->send();

            // A non-2xx answer carries an error page rather than the product page.
            // Treat it as a failure instead of silently reporting "0 images found".
            if (!$response->isSuccess()) {
                throw new \RuntimeException('Unexpected HTTP status ' . $response->getStatusCode());
            }

            $dom = new Query($response->getBody());

            foreach ($dom->execute('.articleMedia img') as $image) {
                // getAttribute() yields '' when the attribute is absent; there is
                // nothing to download for such an element, so it is not counted.
                $imageUrl = trim($image->getAttribute('src'));
                if ($imageUrl === '') {
                    continue;
                }

                $counter++;
                Remotefile::downloadExtraImages($imageUrl, $primId, $config, $primId . "_" . $counter);
            }

            return $counter;
        } catch (\Exception $e) {
            // Best-effort spider: report the skip and let the caller move on.
            echo "skip\n";

            return false;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment