Created
October 5, 2016 15:49
-
-
Save ermst4r/a807a92bd820d2bc89f46c03e6d0b21a to your computer and use it in GitHub Desktop.
zalando spider
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /** | |
| * Created by PhpStorm. | |
| * User: erm | |
| * Date: 24-04-15 | |
| * Time: 14:03 | |
| */ | |
| namespace Cronjob\Spider; | |
| use \Zend\Dom\Query; | |
| use Application\Model\Image; | |
| use Application\Model\Remotefile; | |
| use Zend\Http\Client as HttpClient; | |
/**
 * Spider that collects the images shown in the media gallery of a product page.
 */
class zalando
{
    /**
     * Fetch a page and hand every gallery image it contains to the image downloader.
     *
     * The page at $rawUrl is requested through the cURL adapter, and every element
     * matching the CSS selector ".articleMedia img" is inspected. Each non-empty
     * "src" value is passed to Remotefile::downloadExtraImages() together with a
     * name of the form "<primId>_<n>", where n starts at 1 and increases by one
     * per image handed over.
     *
     * This is a best-effort operation: a non-2xx HTTP response, or any exception
     * raised while fetching, parsing or downloading, prints "skip" and yields false.
     * Images handed to the downloader before the failure are not undone here.
     *
     * @param string $rawUrl  URL of the page to fetch.
     * @param mixed  $config  Passed through unchanged to Remotefile::downloadExtraImages().
     * @param mixed  $primId  Passed to the downloader and used as the prefix of the image name.
     * @return bool|int Number of images handed to the downloader (may be 0), or false on failure.
     */
    public static function parseData($rawUrl, $config, $primId)
    {
        $counter = 0;

        try {
            $client = new HttpClient();
            $client->setAdapter('Zend\Http\Client\Adapter\Curl');
            $client->setUri($rawUrl);

            $response = $client->send();

            // An error page must not be scraped as if it were a product page;
            // route it through the same failure path as any other exception.
            if (!$response->isSuccess()) {
                throw new \RuntimeException(
                    'Unexpected HTTP status ' . $response->getStatusCode() . ' for ' . $rawUrl
                );
            }

            $dom = new Query($response->getBody());

            foreach ($dom->execute('.articleMedia img') as $image) {
                $imageUrl = trim($image->getAttribute('src'));

                // getAttribute() returns an empty string when "src" is absent;
                // there is nothing to download, so do not consume a counter slot.
                if ($imageUrl === '') {
                    continue;
                }

                $counter++;
                Remotefile::downloadExtraImages($imageUrl, $primId, $config, $primId . '_' . $counter);
            }

            return $counter;
        } catch (\Exception $e) {
            // Deliberate best effort: report the skip and let the caller move on.
            echo "skip\n";

            return false;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment