Created
November 3, 2010 19:49
-
-
Save Ozerich/661595 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /* Подключаем личный класс для операций по скачивания страниц: */ | |
| require_once PARSERS_BASE_DIR . '/parsers/parserOzerich.php'; | |
/**
 * Converts an HTML fragment into a single line of plain text.
 *
 * Steps, in order: strip tags, decode the special-character entities handled
 * by htmlspecialchars_decode(), trim surrounding whitespace, then replace the
 * remaining named entities listed below and remove every line feed.
 *
 * @param string $text HTML fragment.
 * @return string Plain text without tags or line feeds.
 */
function str($text)
{
    $result = trim(htmlspecialchars_decode(strip_tags($text)));

    // htmlspecialchars_decode() leaves &laquo;, &raquo; and &nbsp; untouched, and a
    // double-encoded "&amp;quot;" is only reduced to "&quot;" by it, so these
    // entities are replaced explicitly. "\n" is dropped to keep the value on one line.
    $result = str_replace(
        array("&quot;", "&laquo;", "&raquo;", "&nbsp;", "\n"),
        array("\"", "«", "»", " ", ""),
        $result
    );

    return $result;
}
/**
 * Parser for the camelot.ru shop.
 *
 * loadItems() walks the catalogue and collects products into one collection;
 * loadPhysicalPoints() walks the shop-address pages and collects store
 * addresses. All pages are fetched through $this->httpClient, and extracted
 * text is cleaned with the global str() helper.
 */
class ISP_camelot_ru extends ItemsSiteParser_Ozerich
{
    protected $shopBaseUrl = 'http://www.camelot.ru'; // Address of the site's main page

    /**
     * Downloads one product page and builds a ParserItem from it.
     *
     * The name is the text between the "наименование:" label and the next <br>.
     * The image is the first CSS url(...) reference found in the page.
     *
     * @param string $url Absolute URL of the product page.
     * @return ParserItem Item with url, name and, when an image was found, one image.
     */
    private function parseItem($url)
    {
        $result = new ParserItem();
        $result->url = $url;

        $text = $this->httpClient->getUrlText($url);

        // An unmatched name stays an empty string instead of indexing a missing match.
        $result->name = preg_match("#наименование:(.+?)<br>#sui", $text, $name)
            ? str($name[1])
            : '';

        // Without an image reference there is nothing to download or attach.
        if (preg_match("#url\((.+?)\)#sui", $text, $imageRef)) {
            $image = new ParserImage();
            $image->url = $imageRef[1];

            // The download must happen before asking for the last cache file.
            $this->httpClient->getUrlBinary($image->url);
            $image->path = $this->httpClient->getLastCacheFile();

            // File name and numeric id come from the trailing "/<digits>.GIF" URL part.
            if (preg_match("#/((\d+)\.GIF)#sui", $image->url, $fileParts)) {
                $image->id = $fileParts[2];
                $image->fname = $fileParts[1];
            }
            $image->type = "gif";

            $result->images[] = $image;
        }

        return $result;
    }

    /**
     * Collects all products of one catalogue section, page by page.
     *
     * Pages are requested as "<section_url>&page=N" starting from N = 1 until a
     * page contains no "?id=<digits>" product links.
     * NOTE(review): this relies on the site returning a page without product
     * links past the last page; otherwise the loop does not end. Confirm.
     *
     * @param string $section_url Section URL that already contains a query string.
     * @return ParserItem[] Items in the order they were found.
     */
    private function parseItems($section_url)
    {
        $result = array();

        for ($page = 1; ; $page++) {
            $text = $this->httpClient->getUrlText($section_url . "&page=$page");

            $found = preg_match_all(
                '#<a href="(\?id=(\d+))(.*?)>(.+?)</a>#sui',
                $text,
                $links,
                PREG_SET_ORDER
            );
            if (!$found) {
                break;
            }

            // Only every second link is used; presumably each product is linked
            // twice in a row on the listing page. TODO confirm.
            $linkCount = count($links);
            for ($i = 0; $i < $linkCount; $i += 2) {
                $link = $links[$i];

                $item = $this->parseItem($this->shopBaseUrl . "/catalog/" . $link[1]);
                $item->id = $link[2];
                $result[] = $item;
            }
        }

        return $result;
    }

    /**
     * Loads the whole catalogue into a single collection and saves it.
     *
     * @return mixed Whatever saveItemsResult() returns.
     */
    public function loadItems ()
    {
        $base = array();
        $url = $this->shopBaseUrl . "/catalog";
        $text = $this->httpClient->getUrlText($url);

        $collection = new ParserCollection();
        $collection->url = $url;
        $collection->name = preg_match("#<h3>Коллекция:(.+?)</h3>#sui", $text, $name)
            ? str($name[1])
            : '';

        // Every section is a titled block followed by a list of exactly two links.
        // Whitespace between the tags is matched with \s* only, so the pattern does
        // not depend on how this source file happens to be indented.
        $sectionPattern = '#<div class="title_2">\s*<span class="title_bg_link" style="padding-left:20px;">(.+?)</span></div>\s*'
            . '<ul>\s*'
            . '<li><a href="(.+?)">(.+?)</a></li>\s*'
            . '<li><a href="(.+?)">(.+?)</a></li>\s*'
            . '</ul>#sui';

        if (!preg_match_all($sectionPattern, $text, $sections, PREG_SET_ORDER)) {
            $sections = array();
        }

        foreach ($sections as $section) {
            // Groups 2 and 4 are the hrefs of the two links. The section title
            // (group 1) and the link captions (groups 3 and 5) are not used.
            foreach (array($section[2], $section[4]) as $href) {
                foreach ($this->parseItems($url . "/" . $href) as $item) {
                    $collection->items[] = $item;
                }
            }
        }

        $base[] = $collection;

        return $this->saveItemsResult ($base);
    }

    /**
     * Loads the addresses of physical shops for every listed city and saves them.
     *
     * @return mixed Whatever savePhysicalResult() returns.
     */
    public function loadPhysicalPoints ()
    {
        $base = array();
        $items = array();

        $text = $this->httpClient->getUrlText($this->shopBaseUrl . "/shop-address");

        $cityPattern = "#<li id=\"n\"><a href='(.+?) id=\"u\">(.+?)</a></li>#sui";
        if (!preg_match_all($cityPattern, $text, $cities, PREG_SET_ORDER)) {
            $cities = array();
        }

        foreach ($cities as $city) {
            $cityText = $this->httpClient->getUrlText($this->shopBaseUrl . $city[1]);

            $hasContent = preg_match(
                "#<td colspan='3'>\s*<div style='margin-left: 10px;'>(.+?)</div>#sui",
                $cityText,
                $content
            );
            if (!$hasContent) {
                continue;
            }

            foreach ($this->extractCityPoints($content[1]) as $point) {
                $items[] = $point;
            }
        }

        foreach ($items as $item) {
            $base[] = $this->makePhysicalPoint($item);
        }

        return $this->savePhysicalResult ($base);
    }

    /**
     * Extracts raw shop entries from the content block of one city page.
     *
     * Three layouts are tried in order, and the fallbacks are evaluated for this
     * city only:
     *  1. each address inside a <font face="Calibri"> tag (quotes may be backslash-escaped);
     *  2. chunks separated by a double <br />, each "address</p><p>phone</p>";
     *  3. one address per <p>...</p> paragraph.
     *
     * @param string $content Inner HTML of the city content block.
     * @return array[] Entries with an 'address' key and, for layout 2, a 'phone' key.
     */
    private function extractCityPoints($content)
    {
        $points = array();

        if (!preg_match_all('#<font face=\\\"Calibri\\\">(.+?)</font>#sui', $content, $fonts, PREG_SET_ORDER)) {
            preg_match_all('#<font face=\"Calibri\">(.+?)</font>#sui', $content, $fonts, PREG_SET_ORDER);
        }

        if ($fonts) {
            foreach ($fonts as $font) {
                $points[] = array('address' => str($font[1]));
            }
        } else {
            if (!preg_match_all("#(.+?)<br />\s*<br />#sui", $content, $chunks, PREG_SET_ORDER)) {
                $chunks = array();
            }

            foreach ($chunks as $chunk) {
                // A chunk without the address/phone paragraph pair carries no usable data.
                if (!preg_match("#^(.+?)</p>\s*<p>(.+?)</p>#sui", $chunk[1], $parts)) {
                    continue;
                }

                $phone = str($parts[2]);

                // strpos() returns 0 when the label opens the string, so compare with
                // false explicitly; otherwise a leading "Тел.:" would never be cut off.
                $labelPos = strpos($phone, "Тел.:");
                if ($labelPos !== false) {
                    $phone = substr($phone, $labelPos + strlen("Тел.:"));
                }

                $points[] = array('address' => str($parts[1]), 'phone' => $phone);
            }
        }

        if (!$points) {
            // Drops the first 3 bytes and, because the third argument of substr() is a
            // length, the last 5 bytes of the block.
            // NOTE(review): presumably meant to strip an outer <p> wrapper; confirm.
            $inner = substr($content, strlen("<p>"), strlen($content) - strlen("</p></p>"));

            if (preg_match_all("#<p>(.+?)</p>#sui", $inner, $paragraphs, PREG_SET_ORDER)) {
                foreach ($paragraphs as $paragraph) {
                    $address = str(trim($paragraph[1]));
                    if (strlen($address) < 2) {
                        continue;
                    }
                    $points[] = array('address' => str($address));
                }
            }
        }

        return $points;
    }

    /**
     * Builds a ParserPhysical from a raw entry, splitting the city off the address.
     *
     * The prefixes "Московская обл., " and "г." are removed, then the first
     * space-delimited word becomes the city and the rest stays as the address.
     *
     * @param array $item Entry with an 'address' key and an optional 'phone' key.
     * @return ParserPhysical
     */
    private function makePhysicalPoint($item)
    {
        $point = new ParserPhysical();

        if (isset($item['phone'])) {
            $point->phone = trim($item['phone']);
        }

        $address = trim($item['address']);

        $region = "Московская обл., ";
        if (strncmp($address, $region, strlen($region)) === 0) {
            $address = substr($address, strlen($region));
        }

        $cityMark = "г.";
        if (strncmp($address, $cityMark, strlen($cityMark)) === 0) {
            $address = trim(substr($address, strlen($cityMark)));
        }

        // With no space in the address the city stays empty and the whole string
        // is kept as the address.
        $spacePos = strpos($address, " ");
        $city = ($spacePos === false) ? '' : substr($address, 0, $spacePos);

        $point->address = substr($address, strlen($city));

        // Guard against an empty city before looking at its last character.
        if ($city !== '' && substr($city, -1) === ",") {
            $city = substr($city, 0, strlen($city) - 1);
        }

        if ($city == "Ленинский") {
            $city = "Ленинский р-н";
            // Cuts 8 more bytes from the address; presumably " р-н" plus a
            // following separator. TODO confirm.
            $point->address = substr($point->address, 8);
        }

        $point->city = $city;

        return $point;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment