Skip to content

Instantly share code, notes, and snippets.

@Ozerich
Created November 3, 2010 19:49
Show Gist options
  • Save Ozerich/661595 to your computer and use it in GitHub Desktop.
Save Ozerich/661595 to your computer and use it in GitHub Desktop.
<?php
/* Load the author's own base class that handles page downloading: */
require_once PARSERS_BASE_DIR . '/parsers/parserOzerich.php';
/**
 * Converts an HTML fragment to plain text.
 *
 * Tags are stripped, the special HTML entities are decoded, a few named
 * entities (&quot;, &laquo;, &raquo;, &nbsp;) are replaced by their characters
 * and line feeds are removed.
 *
 * Trimming is done last, so whitespace that only appears once "&nbsp;" has
 * been replaced (or once a line feed has been dropped) does not survive at
 * the edges of the result.
 *
 * @param string $text HTML fragment.
 * @return string Plain text without leading or trailing whitespace.
 */
function str($text)
{
    $result = htmlspecialchars_decode(strip_tags($text));
    $result = str_replace(
        array("&quot;", "&laquo;", "&raquo;", "&nbsp;", "\n"),
        array("\"", "«", "»", " ", ""),
        $result
    );
    return trim($result);
}
/**
 * Parser for www.camelot.ru: loads the product catalogue and the list of
 * physical shop addresses by scraping the site's HTML with regular expressions.
 */
class ISP_camelot_ru extends ItemsSiteParser_Ozerich
{
    protected $shopBaseUrl = 'http://www.camelot.ru'; // Address of the site's home page

    /**
     * Downloads a product page and builds a ParserItem from it.
     *
     * @param string $url Absolute URL of the product page.
     * @return ParserItem Item with name, url and (when an image reference is found) one image.
     */
    private function parseItem($url)
    {
        $result = new ParserItem();
        $text = $this->httpClient->getUrlText($url);

        // The name stays an empty string when the page has no name line,
        // instead of reading a missing match group.
        $result->name = preg_match("#наименование:(.+?)<br>#sui", $text, $name) ? str($name[1]) : '';

        // An image is attached only when the page actually references one;
        // otherwise a download of an empty URL would be attempted.
        if (preg_match("#url\((.+?)\)#sui", $text, $image_name)) {
            $image = new ParserImage();
            $image->url = $image_name[1];
            // The cache file is read right after the download; presumably it is
            // the local copy of the picture just fetched.
            $this->httpClient->getUrlBinary($image->url);
            $image->path = $this->httpClient->getLastCacheFile();
            // File name and numeric id are taken from the "<digits>.GIF" tail of the URL.
            if (preg_match("#/((\d+)\.GIF)#sui", $image->url, $file_name)) {
                $image->id = $file_name[2];
                $image->fname = $file_name[1];
            }
            $image->type = "gif";
            $result->images[] = $image;
        }

        $result->url = $url;
        return $result;
    }

    /**
     * Walks through the pages of one catalogue section and parses every product.
     *
     * Pages are requested as "<section_url>&page=N" starting from N = 1; the
     * walk stops at the first page that contains no product links.
     *
     * @param string $section_url Section URL without the page parameter.
     * @return array List of ParserItem objects.
     */
    private function parseItems($section_url)
    {
        $result = array();
        for ($page = 1; ; $page++) {
            $text = $this->httpClient->getUrlText($section_url."&page=$page");
            preg_match_all('#<a href="(\?id=(\d+))(.*?)>(.+?)</a>#sui', $text, $links, PREG_SET_ORDER);
            if (!$links) {
                break;
            }
            // Only every second link is used; presumably each product is linked
            // twice in the listing - confirm against the page markup.
            $total = count($links);
            for ($i = 0; $i < $total; $i += 2) {
                $link = $links[$i];
                $item = $this->parseItem($this->shopBaseUrl."/catalog/".$link[1]);
                $item->id = $link[2];
                $result[] = $item;
            }
        }
        return $result;
    }

    /**
     * Loads the whole catalogue as a single collection and saves it.
     *
     * @return mixed Whatever saveItemsResult() returns.
     */
    public function loadItems ()
    {
        $base = array();
        $url = $this->shopBaseUrl."/catalog";
        $text = $this->httpClient->getUrlText($url);

        $collection = new ParserCollection();
        $collection->name = preg_match("#<h3>Коллекция:(.+?)</h3>#sui", $text, $name) ? str($name[1]) : '';
        $collection->url = $url;

        // The line breaks inside this literal are part of the pattern, so the
        // continuation lines must stay unindented.
        preg_match_all('#<div class="title_2">\s*<span class="title_bg_link" style="padding-left:20px;">(.+?)</span></div>\s*
\s*<ul>\s*
<li><a href="(.+?)">(.+?)</a></li>\s*
<li><a href="(.+?)">(.+?)</a></li>\s*
</ul>#sui', $text, $sections, PREG_SET_ORDER);

        foreach ($sections as $section) {
            // Every section block carries two links (groups 2 and 4); both are crawled.
            foreach (array($section[2], $section[4]) as $link) {
                foreach ($this->parseItems($url."/".$link) as $item) {
                    $collection->items[] = $item;
                }
            }
        }

        $base[] = $collection;
        return $this->saveItemsResult ($base);
    }

    /**
     * Loads the shop addresses of every city listed on the address page and saves them.
     *
     * @return mixed Whatever savePhysicalResult() returns.
     */
    public function loadPhysicalPoints ()
    {
        $base = array();
        $url = $this->shopBaseUrl."/shop-address";
        $text = $this->httpClient->getUrlText($url);
        // NOTE(review): the href group runs up to ` id="u"`, so it may include the
        // closing quote of the attribute - confirm against the real markup.
        preg_match_all("#<li id=\"n\"><a href='(.+?) id=\"u\">(.+?)</a></li>#sui", $text, $cities, PREG_SET_ORDER);

        foreach ($cities as $city) {
            $cityText = $this->httpClient->getUrlText($this->shopBaseUrl.$city[1]);
            if (!preg_match("#<td colspan='3'>\s*<div style='margin-left: 10px;'>(.+?)</div>#sui", $cityText, $content)) {
                continue;
            }
            // Extraction is done per city, so a fallback layout on one city page
            // is still tried after earlier cities have already produced points.
            foreach ($this->extractPoints($content[1]) as $point) {
                $base[] = $this->buildPhysicalPoint($point);
            }
        }

        return $this->savePhysicalResult ($base);
    }

    /**
     * Extracts raw address records from the content block of one city page.
     *
     * Three layouts are tried in order; the first one that yields records wins:
     * Calibri <font> elements, blocks separated by a double <br /> holding an
     * address paragraph and a phone paragraph, and finally plain paragraphs.
     *
     * @param string $content Inner HTML of the city content block.
     * @return array List of arrays with an 'address' key and an optional 'phone' key.
     */
    private function extractPoints($content)
    {
        $points = array();

        // Layout 1: the attribute quotes may be backslash-escaped or plain.
        preg_match_all('#<font face=\\\"Calibri\\\">(.+?)</font>#sui', $content, $fonts, PREG_SET_ORDER);
        if (!$fonts) {
            preg_match_all('#<font face=\"Calibri\">(.+?)</font>#sui', $content, $fonts, PREG_SET_ORDER);
        }
        if ($fonts) {
            foreach ($fonts as $font) {
                $points[] = array('address' => str($font[1]));
            }
            return $points;
        }

        // Layout 2: address paragraph followed by a phone paragraph.
        preg_match_all("#(.+?)<br />\s*<br />#sui", $content, $blocks, PREG_SET_ORDER);
        foreach ($blocks as $block) {
            // Blocks that do not have the two-paragraph shape are skipped rather
            // than turned into records with an empty address and phone.
            if (!preg_match("#^(.+?)</p>\s*<p>(.+?)</p>#sui", $block[1], $parts)) {
                continue;
            }
            $phone = str($parts[2]);
            // strpos() returns 0 for a match at the very start, so the result
            // must be compared with false explicitly.
            $labelPos = strpos($phone, "Тел.:");
            if ($labelPos !== false) {
                $phone = substr($phone, $labelPos + strlen("Тел.:"));
            }
            $points[] = array('address' => str($parts[1]), 'phone' => $phone);
        }
        if ($points) {
            return $points;
        }

        // Layout 3: every paragraph is an address.
        // NOTE(review): the third substr() argument is a length, so this drops the
        // first 3 and only the last 5 bytes; it looks like it was meant to strip a
        // leading "<p>" and a trailing "</p></p>" - confirm before changing.
        $content = substr($content, strlen("<p>"), strlen($content) - strlen("</p></p>"));
        preg_match_all("#<p>(.+?)</p>#sui", $content, $paragraphs, PREG_SET_ORDER);
        foreach ($paragraphs as $paragraph) {
            // str() is applied once: a second pass would decode entities twice.
            $address = str($paragraph[1]);
            if (strlen($address) < 2) {
                continue;
            }
            $points[] = array('address' => $address);
        }
        return $points;
    }

    /**
     * Converts a raw address record into a ParserPhysical object.
     *
     * The "Московская обл., " and "г." prefixes are removed, the first word
     * becomes the city (without its trailing comma) and the rest the address.
     *
     * @param array $item Record with an 'address' key and an optional 'phone' key.
     * @return ParserPhysical
     */
    private function buildPhysicalPoint($item)
    {
        $point = new ParserPhysical();
        if (isset($item['phone'])) {
            $point->phone = trim($item['phone']);
        }

        $address = trim($item['address']);
        $region = "Московская обл., ";
        if (substr($address, 0, strlen($region)) == $region) {
            $address = (string) substr($address, strlen($region));
        }
        if (substr($address, 0, strlen("г.")) == "г.") {
            $address = trim(substr($address, strlen("г.")));
        }

        // Without a space there is no separate city word: the city stays empty
        // and the whole string is kept as the address.
        $spacePos = strpos($address, " ");
        $city = ($spacePos === false) ? '' : (string) substr($address, 0, $spacePos);
        // NOTE(review): the remainder keeps its leading space, as before.
        $point->address = (string) substr($address, strlen($city));

        // Guarded so that an empty city is never indexed.
        if ($city !== '' && substr($city, -1) == ",") {
            $city = (string) substr($city, 0, -1);
        }

        if ($city === "Ленинский") {
            $city = "Ленинский р-н";
            // Drops 8 bytes from the remainder; presumably the UTF-8 text
            // " р-н, " that follows the district name - confirm.
            $point->address = substr($point->address, 8);
        }

        $point->city = $city;
        return $point;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment