Skip to content

Instantly share code, notes, and snippets.

@Ozerich
Created December 10, 2010 17:05
Show Gist options
  • Select an option

  • Save Ozerich/736473 to your computer and use it in GitHub Desktop.

Select an option

Save Ozerich/736473 to your computer and use it in GitHub Desktop.
<?php
require_once PARSERS_BASE_DIR . '/parsers/baseClasses/ozerich.php';
/**
 * Scraper for the budumamoy.ru shop.
 *
 * Every page is fetched through the inherited $this->httpClient and picked
 * apart with regular expressions. Captured fragments are passed through the
 * inherited $this->txt() helper before being stored (NOTE(review): txt() is
 * defined in the parent class; it presumably strips markup and surrounding
 * whitespace - confirm there).
 */
class ISP_budumamoy_ru extends ItemsSiteParser_Ozerich
{
    protected $shopBaseUrl = "http://www.budumamoy.ru/";

    /**
     * Walks the three catalogue collections, every category inside them and
     * every listing page of each category, building one ParserItem per
     * product (with its images, sizes, colours and brand).
     *
     * @return mixed whatever the inherited saveItemsResult() returns
     */
    public function loadItems ()
    {
        $base = array ();
        $collections = array("clothes", "underwear", "products");
        foreach($collections as $collection_name)
        {
            $collection = new ParserCollection();
            $collection->url = $this->shopBaseUrl."catalog/".$collection_name."/";
            $collection->id = $collection_name;

            $collection_text = $this->httpClient->getUrlText($collection->url);
            // Only assign the title when the heading was actually found.
            if(preg_match('#<h6>(.+?)</h6>#sui', $collection_text, $name))
                $collection->name = $name[1];

            preg_match_all('#<li class="c.+?"><a href="/(.+?)">(.+?)</a></li>#sui', $collection_text, $categories, PREG_SET_ORDER);
            foreach($categories as $category)
            {
                $category_name = $category[2];
                // The listing is paged by item offset, 12 items per page.
                $page = 0;
                $previous_first_item = null;
                while(true)
                {
                    $list_text = $this->httpClient->getUrlText($this->shopBaseUrl.$category[1]."?page=$page");
                    preg_match_all('#<h3><a href="/(.+?)/">(.+?)</a></h3>#sui', $list_text, $items, PREG_SET_ORDER);
                    if(!$items)
                        break;
                    // Guard against a site that keeps serving the same page for
                    // out-of-range offsets, which would otherwise loop forever.
                    if($items[0][1] === $previous_first_item)
                        break;
                    $previous_first_item = $items[0][1];

                    foreach($items as $item_value)
                    {
                        $item = new ParserItem();
                        // The id is the last path segment of the item link.
                        $last_slash = strrpos($item_value[1], "/");
                        $item->id = ($last_slash === false) ? $item_value[1] : substr($item_value[1], $last_slash + 1);
                        $item->url = $this->shopBaseUrl.$item_value[1];
                        $item->categ = $category_name;
                        $item->name = $item_value[2];

                        $item_text = $this->httpClient->getUrlText($item->url);

                        if(preg_match('#<div><b>Артикул:</b>(.+?)</div>#sui', $item_text, $articul))
                            $item->articul = $this->txt($articul[1]);

                        if(preg_match('#<div><b>Состав:</b>(.+?)</div>#sui', $item_text, $structure))
                            $item->structure = $this->txt($structure[1]);

                        if(preg_match('#<div><b>Размер:</b>(.+?)</div>#sui', $item_text, $sizes))
                        {
                            // Sizes are separated by ';' or ','; strict comparison so a
                            // delimiter at offset 0 is still recognised.
                            if(strpos($sizes[1], ";") !== false)
                                $size_list = explode(';', $sizes[1]);
                            else if(strpos($sizes[1], ",") !== false)
                                $size_list = explode(',', $sizes[1]);
                            else
                                $size_list = array($sizes[1]);
                            // Clean every entry the same way, not just the single-size case.
                            foreach($size_list as $size)
                                $item->sizes[] = $this->txt($size);
                        }

                        if(preg_match('#<div><b>Розничная цена:</b>(.+?)</div>#sui', $item_text, $price))
                            $item->price = $this->txt($price[1]);

                        // The first <p> of the page is used as the description unless it
                        // is the colour line. Evaluated per item so nothing leaks over
                        // from the previously parsed product.
                        preg_match_all('#<p.*?>(.+?)</p>#sui', $item_text, $ptags, PREG_SET_ORDER);
                        if($ptags && strpos($ptags[0][1], "Цвет:") === false)
                            $item->descr = $this->txt($ptags[0][1]);

                        // Brand name lives on a separate brand page, in its <h1>.
                        preg_match('#<a href="/(brands/(.*?))">#sui', $item_text, $brand_link);
                        if($brand_link && $brand_link[2] !== "")
                        {
                            $brand_text = $this->httpClient->getUrlText($this->shopBaseUrl.$brand_link[1]);
                            if(preg_match('#<h1>(.+?)</h1>#sui', $brand_text, $brand_name))
                                $item->brand = $brand_name[1];
                        }

                        // Gallery images are linked from the last <p>; the main image is
                        // appended after them, but only when it was really found.
                        $images = array();
                        if($ptags)
                        {
                            $last_paragraph = $ptags[count($ptags)-1][1];
                            preg_match_all('#<a href="/(.+?)" target="_blank" class="zoomable">#sui', $last_paragraph, $images, PREG_SET_ORDER);
                        }
                        if(preg_match('#<dt id="zoomc">\s*<div style="background-image\:\surl\(\'(.+?)\'\);">#sui', $item_text, $main_image))
                            $images[] = $main_image;

                        foreach($images as $image_item)
                        {
                            $image = new ParserImage();
                            // Drop a leading slash if there is one (main image), without
                            // eating a real character when there is none (gallery links).
                            $image->url = $this->shopBaseUrl.ltrim($image_item[1], '/');
                            $slash_pos = strrpos($image->url, '/');
                            $dot_pos = strrpos($image->url, '.');
                            // File name without extension / extension.
                            $image->id = substr($image->url, $slash_pos + 1, $dot_pos - $slash_pos - 1);
                            // Download first: getLastCacheFile() refers to this request.
                            $this->httpClient->getUrlBinary($image->url);
                            $image->type = substr($image->url, $dot_pos + 1);
                            $image->path = $this->httpClient->getLastCacheFile();
                            $item->images[] = $image;
                        }

                        // Colours: first <p> containing the marker, list separated by ',' or ';'.
                        $color_marker = "Цвет:";
                        foreach($ptags as $ptag)
                        {
                            $marker_pos = strpos($ptag[1], $color_marker);
                            if($marker_pos === false)
                                continue;
                            // Slice relative to where the marker was found, and trim
                            // whitespace instead of skipping one raw byte.
                            $colors = ltrim(substr($ptag[1], $marker_pos + strlen($color_marker)));
                            if(strpos($colors, ",") !== false)
                                $colors = explode(",", $colors);
                            else if(strpos($colors, ";") !== false)
                                $colors = explode(";", $colors);
                            else
                                $colors = array($colors);
                            foreach($colors as $color)
                                $item->colors[] = $this->txt($color);
                            break;
                        }

                        // Fallback brand: a "<strong>тм ...</strong>" marker on the item page.
                        if(!$item->brand)
                        {
                            if(preg_match('#<strong>тм(.+?)</strong>#sui', $item_text, $trademark))
                                $item->brand = $trademark[1];
                        }

                        $collection->items[] = $item;
                    }
                    $page += 12;
                }
            }
            $base[] = $collection;
        }
        return $this->saveItemsResult ($base);
    }

    /**
     * Collects the physical shops: the shops index lists cities, each city
     * page lists shops, and each shop page carries address, opening hours
     * and phone.
     *
     * @return mixed whatever the inherited savePhysicalResult() returns
     */
    public function loadPhysicalPoints ()
    {
        $base = array ();
        $index_text = $this->httpClient->getUrlText($this->shopBaseUrl."shops/");
        preg_match_all('#<a href="/(shops/.+?)">(?:.+?)/>(.+?)</a>#sui', $index_text, $cities, PREG_SET_ORDER);
        foreach($cities as $city)
        {
            $city_text = $this->httpClient->getUrlText($this->shopBaseUrl.$city[1]);
            preg_match_all('#<li><a href="/(.+?)">#sui', $city_text, $shops, PREG_SET_ORDER);
            foreach($shops as $shop_item)
            {
                $url = $this->shopBaseUrl.$shop_item[1];
                $shop_text = $this->httpClient->getUrlText($url);

                $shop = new ParserPhysical();
                // Id is the last path segment, with or without a trailing slash.
                $trimmed_url = rtrim($url, '/');
                $shop->id = substr($trimmed_url, strrpos($trimmed_url, "/") + 1);
                $shop->url = $url;

                // Each field is guarded by its own match result.
                if(preg_match('#<div><b>Адрес:</b>(.+?)</div>#sui', $shop_text, $address))
                    $shop->address = $this->txt($address[1]);
                if(preg_match('#<div><b>Часы работы:</b>(.+?)</div>#sui', $shop_text, $timetable))
                    $shop->timetable = $this->txt($timetable[1]);
                if(preg_match('#<div><b>Телефон:</b>(.+?)</div>#sui', $shop_text, $phone))
                    $shop->phone = $this->txt($phone[1]);

                $shop->city = $city[2];
                $base[] = $shop;
            }
        }
        return $this->savePhysicalResult ($base);
    }

    /**
     * Collects the entries of the "shares" listing and fetches the full
     * text of each one from its own page.
     *
     * @return mixed whatever the inherited saveNewsResult() returns
     */
    public function loadNews ()
    {
        $base = array ();
        $url = $this->shopBaseUrl."shares/";
        $list_text = $this->httpClient->getUrlText($url);
        // Groups: 1 = date, 2 = link path, 3 = header, 4 = short text, 5 = "more" link (unused).
        preg_match_all('#<dd><span>(.+?)</span><h3><a href="/(.+?)/">(.+?)</a></h3><div>(.+?)</div><a href="/(.+?)" class="more">Подробнее</a></dd>#sui', $list_text, $news, PREG_SET_ORDER);
        foreach($news as $item)
        {
            $news_item = new ParserNews();
            $news_item->urlShort = $url;
            $news_item->urlFull = $this->shopBaseUrl.$item[2];
            $news_item->id = substr($news_item->urlFull, strrpos($news_item->urlFull, "/")+1);
            $news_item->contentShort = $item[4];
            $news_item->date = $item[1];
            $news_item->header = $item[3];

            $full_text = $this->httpClient->getUrlText($news_item->urlFull);
            // Full text sits between the <h1> and the "back to list" link; skip
            // the assignment when the page layout does not match.
            if(preg_match('#<h1>.+?</h1>(.+?)<a class="back" href="/shares/">Назад к списку</a>#sui', $full_text, $content))
                $news_item->contentFull = $this->txt($content[1]);

            $base[] = $news_item;
        }
        return $this->saveNewsResult ($base);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment