Skip to content

Instantly share code, notes, and snippets.

@burningtree
Created April 16, 2013 18:31
Show Gist options
  • Select an option

  • Save burningtree/5398343 to your computer and use it in GitHub Desktop.

Select an option

Save burningtree/5398343 to your computer and use it in GitHub Desktop.
<?php
// https://raw.github.com/olamedia/nokogiri/master/nokogiri.php
require 'nokogiri.php';
// Prototype record for one leaflet; every scraped item starts as a copy of
// this array and then fills in the fields that are null here.
$template = array(
    'title'          => 'ASKO nábytek',
    'descr'          => null, // human-readable validity description
    'datetime_end'   => null, // "Y-m-d H:i:s" end of validity
    'datetime_begin' => null, // "Y-m-d H:i:s" start of validity
    'file_orig'      => null, // URL of the leaflet PDF
    'source_url'     => 'http://www.asko-nabytek.cz/katalog-letak-asko',
);
/**
 * Extracts exactly two dates from a free-text string.
 *
 * Every run of digits, whitespace and dots is treated as a date candidate.
 * Whitespace is stripped from the candidate (so "16. 4. 2013" becomes
 * "16.4.2013") and the result is handed to strtotime().
 *
 * @param string $str Text to search.
 * @return array|bool Two Unix timestamps in order of appearance, or FALSE
 *                    when the number of parsable dates is not exactly two.
 */
function find_dates($str)
{
    preg_match_all("/[\d\s\.]+/u", $str, $matches);

    $timestamps = array();
    foreach ($matches[0] as $candidate)
    {
        $cleaned = preg_replace("/([^\d^\.]+)/", "", $candidate);
        $timestamp = strtotime($cleaned);
        if (!$timestamp)
        {
            // Not a date (e.g. a whitespace-only run) - skip it.
            continue;
        }
        $timestamps[] = $timestamp;
    }

    if (count($timestamps) != 2)
    {
        return FALSE;
    }
    return $timestamps;
}
/**
 * Extracts the catalogue code from a katalog.asko.cz URL.
 *
 * The code is the first path segment after the host, i.e. everything
 * between "katalog.asko.cz/" and the next slash (or the end of the string).
 *
 * @param string $url URL to inspect.
 * @return string|bool The catalogue code, or FALSE when the URL does not
 *                     point to katalog.asko.cz or has no path segment.
 */
function find_code($url)
{
    // The dots are escaped so that they match a literal "." only; unescaped
    // they would accept any character (e.g. "katalogXaskoXcz").
    if (!preg_match("/katalog\.asko\.cz\/([^\/]+)/", $url, $match))
    {
        return FALSE;
    }
    return $match[1];
}
/**
 * Checks whether the resource at $url is served with the given content type.
 *
 * Issues an HTTP request via get_headers() and compares the Content-Type
 * response header with $content_type. An exact match of the whole header
 * value is accepted; otherwise only the media type is compared
 * case-insensitively, ignoring parameters such as "; charset=utf-8".
 *
 * @param string $url          URL to probe.
 * @param string $content_type Expected content type, e.g. "application/pdf".
 * @return bool TRUE when the content type matches; FALSE when it differs,
 *              the header is missing, or the request failed.
 */
function check_content_type($url, $content_type)
{
    $headers = get_headers($url, 1);
    if ($headers === false)
    {
        // Request failed - there is nothing to compare.
        return false;
    }

    // HTTP header names are case-insensitive, servers may send "content-type".
    $headers = array_change_key_case($headers, CASE_LOWER);
    if (!isset($headers['content-type']))
    {
        return false;
    }

    $value = $headers['content-type'];
    if (is_array($value))
    {
        // A header repeated across redirects is returned as an array;
        // the last entry belongs to the final response.
        $value = end($value);
    }

    if ($value == $content_type)
    {
        return true;
    }

    // Compare the bare media type, without parameters like "; charset=...".
    $parts = explode(';', $value);
    return strcasecmp(trim($parts[0]), trim($content_type)) == 0;
}
// Start processing: download the listing page and collect the raw data
// for every leaflet into three parallel lists.
$data = array('urls' => array(), 'validity' => array(), 'valid_for' => array());

$source_html = file_get_contents($template['source_url']);
if ($source_html === false)
{
    // Without this check the parser would silently receive FALSE.
    throw new Exception('Nelze stahnout zdrojovou stranku: '.$template['source_url']);
}

$html = new nokogiri($source_html);
$list = $html->get('.content-main .list-page');

// Link to each leaflet.
foreach ($list->get('p.ta-c') as $line)
{
    $data['urls'][] = $line['a'][0]['href'];
}

// Validity text and the scope (all stores vs. e-shop) of each leaflet.
foreach ($list->get('ul.list-arrow-small') as $line)
{
    $data['validity'][] = $line['li'][0]['#text'][0];
    $data['valid_for'][] = (count($line['li'][1]) == 1) ? "všechny pobočky" : "E-shop";
}

// Check that all lists have the same number of elements, otherwise the
// index-based pairing below would mix up data of different leaflets.
$items_count = count($data['urls']);
foreach ($data as $vals)
{
    if (count($vals) != $items_count)
    {
        throw new Exception('Nesouhlasi pocet dat');
    }
}

$out = array();
foreach ($data['urls'] as $i => $url)
{
    $dates = find_dates($data['validity'][$i]);
    $code = find_code($url);
    // Validate before $code is used to build the PDF URL.
    if (!$dates OR !$code)
    {
        throw new Exception('Spatne data');
    }

    $file_orig = $url."/includes/".str_replace("-", "_", $code).".pdf";

    // Verify that the PDF exists.
    if (!check_content_type($file_orig, "application/pdf"))
    {
        throw new Exception('PDF neexistuje: '.$file_orig);
    }

    $item = $template;
    $item['datetime_begin'] = date("Y-m-d H:i:s", $dates[0]);
    // NOTE(review): this yields one second before the parsed end date, while
    // the description below prints the end date itself - confirm whether the
    // end day is meant to be included in the validity period.
    $item['datetime_end'] = date("Y-m-d H:i:s", strtotime("-1 second", $dates[1]));
    $item['file_orig'] = $file_orig;
    $item['descr'] = "Platnost letáku od ".date("j.m.Y", $dates[0])." do ".date("j.m.Y", $dates[1])." pro ".$data['valid_for'][$i];
    $out[] = $item;
}

var_dump($out);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment