Skip to content

Instantly share code, notes, and snippets.

@gorborukov
Created December 5, 2014 08:36
Show Gist options
  • Save gorborukov/ae431fe53039510a8921 to your computer and use it in GitHub Desktop.
Save gorborukov/ae431fe53039510a8921 to your computer and use it in GitHub Desktop.
master-web example from internet
<?
/**
** Сохранение данных о СПО в базе:
** 1. http-запрос
** 2. xml-парсинг
** 3. вставка в mysql:masterweb_ext.mwToursInfo
**/
define('INC_DIR', dirname(__FILE__).'/../includes/');
require_once INC_DIR.'auto_prepend.php';
require_once INC_DIR.'showcase/params.php';
require_once INC_DIR.'pidfile.php';
class importToursSet{
private $urlImport=null;
private $parser=null;
private $recordOnce=array();
private $recordCollection=array();
private $currentTagName=null;
private $waitingForFirstTag=true;
private $insertedRowCount=0;
private $tagInsideData=false;
private $source_id=null;
private $isFirst=true;
private $timer=null;
private $countExcursion=0;
const MAX_INSERT_TUPLE_SIZE=100;
# public function __destruct() {}
public function __construct($source_key='ru'){
echo "\nStart at ".date('Y.m.d, H:i:s')."\n";
$this->timer=time();
$rowSource=ShowcaseParams::getSource($source_key);
# $rowSource=ShowcaseParams::getSource(array('source_key'=>$source_key));
$this->urlImport= $rowSource['import_url'];
$this->source_id=$rowSource['id'];
echo "Source: ".$source_key." #".$this->source_id."\n\t".$this->urlImport."\n";
mysql::charset('utf8');
$statusParsing=$this->xmlParsing();
mysql::charset('cp1251');
if(!$statusParsing){
echo "\n !!! Parsing Error\n";
return false;
}
$this->hackData();
$this->createGroups();
echo "Finish at ".date('Y.m.d, H:i:s')."\n";
}
private function time_report($msg){
echo 'TIME REPORT: '.$msg.' '.(time()-$this->timer)."c\n";
}
function mysql_query($sql){
$res=mysql::q($sql);
if(empty($res)){
trigger_error('Mysql error. Program terminated. Sql: '.$sql, E_USER_ERROR);
exit;
}
}
function hackData(){
echo "HACKING IMPORTED DATA\n";
$sqlCcaTail='
FROM '.masterweb::TBL_TOURSINFO.'
WHERE airline="Czech Connect Airlines"
';
$sql="
SELECT COUNT(*)
".$sqlCcaTail."
";
$countCcaTours=mysql::qaf1($sql);
echo "Delete tour by Czech Connect Airlines (CCA): $countCcaTours\n";
$sql="
DELETE
".$sqlCcaTail."
";
mysql::q($sql);
$pt_tourkey_delete="19249, 19251, 19252, 19891, 19893";
echo "Delete pt_tourkey ($pt_tourkey_delete)\n";
$sql="
DELETE FROM ".masterweb::TBL_TOURSINFO."
WHERE pt_tourkey IN(".$pt_tourkey_delete.")
";
$this->mysql_query($sql);
$lockDeparture='%\_1';
echo "Delete tours from departure $lockDeparture\n";
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE departure LIKE "'.$lockDeparture.'"';
$res=$this->mysql_query($sql);
echo "Delete empty night tours\n";
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE nights="" AND source_id='.$this->source_id;
$res=$this->mysql_query($sql);
echo "Delete empty price tours\n";
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE min_price=0 AND source_id='.$this->source_id;
$res=$this->mysql_query($sql);
echo "Delete empty date tour\n";
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE dates="" AND source_id='.$this->source_id;
$res=$this->mysql_query($sql);
/*
foreach(ShowcaseParams::$listDeparture as $depId=>$depName){
$sqlDeparture[]="WHEN $depId THEN '$depName'\n";
}
$sql="
UPDATE ".masterweb::TBL_TOURSINFO."
SET departure=(CASE pt_ctkeyfrom
".join('',$sqlDeparture)."
ELSE pt_ctkeyfrom
END
)
WHERE source_id=$this->source_id
";
#$this->mysql_query($sql);
*/
echo "Update departure dictinct\n";
$sql="
REPLACE INTO ".masterweb::TBL_DEPARTURE." (id,name)
SELECT pt_ctkeyfrom,departure
FROM ".masterweb::TBL_TOURSINFO."
WHERE departure!=''
AND source_id=".$this->source_id."
GROUP BY pt_ctkeyfrom,departure
";
mysql::q($sql);
echo "Fix departure for old verion of SPO1\n";
$sql="
UPDATE ".masterweb::TBL_TOURSINFO.",".masterweb::TBL_DEPARTURE."
SET ".masterweb::TBL_TOURSINFO.".departure=".masterweb::TBL_DEPARTURE.".name
WHERE ".masterweb::TBL_TOURSINFO.".departure=''
AND ".masterweb::TBL_TOURSINFO.".source_id=".$this->source_id."
AND ".masterweb::TBL_TOURSINFO.".pt_ctkeyfrom=".masterweb::TBL_DEPARTURE.".id
";
mysql::q($sql);
echo "Moving `MPS`\n";
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET gd="МПС", airline=""
WHERE airline="МПС"
AND source_id='.$this->source_id.'
';
mysql::q($sql);
echo "Rename `Krayser` to `Atlant-soyuz`\n";
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET pt_rate="USD"
WHERE pt_rate="$"
';
mysql::q($sql);
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET airline="Атлант Союз"
WHERE airline="Красэйр"
AND source_id='.$this->source_id.'
';
mysql::q($sql);
# пометить все туры по более чем одному отелю
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET is_hotel_multi=1
WHERE countrynames LIKE "%,%"
';
mysql::q($sql);
$sql='
SELECT countrynames,countrykeys
FROM '.masterweb::TBL_TOURSINFO.'
WHERE countrynames LIKE "%,%"
GROUP BY countrynames
';
$rows=mysql::qaf_rows($sql);
foreach($rows as &$row){
$a=explode(', ', $row['countrynames']);
$unique=join(', ',array_unique($a));
$ak=explode(', ', $row['countrykeys']);
$uniquek=join('-',array_unique($ak));
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET countrynames="'.$unique.'", countrykeys="'.$uniquek.'"
WHERE countrynames="'.$row['countrynames'].'"
';
mysql::q($sql);
}
#$this->time_report('End hack data');
}
function createGroups(){
echo "Set refill updates spo\n";
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.' AS ti
,'.masterweb::TBL_SPO_STATUS.' AS ls
SET ls.status="reprocessing"
WHERE ls.status="using"
AND ls.source_id=ti.source_id
AND ls.listspo_id=ti.pt_tlkey
AND ti.pt_tourcreated>ls.update_dt
';
mysql::q($sql);
echo "Save spo create log\n";
$sql='
INSERT IGNORE INTO '.masterweb::TBL_SPO_UPDATE_LOG.'
(pt_tlkey, pt_tourcreated, source_id)
SELECT pt_tlkey, pt_tourcreated, source_id
FROM '.masterweb::TBL_TOURSINFO.'
';
mysql::q($sql);
echo "Filling groupname fields\n";
$sql='
SELECT *
FROM '.masterweb::TBL_TOURSINFO.'
WHERE source_id='.$this->source_id.'
';
# WHERE groupname=""
foreach(mysql::qaf_rows($sql) as $row){
# make GROUPNAME
$groupname=$row['pt_tourname'];
$groupname=preg_replace('|№\s+|', '№', $groupname);
if($this->source_id==2){ # только для закахской базы обрезаем длительность в конце названия
$groupname=preg_replace('|\s*\>?\s*\d+\/\d+.*$|', '', $groupname);
}else{
if($row['ct_name']=='Прага'){
# для Праги удаляем все, что до символа ">"
$groupname=preg_replace('|^.+?>|', '', $groupname);
}elseif(preg_match('|^SPO\s*>(.+)$|', $groupname, $matches)){
# если название начинается с SPO
$groupname='SPO '.$matches[1];
}else{
# удаляем все после символа ">"
if($this->source_id!=2){ # для закахской базы не обрезаем
$groupname=preg_replace('|>.+|', '', $groupname);
}
}
$groupname=preg_replace('|\([^\)]*\)|', '', $groupname);
}
# make WDATES
$year=substr($row['pt_firsttourdate'], 0, 4);
$firsttourdate=str_replace('-','',$row['pt_firsttourdate']);
$dates=array();
$lastdate=0;
$wdates=0;
# echo "$row[dates]===\n";
# формирование битового поля с днями недели отправления - bit_wdates
foreach(explode(', ', $row['dates']) as $dateOld){
$dateOld=trim($dateOld);
if(!preg_match('|^(\d\d)\.(\d\d)|', $dateOld))
continue;
$dparts=explode('.', $dateOld);
$date=$year.$dparts[1].$dparts[0];
if(sizeof($dparts)==3){# если год задан явно
$dparts[2]=(strlen($dparts[2])==2 ? '20' : '').$dparts[2];
if($dparts[2]!=$year)# если заданный год не равен текущему
$date=($year=$dparts[2]).$dparts[1].$dparts[0];
}else{
if($firsttourdate>$date) # если текущая дата оказалась ранее начала СПО
$date=(++$year).$dparts[1].$dparts[0];
if($lastdate>$date)# если текущая дата оказалась ранее предыдущей
$date=(++$year).$dparts[1].$dparts[0];
}
$dates[]=$lastdate=$date;
$d=strptime($date, '%Y%m%d');
$wdates|=1<<$d['tm_wday'];
# echo "$dateOld -- $d[tm_wday]\n";
}
# echo $wdates,"+++\n";exit;
# заполнение поля groupname
$sql='
UPDATE '.masterweb::TBL_TOURSINFO.'
SET groupname="'.mysql::escape(trim($groupname)).'"
,pt_lasttourdate="'.$year.'-'.$dparts[1].'-'.$dparts[0].'"
,bit_wdates='.$wdates.'
WHERE pt_tourkey='.$row['pt_tourkey'].'
AND source_id='.$this->source_id.'
';
mysql::q($sql);
}
# filling toursinfo group table
$sql='DELETE FROM '.masterweb::TBL_TOURSINFOGROUP.' WHERE source_id='.$this->source_id;
mysql::q($sql);
$sql='
INSERT IGNORE INTO '.masterweb::TBL_TOURSINFOGROUP.' (departure, pt_ctkeyfrom, groupname
, pt_cnkey, cn_name, countrynames, countrykeys, ct_name, pt_tourkey, pt_tourname, pt_rate, bit_wdates
, nights, min_price
, pt_firsttourdate, pt_lasttourdate, pttourtype, pt_tourtype
, to_attribute, airline, gd
, source_id
, pt_tourcreated
, pt_tlkey
, pt_tlkeys
, pt_tourkeys
, is_hotel_multi
)
SELECT departure, pt_ctkeyfrom, groupname
, pt_cnkey, cn_name
, IF(pttourtype REGEXP "'.masterweb::REGEXP_COMBI.'", countrynames, NULL)
, IF(pttourtype REGEXP "'.masterweb::REGEXP_COMBI.'", countrykeys, NULL)
, ct_name, pt_tourkey, pt_tourname, pt_rate, BIT_OR(bit_wdates) AS bit_wdates
, GROUP_CONCAT(DISTINCT nights SEPARATOR ", ") AS nights, MIN(min_price) AS min_price
, MIN(pt_firsttourdate) AS pt_firsttourdate, MAX(pt_lasttourdate) AS pt_lasttourdate, pttourtype, pt_tourtype
, BIT_OR(to_attribute) AS to_attribute, airline, gd
, source_id
, MAX(pt_tourcreated)
, pt_tlkey
, GROUP_CONCAT(pt_tlkey)
, GROUP_CONCAT(pt_tourkey)
, is_hotel_multi
FROM '.masterweb::TBL_TOURSINFO.'
WHERE source_id='.$this->source_id.'
GROUP BY departure, cn_name, ct_name, countrynames, pttourtype, groupname, airline, gd
';
$this->timer=time();
$time=time();
mysql::q($sql);
$this->time_report('Import data from '.masterweb::TBL_TOURSINFO.' to '.masterweb::TBL_TOURSINFOGROUP.'');
#nights update
$sql='
SELECT nights
FROM '.masterweb::TBL_TOURSINFOGROUP.'
WHERE source_id='.$this->source_id.'
GROUP BY nights
';
foreach(mysql::qaf_cols($sql) as $nights){
$aNights=explode(', ', $nights);
$aNights=array_unique($aNights);
sort($aNights);
$nightsNew=join(', ', $aNights);
$sql='
UPDATE '.masterweb::TBL_TOURSINFOGROUP.'
SET nights="'.$nightsNew.'"
WHERE nights="'.$nights.'"
AND source_id='.$this->source_id.'
';
mysql::q($sql);
}
$sql='
UPDATE '.masterweb::TBL_TOURSINFOGROUP.' SET
firsttourmonth=DATE_FORMAT(pt_firsttourdate,"%Y%m")
,lasttourmonth=DATE_FORMAT(pt_lasttourdate,"%Y%m")
,min_nights=nights+0
WHERE
source_id='.$this->source_id.'
';
mysql::q($sql);
$sql='
UPDATE '.masterweb::TBL_TOURSINFOGROUP.' AS g, '.masterweb::TBL_TOURTYPE_SORTORDER.' AS so
SET g.tourtype_sort_index=so.sort_index
WHERE g.cn_name=so.cn_name
AND g.pttourtype=so.pttourtype
AND source_id='.$this->source_id.'
';
mysql::q($sql);
}
function xmlParsing(){
$this->parser = xml_parser_create("UTF-8");
xml_set_object($this->parser, $this);
xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
xml_parser_set_option($this->parser, XML_OPTION_SKIP_WHITE, 1);
xml_set_element_handler($this->parser, 'xmlStartElement', 'xmlEndElement');
xml_set_character_data_handler ($this->parser, 'xmlCharacterData');
if(!($fp = fopen($this->urlImport, "r"))) {
echo "Cannot open XML data file: $this->urlImport";
return false;
}
while($data = fread($fp, 4096)) {
if(!xml_parse($this->parser, $data, feof($fp))) {
echo sprintf("XML error: %s at line %d",
xml_error_string(xml_get_error_code($this->parser)),
xml_get_current_line_number($this->parser))
;
return false;
}
}
$this->time_report('Waiting for xml data');
$this->mysqlInserting();
xml_parser_free($this->parser);
fclose($fp);
echo "Inserted into mysql $this->insertedRowCount rows\n";
echo "Fix ".$this->countExcursion." excursions\n";
return true;
}
function xmlStartElement($parser, $name, $attrs){
if($name=='mwToursInfo'){
$this->waitingForFirstTag=false;
}
if(!$this->waitingForFirstTag)
$this->currentTagName=$name;
$this->tagInsideData=false;
}
function xmlEndElement($parser, $name){
# fix multi airline / Begin
if(($pos=mb_strpos($this->recordOnce['airline'],','))!==false){
$this->recordOnce['airline']=mb_substr($this->recordOnce['airline'], 0, $pos);
}
# fix multi airline / End
if($name=='mwToursInfo'){
# fix excursion / Begin
$this->countExcursion++;
if(isset($this->recordOnce['countrynames'])
&& mb_strpos($this->recordOnce['countrynames'],',')!==false
){
$cn_name = explode(', ',$this->recordOnce['countrynames']);
#$countrykeys = explode(', ',$this->recordOnce['countrykeys']); # не испльзуется
$ct_name = explode(', ',$this->recordOnce['ct_name']);
for($i=0; $i<sizeof($ct_name); $i++){
$this->recordOnce['ct_name']=$ct_name[$i];
$this->recordOnce['cn_name']=trim($cn_name[$i]);
$this->recordCollection[]=$this->recordOnce;
}
}else{
$this->recordOnce['cn_name']=trim($this->recordOnce['cn_name']);
$this->recordCollection[]=$this->recordOnce;
}
# fix excursion / End
$this->recordOnce=array();
if(self::MAX_INSERT_TUPLE_SIZE<=sizeof($this->recordCollection))
$this->mysqlInserting();
}
$this->tagInsideData=false;
}
function xmlCharacterData($parser, $data){
if($this->currentTagName && $data){
// $data=iconv("UTF-8","KOI8-R",$data);
$data=trim($data);
$data=mysql::escape($data);
if($this->tagInsideData)
$this->recordOnce[$this->currentTagName].=' '.$data;
else
$this->recordOnce[$this->currentTagName]=$data;
$this->tagInsideData=true;
}
}
function mysqlInserting(){
$fields=array(
/*
'ct_name',
'pt_ctkeyfrom',
'cnctkey',
'pt_cnkey',
'cn_name',
'pt_tourname',
'pt_toururl',
'pt_rate',
'dates',
'min_price',
'nights',
'pt_firsttourdate',
'pt_tourcreated',
'pt_tourkey',
*/
'pt_cnkey',
'cn_name',
'pt_ctkeyfrom',
'departure',
'ct_name',
'countrynames',
'countrykeys',
'pt_tourkey',
'pt_tourname',
'pt_rate',
'dates',
'nights',
'min_price',
'pt_firsttourdate',
'pt_tourcreated',
'pt_tlkey',
'hotelkeys',
'ctkeys',
'pttourtype',
'pt_tourtype',
'to_attribute',
'airline',
'gd',
'source_id',
);
if(empty( $this->recordCollection ))
return;
if($this->isFirst){
$this->isFirst=false;
#$sql='TRUNCATE '. masterweb::TBL_TOURSINFO;
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE source_id='.$this->source_id;
$res=$this->mysql_query($sql);
}
$sql="REPLACE INTO ".masterweb::TBL_TOURSINFO."(".join(',',$fields).') VALUES ';
foreach($this->recordCollection as $key=>$record){
$record['source_id']=$this->source_id;
# $sql.=($key?',':'').'("'.join('","', array_values($record)).'")';
$sql.=($key?',':'').'(';
/*
departure - new table field
На этом этапе для старых версий spo1 поле departure может заполняться пустыми строками
*/
$record['departure']=isset($record['ct_departure']) ? $record['ct_departure'] : '';
foreach($fields as $key2=>$fld)
$sql.=($key2?',':'').'"'.$record[$fld].'"';
$sql.=')';
}
$this->mysql_query($sql);
$this->insertedRowCount+=sizeof($this->recordCollection);
$this->recordCollection=array();
}
/*
function updateWDates(){
return;#TODO
$sql='SELECT dates FROM '.masterweb::TBL_TOURSINFO.' GROUP BY dates';
$dates=mysql::qaf_cols($sql);
echo "Updated ".sizeof($dates)." rows by WDates fields\n";
}
*/
static function updateIconCountryIso(){
$sql='
DELETE FROM '.masterweb::TBL_ICON.' WHERE ico_type="country_iso"
';
mysql::q($sql);
$sql='
INSERT INTO '.masterweb::TBL_ICON.'(ico_type, ico_name, filename)
SELECT "country_iso", c_name.cn_name, CONCAT(c_iso.iso2,".png")
FROM '.masterweb::TBL_COUNTRY.' AS c_iso
INNER JOIN (
SELECT cn_name
FROM '.masterweb::TBL_TOURSINFOGROUP.'
GROUP BY cn_name
) AS c_name
ON c_name.cn_name=c_iso.name_ru
';
mysql::q($sql);
}
}
CPidFile::lock();
$keys=ShowcaseParams::getEnableKeys();
foreach($keys as $key){
new importToursSet($key);
}
#importToursSet::updateSortOrder();
#EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment