Created
December 5, 2014 08:36
-
-
Save gorborukov/ae431fe53039510a8921 to your computer and use it in GitHub Desktop.
master-web example from internet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<? | |
/** | |
** Сохранение данных о СПО в базе: | |
** 1. http-запрос | |
** 2. xml-парсинг | |
** 3. вставка в mysql:masterweb_ext.mwToursInfo | |
**/ | |
define('INC_DIR', dirname(__FILE__).'/../includes/'); | |
require_once INC_DIR.'auto_prepend.php'; | |
require_once INC_DIR.'showcase/params.php'; | |
require_once INC_DIR.'pidfile.php'; | |
class importToursSet{ | |
private $urlImport=null; | |
private $parser=null; | |
private $recordOnce=array(); | |
private $recordCollection=array(); | |
private $currentTagName=null; | |
private $waitingForFirstTag=true; | |
private $insertedRowCount=0; | |
private $tagInsideData=false; | |
private $source_id=null; | |
private $isFirst=true; | |
private $timer=null; | |
private $countExcursion=0; | |
const MAX_INSERT_TUPLE_SIZE=100; | |
# public function __destruct() {} | |
public function __construct($source_key='ru'){ | |
echo "\nStart at ".date('Y.m.d, H:i:s')."\n"; | |
$this->timer=time(); | |
$rowSource=ShowcaseParams::getSource($source_key); | |
# $rowSource=ShowcaseParams::getSource(array('source_key'=>$source_key)); | |
$this->urlImport= $rowSource['import_url']; | |
$this->source_id=$rowSource['id']; | |
echo "Source: ".$source_key." #".$this->source_id."\n\t".$this->urlImport."\n"; | |
mysql::charset('utf8'); | |
$statusParsing=$this->xmlParsing(); | |
mysql::charset('cp1251'); | |
if(!$statusParsing){ | |
echo "\n !!! Parsing Error\n"; | |
return false; | |
} | |
$this->hackData(); | |
$this->createGroups(); | |
echo "Finish at ".date('Y.m.d, H:i:s')."\n"; | |
} | |
private function time_report($msg){ | |
echo 'TIME REPORT: '.$msg.' '.(time()-$this->timer)."c\n"; | |
} | |
function mysql_query($sql){ | |
$res=mysql::q($sql); | |
if(empty($res)){ | |
trigger_error('Mysql error. Program terminated. Sql: '.$sql, E_USER_ERROR); | |
exit; | |
} | |
} | |
function hackData(){ | |
echo "HACKING IMPORTED DATA\n"; | |
$sqlCcaTail=' | |
FROM '.masterweb::TBL_TOURSINFO.' | |
WHERE airline="Czech Connect Airlines" | |
'; | |
$sql=" | |
SELECT COUNT(*) | |
".$sqlCcaTail." | |
"; | |
$countCcaTours=mysql::qaf1($sql); | |
echo "Delete tour by Czech Connect Airlines (CCA): $countCcaTours\n"; | |
$sql=" | |
DELETE | |
".$sqlCcaTail." | |
"; | |
mysql::q($sql); | |
$pt_tourkey_delete="19249, 19251, 19252, 19891, 19893"; | |
echo "Delete pt_tourkey ($pt_tourkey_delete)\n"; | |
$sql=" | |
DELETE FROM ".masterweb::TBL_TOURSINFO." | |
WHERE pt_tourkey IN(".$pt_tourkey_delete.") | |
"; | |
$this->mysql_query($sql); | |
$lockDeparture='%\_1'; | |
echo "Delete tours from departure $lockDeparture\n"; | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE departure LIKE "'.$lockDeparture.'"'; | |
$res=$this->mysql_query($sql); | |
echo "Delete empty night tours\n"; | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE nights="" AND source_id='.$this->source_id; | |
$res=$this->mysql_query($sql); | |
echo "Delete empty price tours\n"; | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE min_price=0 AND source_id='.$this->source_id; | |
$res=$this->mysql_query($sql); | |
echo "Delete empty date tour\n"; | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE dates="" AND source_id='.$this->source_id; | |
$res=$this->mysql_query($sql); | |
/* | |
foreach(ShowcaseParams::$listDeparture as $depId=>$depName){ | |
$sqlDeparture[]="WHEN $depId THEN '$depName'\n"; | |
} | |
$sql=" | |
UPDATE ".masterweb::TBL_TOURSINFO." | |
SET departure=(CASE pt_ctkeyfrom | |
".join('',$sqlDeparture)." | |
ELSE pt_ctkeyfrom | |
END | |
) | |
WHERE source_id=$this->source_id | |
"; | |
#$this->mysql_query($sql); | |
*/ | |
echo "Update departure dictinct\n"; | |
$sql=" | |
REPLACE INTO ".masterweb::TBL_DEPARTURE." (id,name) | |
SELECT pt_ctkeyfrom,departure | |
FROM ".masterweb::TBL_TOURSINFO." | |
WHERE departure!='' | |
AND source_id=".$this->source_id." | |
GROUP BY pt_ctkeyfrom,departure | |
"; | |
mysql::q($sql); | |
echo "Fix departure for old verion of SPO1\n"; | |
$sql=" | |
UPDATE ".masterweb::TBL_TOURSINFO.",".masterweb::TBL_DEPARTURE." | |
SET ".masterweb::TBL_TOURSINFO.".departure=".masterweb::TBL_DEPARTURE.".name | |
WHERE ".masterweb::TBL_TOURSINFO.".departure='' | |
AND ".masterweb::TBL_TOURSINFO.".source_id=".$this->source_id." | |
AND ".masterweb::TBL_TOURSINFO.".pt_ctkeyfrom=".masterweb::TBL_DEPARTURE.".id | |
"; | |
mysql::q($sql); | |
echo "Moving `MPS`\n"; | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET gd="МПС", airline="" | |
WHERE airline="МПС" | |
AND source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
echo "Rename `Krayser` to `Atlant-soyuz`\n"; | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET pt_rate="USD" | |
WHERE pt_rate="$" | |
'; | |
mysql::q($sql); | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET airline="Атлант Союз" | |
WHERE airline="Красэйр" | |
AND source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
# пометить все туры по более чем одному отелю | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET is_hotel_multi=1 | |
WHERE countrynames LIKE "%,%" | |
'; | |
mysql::q($sql); | |
$sql=' | |
SELECT countrynames,countrykeys | |
FROM '.masterweb::TBL_TOURSINFO.' | |
WHERE countrynames LIKE "%,%" | |
GROUP BY countrynames | |
'; | |
$rows=mysql::qaf_rows($sql); | |
foreach($rows as &$row){ | |
$a=explode(', ', $row['countrynames']); | |
$unique=join(', ',array_unique($a)); | |
$ak=explode(', ', $row['countrykeys']); | |
$uniquek=join('-',array_unique($ak)); | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET countrynames="'.$unique.'", countrykeys="'.$uniquek.'" | |
WHERE countrynames="'.$row['countrynames'].'" | |
'; | |
mysql::q($sql); | |
} | |
#$this->time_report('End hack data'); | |
} | |
function createGroups(){ | |
echo "Set refill updates spo\n"; | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' AS ti | |
,'.masterweb::TBL_SPO_STATUS.' AS ls | |
SET ls.status="reprocessing" | |
WHERE ls.status="using" | |
AND ls.source_id=ti.source_id | |
AND ls.listspo_id=ti.pt_tlkey | |
AND ti.pt_tourcreated>ls.update_dt | |
'; | |
mysql::q($sql); | |
echo "Save spo create log\n"; | |
$sql=' | |
INSERT IGNORE INTO '.masterweb::TBL_SPO_UPDATE_LOG.' | |
(pt_tlkey, pt_tourcreated, source_id) | |
SELECT pt_tlkey, pt_tourcreated, source_id | |
FROM '.masterweb::TBL_TOURSINFO.' | |
'; | |
mysql::q($sql); | |
echo "Filling groupname fields\n"; | |
$sql=' | |
SELECT * | |
FROM '.masterweb::TBL_TOURSINFO.' | |
WHERE source_id='.$this->source_id.' | |
'; | |
# WHERE groupname="" | |
foreach(mysql::qaf_rows($sql) as $row){ | |
# make GROUPNAME | |
$groupname=$row['pt_tourname']; | |
$groupname=preg_replace('|№\s+|', '№', $groupname); | |
if($this->source_id==2){ # только для закахской базы обрезаем длительность в конце названия | |
$groupname=preg_replace('|\s*\>?\s*\d+\/\d+.*$|', '', $groupname); | |
}else{ | |
if($row['ct_name']=='Прага'){ | |
# для Праги удаляем все, что до символа ">" | |
$groupname=preg_replace('|^.+?>|', '', $groupname); | |
}elseif(preg_match('|^SPO\s*>(.+)$|', $groupname, $matches)){ | |
# если название начинается с SPO | |
$groupname='SPO '.$matches[1]; | |
}else{ | |
# удаляем все после символа ">" | |
if($this->source_id!=2){ # для закахской базы не обрезаем | |
$groupname=preg_replace('|>.+|', '', $groupname); | |
} | |
} | |
$groupname=preg_replace('|\([^\)]*\)|', '', $groupname); | |
} | |
# make WDATES | |
$year=substr($row['pt_firsttourdate'], 0, 4); | |
$firsttourdate=str_replace('-','',$row['pt_firsttourdate']); | |
$dates=array(); | |
$lastdate=0; | |
$wdates=0; | |
# echo "$row[dates]===\n"; | |
# формирование битового поля с днями недели отправления - bit_wdates | |
foreach(explode(', ', $row['dates']) as $dateOld){ | |
$dateOld=trim($dateOld); | |
if(!preg_match('|^(\d\d)\.(\d\d)|', $dateOld)) | |
continue; | |
$dparts=explode('.', $dateOld); | |
$date=$year.$dparts[1].$dparts[0]; | |
if(sizeof($dparts)==3){# если год задан явно | |
$dparts[2]=(strlen($dparts[2])==2 ? '20' : '').$dparts[2]; | |
if($dparts[2]!=$year)# если заданный год не равен текущему | |
$date=($year=$dparts[2]).$dparts[1].$dparts[0]; | |
}else{ | |
if($firsttourdate>$date) # если текущая дата оказалась ранее начала СПО | |
$date=(++$year).$dparts[1].$dparts[0]; | |
if($lastdate>$date)# если текущая дата оказалась ранее предыдущей | |
$date=(++$year).$dparts[1].$dparts[0]; | |
} | |
$dates[]=$lastdate=$date; | |
$d=strptime($date, '%Y%m%d'); | |
$wdates|=1<<$d['tm_wday']; | |
# echo "$dateOld -- $d[tm_wday]\n"; | |
} | |
# echo $wdates,"+++\n";exit; | |
# заполнение поля groupname | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFO.' | |
SET groupname="'.mysql::escape(trim($groupname)).'" | |
,pt_lasttourdate="'.$year.'-'.$dparts[1].'-'.$dparts[0].'" | |
,bit_wdates='.$wdates.' | |
WHERE pt_tourkey='.$row['pt_tourkey'].' | |
AND source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
} | |
# filling toursinfo group table | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFOGROUP.' WHERE source_id='.$this->source_id; | |
mysql::q($sql); | |
$sql=' | |
INSERT IGNORE INTO '.masterweb::TBL_TOURSINFOGROUP.' (departure, pt_ctkeyfrom, groupname | |
, pt_cnkey, cn_name, countrynames, countrykeys, ct_name, pt_tourkey, pt_tourname, pt_rate, bit_wdates | |
, nights, min_price | |
, pt_firsttourdate, pt_lasttourdate, pttourtype, pt_tourtype | |
, to_attribute, airline, gd | |
, source_id | |
, pt_tourcreated | |
, pt_tlkey | |
, pt_tlkeys | |
, pt_tourkeys | |
, is_hotel_multi | |
) | |
SELECT departure, pt_ctkeyfrom, groupname | |
, pt_cnkey, cn_name | |
, IF(pttourtype REGEXP "'.masterweb::REGEXP_COMBI.'", countrynames, NULL) | |
, IF(pttourtype REGEXP "'.masterweb::REGEXP_COMBI.'", countrykeys, NULL) | |
, ct_name, pt_tourkey, pt_tourname, pt_rate, BIT_OR(bit_wdates) AS bit_wdates | |
, GROUP_CONCAT(DISTINCT nights SEPARATOR ", ") AS nights, MIN(min_price) AS min_price | |
, MIN(pt_firsttourdate) AS pt_firsttourdate, MAX(pt_lasttourdate) AS pt_lasttourdate, pttourtype, pt_tourtype | |
, BIT_OR(to_attribute) AS to_attribute, airline, gd | |
, source_id | |
, MAX(pt_tourcreated) | |
, pt_tlkey | |
, GROUP_CONCAT(pt_tlkey) | |
, GROUP_CONCAT(pt_tourkey) | |
, is_hotel_multi | |
FROM '.masterweb::TBL_TOURSINFO.' | |
WHERE source_id='.$this->source_id.' | |
GROUP BY departure, cn_name, ct_name, countrynames, pttourtype, groupname, airline, gd | |
'; | |
$this->timer=time(); | |
$time=time(); | |
mysql::q($sql); | |
$this->time_report('Import data from '.masterweb::TBL_TOURSINFO.' to '.masterweb::TBL_TOURSINFOGROUP.''); | |
#nights update | |
$sql=' | |
SELECT nights | |
FROM '.masterweb::TBL_TOURSINFOGROUP.' | |
WHERE source_id='.$this->source_id.' | |
GROUP BY nights | |
'; | |
foreach(mysql::qaf_cols($sql) as $nights){ | |
$aNights=explode(', ', $nights); | |
$aNights=array_unique($aNights); | |
sort($aNights); | |
$nightsNew=join(', ', $aNights); | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFOGROUP.' | |
SET nights="'.$nightsNew.'" | |
WHERE nights="'.$nights.'" | |
AND source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
} | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFOGROUP.' SET | |
firsttourmonth=DATE_FORMAT(pt_firsttourdate,"%Y%m") | |
,lasttourmonth=DATE_FORMAT(pt_lasttourdate,"%Y%m") | |
,min_nights=nights+0 | |
WHERE | |
source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
$sql=' | |
UPDATE '.masterweb::TBL_TOURSINFOGROUP.' AS g, '.masterweb::TBL_TOURTYPE_SORTORDER.' AS so | |
SET g.tourtype_sort_index=so.sort_index | |
WHERE g.cn_name=so.cn_name | |
AND g.pttourtype=so.pttourtype | |
AND source_id='.$this->source_id.' | |
'; | |
mysql::q($sql); | |
} | |
function xmlParsing(){ | |
$this->parser = xml_parser_create("UTF-8"); | |
xml_set_object($this->parser, $this); | |
xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0); | |
xml_parser_set_option($this->parser, XML_OPTION_SKIP_WHITE, 1); | |
xml_set_element_handler($this->parser, 'xmlStartElement', 'xmlEndElement'); | |
xml_set_character_data_handler ($this->parser, 'xmlCharacterData'); | |
if(!($fp = fopen($this->urlImport, "r"))) { | |
echo "Cannot open XML data file: $this->urlImport"; | |
return false; | |
} | |
while($data = fread($fp, 4096)) { | |
if(!xml_parse($this->parser, $data, feof($fp))) { | |
echo sprintf("XML error: %s at line %d", | |
xml_error_string(xml_get_error_code($this->parser)), | |
xml_get_current_line_number($this->parser)) | |
; | |
return false; | |
} | |
} | |
$this->time_report('Waiting for xml data'); | |
$this->mysqlInserting(); | |
xml_parser_free($this->parser); | |
fclose($fp); | |
echo "Inserted into mysql $this->insertedRowCount rows\n"; | |
echo "Fix ".$this->countExcursion." excursions\n"; | |
return true; | |
} | |
function xmlStartElement($parser, $name, $attrs){ | |
if($name=='mwToursInfo'){ | |
$this->waitingForFirstTag=false; | |
} | |
if(!$this->waitingForFirstTag) | |
$this->currentTagName=$name; | |
$this->tagInsideData=false; | |
} | |
function xmlEndElement($parser, $name){ | |
# fix multi airline / Begin | |
if(($pos=mb_strpos($this->recordOnce['airline'],','))!==false){ | |
$this->recordOnce['airline']=mb_substr($this->recordOnce['airline'], 0, $pos); | |
} | |
# fix multi airline / End | |
if($name=='mwToursInfo'){ | |
# fix excursion / Begin | |
$this->countExcursion++; | |
if(isset($this->recordOnce['countrynames']) | |
&& mb_strpos($this->recordOnce['countrynames'],',')!==false | |
){ | |
$cn_name = explode(', ',$this->recordOnce['countrynames']); | |
#$countrykeys = explode(', ',$this->recordOnce['countrykeys']); # не испльзуется | |
$ct_name = explode(', ',$this->recordOnce['ct_name']); | |
for($i=0; $i<sizeof($ct_name); $i++){ | |
$this->recordOnce['ct_name']=$ct_name[$i]; | |
$this->recordOnce['cn_name']=trim($cn_name[$i]); | |
$this->recordCollection[]=$this->recordOnce; | |
} | |
}else{ | |
$this->recordOnce['cn_name']=trim($this->recordOnce['cn_name']); | |
$this->recordCollection[]=$this->recordOnce; | |
} | |
# fix excursion / End | |
$this->recordOnce=array(); | |
if(self::MAX_INSERT_TUPLE_SIZE<=sizeof($this->recordCollection)) | |
$this->mysqlInserting(); | |
} | |
$this->tagInsideData=false; | |
} | |
function xmlCharacterData($parser, $data){ | |
if($this->currentTagName && $data){ | |
// $data=iconv("UTF-8","KOI8-R",$data); | |
$data=trim($data); | |
$data=mysql::escape($data); | |
if($this->tagInsideData) | |
$this->recordOnce[$this->currentTagName].=' '.$data; | |
else | |
$this->recordOnce[$this->currentTagName]=$data; | |
$this->tagInsideData=true; | |
} | |
} | |
function mysqlInserting(){ | |
$fields=array( | |
/* | |
'ct_name', | |
'pt_ctkeyfrom', | |
'cnctkey', | |
'pt_cnkey', | |
'cn_name', | |
'pt_tourname', | |
'pt_toururl', | |
'pt_rate', | |
'dates', | |
'min_price', | |
'nights', | |
'pt_firsttourdate', | |
'pt_tourcreated', | |
'pt_tourkey', | |
*/ | |
'pt_cnkey', | |
'cn_name', | |
'pt_ctkeyfrom', | |
'departure', | |
'ct_name', | |
'countrynames', | |
'countrykeys', | |
'pt_tourkey', | |
'pt_tourname', | |
'pt_rate', | |
'dates', | |
'nights', | |
'min_price', | |
'pt_firsttourdate', | |
'pt_tourcreated', | |
'pt_tlkey', | |
'hotelkeys', | |
'ctkeys', | |
'pttourtype', | |
'pt_tourtype', | |
'to_attribute', | |
'airline', | |
'gd', | |
'source_id', | |
); | |
if(empty( $this->recordCollection )) | |
return; | |
if($this->isFirst){ | |
$this->isFirst=false; | |
#$sql='TRUNCATE '. masterweb::TBL_TOURSINFO; | |
$sql='DELETE FROM '.masterweb::TBL_TOURSINFO.' WHERE source_id='.$this->source_id; | |
$res=$this->mysql_query($sql); | |
} | |
$sql="REPLACE INTO ".masterweb::TBL_TOURSINFO."(".join(',',$fields).') VALUES '; | |
foreach($this->recordCollection as $key=>$record){ | |
$record['source_id']=$this->source_id; | |
# $sql.=($key?',':'').'("'.join('","', array_values($record)).'")'; | |
$sql.=($key?',':'').'('; | |
/* | |
departure - new table field | |
На этом этапе для старых версий spo1 поле departure может заполняться пустыми строками | |
*/ | |
$record['departure']=isset($record['ct_departure']) ? $record['ct_departure'] : ''; | |
foreach($fields as $key2=>$fld) | |
$sql.=($key2?',':'').'"'.$record[$fld].'"'; | |
$sql.=')'; | |
} | |
$this->mysql_query($sql); | |
$this->insertedRowCount+=sizeof($this->recordCollection); | |
$this->recordCollection=array(); | |
} | |
/* | |
function updateWDates(){ | |
return;#TODO | |
$sql='SELECT dates FROM '.masterweb::TBL_TOURSINFO.' GROUP BY dates'; | |
$dates=mysql::qaf_cols($sql); | |
echo "Updated ".sizeof($dates)." rows by WDates fields\n"; | |
} | |
*/ | |
static function updateIconCountryIso(){ | |
$sql=' | |
DELETE FROM '.masterweb::TBL_ICON.' WHERE ico_type="country_iso" | |
'; | |
mysql::q($sql); | |
$sql=' | |
INSERT INTO '.masterweb::TBL_ICON.'(ico_type, ico_name, filename) | |
SELECT "country_iso", c_name.cn_name, CONCAT(c_iso.iso2,".png") | |
FROM '.masterweb::TBL_COUNTRY.' AS c_iso | |
INNER JOIN ( | |
SELECT cn_name | |
FROM '.masterweb::TBL_TOURSINFOGROUP.' | |
GROUP BY cn_name | |
) AS c_name | |
ON c_name.cn_name=c_iso.name_ru | |
'; | |
mysql::q($sql); | |
} | |
} | |
CPidFile::lock(); | |
$keys=ShowcaseParams::getEnableKeys(); | |
foreach($keys as $key){ | |
new importToursSet($key); | |
} | |
#importToursSet::updateSortOrder(); | |
#EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment