Last active
December 19, 2015 12:39
-
-
Save athurg/5956420 to your computer and use it in GitHub Desktop.
用于解析Wordpress导出的WXR格式的工具。可输出为PHP数组或者SQL语句。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Decoder for WordPress WXR (eXtended RSS) export files.
 *
 * The export is loaded with SimpleXML. Published posts and the tag list can
 * be read back either as PHP arrays or as MySQL-style INSERT statements.
 */
class WXRDecoder
{
    /** @var SimpleXMLElement Root element of the loaded export document. */
    protected $_parser;

    /** @var array Map of namespace prefix => namespace URI found in the document. */
    protected $_ns;

    /** @var array List of post objects produced by the last parse_posts() call. */
    public $posts = array();

    /** @var array List of tag names produced by the last parse_tags() call. */
    public $tags = array();

    /** @var string Table name used by parse_posts_sql(). */
    public $post_table = 'tbl_posts';

    /** @var string Table name used by parse_tags_sql(). */
    public $tag_table = 'tbl_tags';

    /**
     * Load a WXR export.
     *
     * @param string $filename Path or URL of the export; the third
     *                         SimpleXMLElement argument (true) makes the
     *                         string be treated as a location, not as XML.
     */
    public function __construct($filename = 'wordpress.xml')
    {
        $this->_parser = new SimpleXMLElement($filename, LIBXML_NSCLEAN | LIBXML_NOCDATA, true);
        $this->_ns = $this->_parser->getNamespaces(true);
    }

    /**
     * Set the table name used in the generated post INSERT statements.
     *
     * @param string $table
     */
    public function set_post_table($table)
    {
        $this->post_table = $table;
    }

    /**
     * Set the table name used in the generated tag INSERT statements.
     *
     * @param string $table
     */
    public function set_tag_table($table)
    {
        $this->tag_table = $table;
    }

    /**
     * Collect every published item of the export.
     *
     * Each post is an object with the string properties id, title, date,
     * tags (comma-separated, categories and tags merged) and content.
     * The result is rebuilt from scratch on every call, so repeated calls
     * do not accumulate or overwrite entries.
     *
     * @return array List of post objects ordered by numeric post id.
     */
    public function parse_posts()
    {
        $posts = array();

        foreach ($this->_parser->channel->item as $item) {
            $wp = $this->_children($item, 'wp');

            // Items that are not published are skipped.
            if ((string)$wp->status !== 'publish') {
                continue;
            }

            // Categories and tags are both <category> elements; merge them
            // into one list.
            $tags = array();
            foreach ($item->category as $category) {
                $name = (string)$category;
                $tags[] = ($name === 'Uncategorized') ? '未分类' : $name;
            }

            // Cast to strings so the result does not hold live XML nodes.
            $id = (string)$wp->post_id;
            $posts[$id] = (object)array(
                'id'      => $id,
                'title'   => (string)$item->title,
                'date'    => (string)$wp->post_date,
                'tags'    => implode(',', array_unique($tags)),
                'content' => (string)$this->_children($item, 'content')->encoded,
            );
        }

        // Order numerically by post id; usort() also reindexes to a list.
        usort($posts, function ($a, $b) {
            $left = (int)$a->id;
            $right = (int)$b->id;
            if ($left === $right) {
                return 0;
            }
            return ($left < $right) ? -1 : 1;
        });

        $this->posts = $posts;
        return $this->posts;
    }

    /**
     * Collect the distinct tag names declared at channel level.
     *
     * @return array Sorted list of unique tag names.
     */
    public function parse_tags()
    {
        $tags = array();
        foreach ($this->_children($this->_parser->channel, 'wp')->tag as $item) {
            $tags[] = (string)$this->_children($item, 'wp')->tag_name;
        }

        $this->tags = array_unique($tags);
        sort($this->tags);
        return $this->tags;
    }

    /**
     * Parse the posts and render one INSERT statement per post.
     *
     * @return string Statements separated by newlines; empty when there are no posts.
     */
    public function parse_posts_sql()
    {
        $this->parse_posts();

        $table = $this->_quote_identifier($this->post_table);
        $sql = '';
        foreach ($this->posts as $p) {
            $values = array(
                $this->_quote($p->id),
                $this->_quote($p->title),
                $this->_quote($p->date),
                $this->_quote($p->tags),
                $this->_quote($p->content),
            );
            $sql .= "INSERT INTO {$table} (`id`,`title`,`date`,`tags`,`content`)";
            $sql .= ' VALUES (' . implode(',', $values) . ");\n";
        }
        return $sql;
    }

    /**
     * Parse the tags and render one INSERT statement per tag.
     *
     * @return string Statements separated by newlines; empty when there are no tags.
     */
    public function parse_tags_sql()
    {
        $this->parse_tags();

        $table = $this->_quote_identifier($this->tag_table);
        $sql = '';
        foreach ($this->tags as $tag) {
            $sql .= "INSERT INTO {$table} (`name`) VALUES (" . $this->_quote($tag) . ");\n";
        }
        return $sql;
    }

    /**
     * Return the children of $node that live in the namespace bound to $prefix.
     *
     * When the document does not declare the prefix, null is passed on, so
     * no "undefined index" notice is raised.
     *
     * @param SimpleXMLElement $node
     * @param string           $prefix Namespace prefix such as 'wp' or 'content'.
     * @return SimpleXMLElement
     */
    protected function _children($node, $prefix)
    {
        $uri = isset($this->_ns[$prefix]) ? $this->_ns[$prefix] : null;
        return $node->children($uri);
    }

    /**
     * Render a value as a single-quoted SQL string literal.
     *
     * Uses MySQL-style backslash escapes, matching the backtick-quoted
     * identifiers this class emits.
     * NOTE(review): assumes the target server does not run with
     * NO_BACKSLASH_ESCAPES; prefer prepared statements when a live
     * connection is available.
     *
     * @param string $value
     * @return string
     */
    protected function _quote($value)
    {
        // strtr() with an array replaces in a single pass, so already
        // escaped output is never escaped twice.
        $escaped = strtr((string)$value, array(
            "\\"   => "\\\\",
            "\0"   => "\\0",
            "\n"   => "\\n",
            "\r"   => "\\r",
            "'"    => "\\'",
            '"'    => '\\"',
            "\x1a" => "\\Z",
        ));
        return "'" . $escaped . "'";
    }

    /**
     * Render a table name as a backtick-quoted identifier.
     *
     * @param string $name
     * @return string
     */
    protected function _quote_identifier($name)
    {
        return '`' . str_replace('`', '``', (string)$name) . '`';
    }
}
//$decoder = new WXRDecoder('gooth.wordpress.2013-07-10.xml');
//echo $decoder->parse_posts_sql();
//echo $decoder->parse_tags_sql();
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment