Created
October 20, 2016 09:33
-
-
Save cnicodeme/6b6beddda408b3eed37c51795a2bdc4f to your computer and use it in GitHub Desktop.
WordPress XML export creation in PHP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// -----------------------------------------------------------------------
// Configuration: everything that is specific to the source blog lives here.
// -----------------------------------------------------------------------

// All dates in the export are expressed in UTC.
date_default_timezone_set('UTC');

// strftime()-style pattern producing an RFC 822 date with a fixed +0000 offset.
define('DATE_FORMAT', '%a, %d %b %Y %H:%M:%S +0000');

// Host name of the blog being exported; also used as the output directory name.
$domain = 'blog.cnicodeme.com';

// Identifier of the blog whose articles are exported.
$blogId = 1;

$pdo = new PDO('mysql:dbname=postera;host=127.0.0.1', 'root', '', array(PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES \'UTF8\''));
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// The following two SQL requests may change based on your database, but that's all :

// Tags attached to a single article; executed once per article with :id bound.
$tagsStmt = $pdo->prepare('SELECT t.name AS name FROM tags t LEFT JOIN article_has_tags aht ON aht.tag_id = t.id WHERE aht.article_id = :id');

// Visible, non-removed articles of the selected blog. The blog id is bound as a
// parameter rather than concatenated into the SQL text, like the tag query above.
$stmt = $pdo->prepare('SELECT id, unique_id, name, title, content, created, published, last_updated FROM articles WHERE blog_id = :blog_id AND removed IS NULL AND is_visible = 1');
$stmt->execute(array('blog_id' => $blogId));

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~ Starting the real code :) ~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
 * SimpleXMLElement variant able to create children whose text is wrapped in
 * a CDATA section instead of being entity-escaped.
 */
class SimpleXMLExtended extends SimpleXMLElement
{
    /**
     * Appends a CDATA section containing the given text to this element.
     *
     * @param string $cdata_text Raw text to place inside the CDATA section.
     */
    private function addCData($cdata_text)
    {
        // SimpleXML has no CDATA API, so go through the DOM view of this node.
        $domElement = dom_import_simplexml($this);
        $section = $domElement->ownerDocument->createCDATASection($cdata_text);
        $domElement->appendChild($section);
    }

    /**
     * Adds a child element whose content is a CDATA section.
     *
     * @param string      $name  Name of the child element.
     * @param string      $value Text stored inside the CDATA section.
     * @param string|null $ns    Optional namespace forwarded to addChild().
     *
     * @return SimpleXMLExtended The newly created child.
     */
    public function addChildCData($name, $value, $ns = null)
    {
        $child = $this->addChild($name, '', $ns);
        $child->addCData($value);

        return $child;
    }
}
// Root <rss> element of the WXR document.
$xml = new SimpleXMLExtended('<rss />');

// SimpleXML consumes the first "prefix:" of a qualified name when no namespace
// URI is supplied, so 'xmlns:xmlns:wp' is written out as the attribute
// 'xmlns:wp' and 'xmlns:wp:foo' as the element 'wp:foo'.
$xml->addAttribute('xmlns:xmlns:excerpt', 'http://wordpress.org/export/1.2/excerpt/');
$xml->addAttribute('xmlns:xmlns:content', 'http://purl.org/rss/1.0/modules/content/');
$xml->addAttribute('xmlns:xmlns:wfw', 'http://wellformedweb.org/CommentAPI/');
$xml->addAttribute('xmlns:xmlns:dc', 'http://purl.org/dc/elements/1.1/');
$xml->addAttribute('xmlns:xmlns:wp', 'http://wordpress.org/export/1.2/');
$xml->addAttribute('version', '2.0');

// Channel-level metadata describing the exported blog.
$channel = $xml->addChild('channel');
$channel->addChild('title', 'Cyril\'s blog');
$channel->addChild('link', 'http://'.$domain);
$channel->addChild('description', 'Adventures of an entrepreneur');
// RFC 822 date in UTC; gmdate() replaces the deprecated strftime().
$channel->addChild('pubDate', gmdate(DATE_RSS));
$channel->addChild('language', 'en-US');

// These used to pass the literal string 'wp' as the namespace URI, which bound
// the wp: prefix to a bogus namespace on each element. Use the same prefix
// trick as everywhere else so they fall under the xmlns:wp declared on <rss>.
$channel->addChild('xmlns:wp:wxr_version', '1.2');
$channel->addChild('xmlns:wp:base_site_url', 'http://cnicodeme.com');
$channel->addChild('xmlns:wp:base_blog_url', 'http://'.$domain);
$channel->addChild('generator', 'https://wordpress.org/?v=4.6.1');
// Running counter written out as wp:post_id for every exported item.
$i = 0;
// Post names already seen, used to guarantee unique permalinks.
$guid = array();
// Uploads referenced by the posts, keyed by relative path so each file is
// exported only once even when several posts embed it.
$attachments = array();

// SimpleXMLElement::addChild() treats its value as XML content and fails on a
// bare "&"; escape &, < and > before handing over free text.
$xmlText = function ($value) {
    return htmlspecialchars((string) $value, ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

while ($post = $stmt->fetch(PDO::FETCH_ASSOC)) {
    if (isset($guid[$post['name']])) {
        // Ensuring unicity
        exit('The post name "'.$post['name'].'" already exists!');
    }
    $guid[$post['name']] = true;

    $item = $channel->addChild('item');

    $publishedTs = strtotime($post['published']);
    // RFC 822 form for the RSS <pubDate> element.
    $published = gmdate(DATE_RSS, $publishedTs);
    // WXR stores post dates as "YYYY-MM-DD HH:MM:SS".
    $postDate = gmdate('Y-m-d H:i:s', $publishedTs);

    $permalinkUrl = 'http://'.$domain.'/'.$post['name'];

    $item->addChild('title', $xmlText($post['title']));
    $item->addChild('link', $xmlText($permalinkUrl));
    $item->addChild('pubDate', $published);
    $item->addChildCData('xmlns:dc:creator', 'Cyril Nicodème');
    $permalink = $item->addChild('guid', $xmlText($permalinkUrl));
    $permalink->addAttribute('isPermaLink', 'false');
    $item->addChild('description', '');

    $post['content'] = str_replace('blog1.reflectiv.net', 'blog.reflectiv.net', $post['content']);
    $item->addChildCData('xmlns:content:encoded', $post['content']);

    // The excerpt is whatever precedes the "more" marker, which may be written
    // with or without a single space on either side of the word.
    $excerptParts = preg_split('/<!-- ?more ?-->/', $post['content'], 2);
    $item->addChildCData('xmlns:excerpt:encoded', $excerptParts[0]);

    $item->addChild('xmlns:wp:post_id', (string) $i);
    $item->addChildCData('xmlns:wp:post_date', $postDate);
    $item->addChildCData('xmlns:wp:post_date_gmt', $postDate);
    $item->addChildCData('xmlns:wp:comment_status', 'closed');
    $item->addChildCData('xmlns:wp:ping_status', 'closed');
    $item->addChildCData('xmlns:wp:post_name', $post['name']);
    $item->addChildCData('xmlns:wp:status', 'publish');
    $item->addChild('xmlns:wp:post_parent', '0');
    $item->addChild('xmlns:wp:menu_order', '0');
    $item->addChildCData('xmlns:wp:post_type', 'post');
    $item->addChildCData('xmlns:wp:post_password', '');
    $item->addChild('xmlns:wp:is_sticky', '0');

    // TAGS
    $tagsStmt->execute(array('id' => $post['id']));
    while ($tag = $tagsStmt->fetch(PDO::FETCH_ASSOC)) {
        // Keep the database row in $tag: reusing that variable for the new XML
        // node made the nicename lookup read from the node instead of the row.
        $category = $item->addChild('category', $xmlText($tag['name']));
        $category->addAttribute('domain', 'post_tag');
        $category->addAttribute('nicename', $tag['name']);
    }

    // UPLOADS
    // Collect every src="..." that points at this blog, either absolute or
    // root-relative. The domain is quoted so its dots only match literal dots.
    $srcPattern = '/src=["\'](http:\/\/'.preg_quote($domain, '/').')?\/([^\'"]*)["\']/i';
    preg_match_all($srcPattern, $post['content'], $matches, PREG_PATTERN_ORDER);
    foreach ($matches[2] as $url) {
        // A second leading slash means a protocol-relative URL (//host/...),
        // i.e. a file hosted elsewhere.
        if (substr($url, 0, 1) === '/') {
            continue;
        }
        if (isset($attachments[$url])) {
            continue;
        }
        $attachments[$url] = array(
            'url' => 'http://'.$domain.'/'.$url,
            'name' => $url,
            'date' => $published
        );
    }

    $i++;
}
// Output layout: <script dir>/<domain>/wordpress.xml and <script dir>/<domain>/images/*
$exportDir = dirname(__FILE__).DIRECTORY_SEPARATOR.$domain;
$imagesDir = $exportDir.DIRECTORY_SEPARATOR.'images';
if (!is_dir($imagesDir)) {
    mkdir($imagesDir, 0777, true);
}

// SimpleXMLElement::addChild() treats its value as XML content and fails on a
// bare "&" (common in URLs); escape &, < and > before handing over free text.
$xmlText = function ($value) {
    return htmlspecialchars((string) $value, ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

// One <item> of type "attachment" per upload collected from the posts.
foreach ($attachments as $attachment) {
    $item = $channel->addChild('item');

    // WXR stores post dates as "YYYY-MM-DD HH:MM:SS"; the collected date is
    // re-parsed here so either input form ends up in that shape.
    $postDate = gmdate('Y-m-d H:i:s', strtotime($attachment['date']));

    $item->addChild('title', $xmlText($attachment['name']));
    $item->addChild('link', $xmlText($attachment['url']));
    $item->addChild('pubDate', $attachment['date']);
    $item->addChildCData('xmlns:dc:creator', 'Cyril Nicodème');
    $permalink = $item->addChild('guid', $xmlText($attachment['url']));
    $permalink->addAttribute('isPermaLink', 'false');
    $item->addChild('description', '');
    $item->addChildCData('xmlns:content:encoded', '');
    $item->addChildCData('xmlns:excerpt:encoded', '');
    $item->addChild('xmlns:wp:post_id', (string) $i);
    $item->addChildCData('xmlns:wp:post_date', $postDate);
    $item->addChildCData('xmlns:wp:post_date_gmt', $postDate);
    $item->addChildCData('xmlns:wp:comment_status', 'closed');
    $item->addChildCData('xmlns:wp:ping_status', 'closed');
    // The slug comes from the file name. The previous code read $post['name'],
    // but $post is no longer a row once the article loop has finished.
    $item->addChildCData('xmlns:wp:post_name', pathinfo($attachment['name'], PATHINFO_FILENAME));
    $item->addChildCData('xmlns:wp:status', 'publish');
    $item->addChild('xmlns:wp:post_parent', '0');
    $item->addChild('xmlns:wp:menu_order', '0');
    $item->addChildCData('xmlns:wp:post_type', 'attachment');
    $item->addChildCData('xmlns:wp:post_password', '');
    $item->addChild('xmlns:wp:is_sticky', '0');
    $item->addChild('xmlns:wp:attachment_url', $xmlText($attachment['url']));

    // WXR nests each key/value pair inside its own <wp:postmeta> element.
    $meta = $item->addChild('xmlns:wp:postmeta');
    $meta->addChild('xmlns:wp:meta_key', '_wp_attached_file');
    $meta->addChild('xmlns:wp:meta_value', $xmlText('/'.$attachment['name']));

    // Downloading items
    // Best effort: a file that cannot be fetched is logged and skipped so the
    // export itself still completes.
    $content = @file_get_contents($attachment['url']);
    if ($content !== false) {
        $localName = str_replace('/', '_', $attachment['name']);
        file_put_contents($imagesDir.DIRECTORY_SEPARATOR.$localName, $content);
    } else {
        error_log('Unable to download attachment '.$attachment['url']);
    }

    // Every item needs its own wp:post_id.
    $i++;
}

// Serialise once, then both store the document on disk and send it to the client.
$document = $xml->asXML();
file_put_contents($exportDir.DIRECTORY_SEPARATOR.'wordpress.xml', $document);
header('Content-type: text/xml');
echo $document;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment