Skip to content

Instantly share code, notes, and snippets.

@cnicodeme
Created October 20, 2016 09:33
Show Gist options
  • Save cnicodeme/6b6beddda408b3eed37c51795a2bdc4f to your computer and use it in GitHub Desktop.
Save cnicodeme/6b6beddda408b3eed37c51795a2bdc4f to your computer and use it in GitHub Desktop.
Wordpress XML export creation in PHP
<?php
// Every date produced by this script is computed in UTC.
date_default_timezone_set('UTC');
// strftime() pattern for RFC 822 style dates with a fixed +0000 offset.
define('DATE_FORMAT', '%a, %d %b %Y %H:%M:%S +0000');

// Host name of the blog being exported and the id of its row set in the database.
$domain = 'blog.cnicodeme.com';
$blogId = 1;

// Database connection: the session is switched to UTF8 on connect and
// PDO is told to throw exceptions on errors.
$pdoOptions = array(PDO::MYSQL_ATTR_INIT_COMMAND => "SET NAMES 'UTF8'");
$pdo = new PDO('mysql:dbname=postera;host=127.0.0.1', 'root', '', $pdoOptions);
$pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

// The two SQL queries below are the only parts that depend on your own schema;
// adapt them to your database and leave the rest of the script untouched.
$tagsStmt = $pdo->prepare(
    'SELECT t.name AS name FROM tags t LEFT JOIN article_has_tags aht ON aht.tag_id = t.id WHERE aht.article_id = :id'
);
$articlesSql = "SELECT id, unique_id, name, title, content, created, published, last_updated FROM articles WHERE blog_id = {$blogId} AND removed IS NULL AND is_visible = 1;";
$stmt = $pdo->query($articlesSql);
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~ Starting the real code :) ~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
 * SimpleXMLElement variant that can create child elements whose text
 * content is stored in a CDATA section.
 */
class SimpleXMLExtended extends SimpleXMLElement
{
    /**
     * Appends a CDATA section containing the given text to this element.
     *
     * @param string $cdata_text Text to place inside the CDATA section.
     */
    private function addCData($cdata_text)
    {
        // CDATA sections can only be created through the DOM API, so work on
        // the DOM node that backs this SimpleXML element.
        $element = dom_import_simplexml($this);
        $section = $element->ownerDocument->createCDATASection($cdata_text);
        $element->appendChild($section);
    }

    /**
     * Adds a child element and stores its value as a CDATA section.
     *
     * @param string      $name  Name of the child element.
     * @param string      $value Text to wrap in CDATA inside the new child.
     * @param string|null $ns    Optional namespace, forwarded to addChild().
     *
     * @return SimpleXMLExtended The newly created child element.
     */
    public function addChildCData($name, $value, $ns = null)
    {
        // Create the child empty first, then attach the CDATA payload to it.
        $child = $this->addChild($name, '', $ns);
        $child->addCData($value);

        return $child;
    }
}
// Root <rss> element of the WXR document.
$xml = new SimpleXMLExtended('<rss />');

// Namespace declarations. SimpleXML splits a qualified name at its first colon
// and drops that leading prefix, so the doubled "xmlns:" makes the attribute
// serialise as e.g. xmlns:excerpt="...".
$xml->addAttribute('xmlns:xmlns:excerpt', 'http://wordpress.org/export/1.2/excerpt/');
$xml->addAttribute('xmlns:xmlns:content', 'http://purl.org/rss/1.0/modules/content/');
$xml->addAttribute('xmlns:xmlns:wfw', 'http://wellformedweb.org/CommentAPI/');
$xml->addAttribute('xmlns:xmlns:dc', 'http://purl.org/dc/elements/1.1/');
$xml->addAttribute('xmlns:xmlns:wp', 'http://wordpress.org/export/1.2/');
$xml->addAttribute('version', '2.0');

// Channel-level metadata describing the blog.
$channel = $xml->addChild('channel');
$channel->addChild('title', 'Cyril\'s blog');
$channel->addChild('link', 'http://blog.cnicodeme.com');
$channel->addChild('description', 'Adventures of an entrepreneur');
// gmdate() replaces strftime(), which is deprecated since PHP 8.1; DATE_RSS
// yields the same "Thu, 20 Oct 2016 09:33:00 +0000" layout.
$channel->addChild('pubDate', gmdate(DATE_RSS));
$channel->addChild('language', 'en-US');
// The wp:* elements use the same "xmlns:" name prefix trick as the item
// elements further down. Passing 'wp' as the namespace argument would instead
// declare a namespace whose URI is literally "wp" on each of these elements,
// which does not match the xmlns:wp declaration on <rss>.
$channel->addChild('xmlns:wp:wxr_version', '1.2');
$channel->addChild('xmlns:wp:base_site_url', 'http://cnicodeme.com');
$channel->addChild('xmlns:wp:base_blog_url', 'http://blog.cnicodeme.com');
$channel->addChild('generator', 'https://wordpress.org/?v=4.6.1');
// Running value for wp:post_id; the attachment items emitted later keep using it.
$i = 0;
// Post names already exported, used to detect duplicates.
$guid = array();
// Files referenced by the posts, collected here and exported afterwards.
$attachments = array();

// Matches src="..." attributes pointing at this blog, either absolute
// (http://<domain>/path) or root-relative (/path). The domain is quoted so
// its dots only match literal dots.
$srcPattern = '/src=["\'](http:\/\/'.preg_quote($domain, '/').')?\/([^\'"]*)["\']/i';
// SimpleXMLElement::addChild() does not escape "&" in values, so plain-text
// values are escaped with these flags before being handed to it.
$xmlTextFlags = ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE;

while ($post = $stmt->fetch(PDO::FETCH_ASSOC)) {
    if (isset($guid[$post['name']])) {
        // Post names end up in links and guids, so they must be unique.
        exit('The post name "'.$post['name'].'" already exists!');
    }
    $guid[$post['name']] = true;

    // gmdate() replaces the deprecated strftime(). An unparsable date falls
    // back to timestamp 0, as it did before.
    $timestamp = (int) strtotime((string) $post['published']);
    // RFC 822 date for <pubDate>.
    $published = gmdate(DATE_RSS, $timestamp);
    // WXR stores wp:post_date in MySQL datetime format (YYYY-MM-DD HH:MM:SS).
    $publishedSql = gmdate('Y-m-d H:i:s', $timestamp);

    $postUrl = 'http://'.$domain.'/'.$post['name'];

    $item = $channel->addChild('item');
    $item->addChild('title', htmlspecialchars((string) $post['title'], $xmlTextFlags, 'UTF-8'));
    $item->addChild('link', htmlspecialchars($postUrl, $xmlTextFlags, 'UTF-8'));
    $item->addChild('pubDate', $published);
    $item->addChildCData('xmlns:dc:creator', 'Cyril Nicodème');
    $permalink = $item->addChild('guid', htmlspecialchars($postUrl, $xmlTextFlags, 'UTF-8'));
    $permalink->addAttribute('isPermaLink', 'false');
    $item->addChild('description', '');

    // Rewrite the legacy host name before exporting the body.
    $post['content'] = str_replace('blog1.reflectiv.net', 'blog.reflectiv.net', $post['content']);
    $item->addChildCData('xmlns:content:encoded', $post['content']);

    // The excerpt is everything before the "more" marker; the spaced variants
    // of the marker are normalised first.
    $more = str_replace(
        array('<!--more -->', '<!-- more-->', '<!-- more -->'),
        '<!--more-->',
        $post['content']
    );
    $item->addChildCData('xmlns:excerpt:encoded', explode('<!--more-->', $more)[0]);

    $item->addChild('xmlns:wp:post_id', (string) $i);
    $item->addChildCData('xmlns:wp:post_date', $publishedSql);
    $item->addChildCData('xmlns:wp:post_date_gmt', $publishedSql);
    $item->addChildCData('xmlns:wp:comment_status', 'closed');
    $item->addChildCData('xmlns:wp:ping_status', 'closed');
    $item->addChildCData('xmlns:wp:post_name', $post['name']);
    $item->addChildCData('xmlns:wp:status', 'publish');
    $item->addChild('xmlns:wp:post_parent', '0');
    $item->addChild('xmlns:wp:menu_order', '0');
    $item->addChildCData('xmlns:wp:post_type', 'post');
    $item->addChildCData('xmlns:wp:post_password', '');
    $item->addChild('xmlns:wp:is_sticky', '0');

    // TAGS
    $tagsStmt->execute(array('id' => $post['id']));
    while ($tag = $tagsStmt->fetch(PDO::FETCH_ASSOC)) {
        // The element gets its own variable: reusing $tag would overwrite the
        // fetched row before its name is read for the nicename attribute.
        $category = $item->addChild('category', htmlspecialchars((string) $tag['name'], $xmlTextFlags, 'UTF-8'));
        $category->addAttribute('domain', 'post_tag');
        $category->addAttribute('nicename', $tag['name']);
    }

    // UPLOADS
    preg_match_all($srcPattern, $post['content'], $matches, PREG_PATTERN_ORDER);
    foreach ($matches[2] as $url) {
        // A captured path that still starts with "/" came from a "//host/..."
        // style URL, which is not a file on this blog.
        if (substr($url, 0, 1) === '/') {
            continue;
        }
        $attachments[] = array(
            'url' => 'http://'.$domain.'/'.$url,
            'name' => $url,
            'date' => $published
        );
    }

    $i++;
}
// Local folder that receives a copy of every referenced file.
$imagesDir = dirname(__FILE__).DIRECTORY_SEPARATOR.$domain.DIRECTORY_SEPARATOR.'images';
// Downloading is best-effort: a failure is logged but never aborts the export.
// Warnings stay suppressed so they cannot leak into the XML echoed afterwards.
if (!is_dir($imagesDir) && !@mkdir($imagesDir, 0777, true) && !is_dir($imagesDir)) {
    error_log('Could not create directory "'.$imagesDir.'"');
}

// SimpleXMLElement::addChild() does not escape "&" in values, so plain-text
// values are escaped with these flags before being handed to it.
$attachmentTextFlags = ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE;

// One <item> of type "attachment" per file referenced from the posts.
foreach ($attachments as $attachment) {
    $attachmentUrl = htmlspecialchars($attachment['url'], $attachmentTextFlags, 'UTF-8');
    // WXR stores wp:post_date in MySQL datetime format (YYYY-MM-DD HH:MM:SS),
    // so the RFC 822 date carried by the attachment is converted.
    $attachmentDate = gmdate('Y-m-d H:i:s', (int) strtotime($attachment['date']));

    $item = $channel->addChild('item');
    $item->addChild('title', htmlspecialchars($attachment['name'], $attachmentTextFlags, 'UTF-8'));
    $item->addChild('link', $attachmentUrl);
    $item->addChild('pubDate', $attachment['date']);
    $item->addChildCData('xmlns:dc:creator', 'Cyril Nicodème');
    $permalink = $item->addChild('guid', $attachmentUrl);
    $permalink->addAttribute('isPermaLink', 'false');
    $item->addChild('description', '');
    $item->addChildCData('xmlns:content:encoded', '');
    $item->addChildCData('xmlns:excerpt:encoded', '');
    $item->addChild('xmlns:wp:post_id', (string) $i);
    $item->addChildCData('xmlns:wp:post_date', $attachmentDate);
    $item->addChildCData('xmlns:wp:post_date_gmt', $attachmentDate);
    $item->addChildCData('xmlns:wp:comment_status', 'closed');
    $item->addChildCData('xmlns:wp:ping_status', 'closed');
    // The slug comes from the attachment's own file name (without extension).
    // $post cannot be used here: it is false once the posts loop has finished.
    $item->addChildCData('xmlns:wp:post_name', pathinfo($attachment['name'], PATHINFO_FILENAME));
    $item->addChildCData('xmlns:wp:status', 'publish');
    $item->addChild('xmlns:wp:post_parent', '0');
    $item->addChild('xmlns:wp:menu_order', '0');
    $item->addChildCData('xmlns:wp:post_type', 'attachment');
    $item->addChildCData('xmlns:wp:post_password', '');
    $item->addChild('xmlns:wp:is_sticky', '0');
    $item->addChild('xmlns:wp:attachment_url', $attachmentUrl);
    // NOTE(review): WordPress' own exports nest meta_key/meta_value inside a
    // <wp:postmeta> element; the flat layout is kept as is - confirm the
    // importer actually picks these two elements up.
    $item->addChild('xmlns:wp:meta_key', '_wp_attached_file');
    $item->addChild('xmlns:wp:meta_value', htmlspecialchars('/'.$attachment['name'], $attachmentTextFlags, 'UTF-8'));

    // Downloading items
    $content = @file_get_contents($attachment['url']);
    if ($content !== false) {
        // Slashes are flattened so every file lands directly in the images folder.
        $localFile = $imagesDir.DIRECTORY_SEPARATOR.str_replace('/', '_', $attachment['name']);
        if (@file_put_contents($localFile, $content) === false) {
            error_log('Could not write "'.$localFile.'"');
        }
    } else {
        error_log('Could not download "'.$attachment['url'].'"');
    }

    // Each attachment needs its own wp:post_id, continuing the numbering
    // started by the posts.
    $i++;
}
// Serialise the document once, then both store it next to the downloaded
// images and send it to the client as XML.
$document = $xml->asXML();
$exportFile = dirname(__FILE__).DIRECTORY_SEPARATOR.$domain.DIRECTORY_SEPARATOR.'wordpress.xml';
file_put_contents($exportFile, $document);
header('Content-type: text/xml');
echo $document;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment