Created
January 22, 2013 15:40
-
-
Save moonthug/4595624 to your computer and use it in GitHub Desktop.
Extract users and posts from a WordPress data export XML file and insert them into PyroCMS.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
////////////////////////////////////////////////////////////////////////////////
//
// WordPress -> PyroCMS
//
// Reads a WordPress export XML file and inserts its authors and posts into
// the PyroCMS tables of the configured schema.
//
//
// Config
//
// Replace every <PLACEHOLDER> below before running the script.
//
// Path of the WordPress export XML file to read.
$filename = "<WP EXPORT FILE>";
// Database schema that holds the PyroCMS tables.
$db_schema = "<SCHEMA>";
// Prefix put in front of every table name ("{prefix}_users", "{prefix}_blog", ...).
$site_prefix = "<SITE PREFIX>"; // i.e. "default"
// Written as-is into the `password` column of every imported user.
$db_users_default_pass = "<ENCODED_PASS>"; // i.e. "bd0fc5f02742c4e2a94f7fc6baa0d9844b69544a" = m00m00
// Written as-is into the `salt` column of every imported user.
$db_users_default_salt = "<SALT>"; // i.e. "ef7ab2" = ^ salt
////////////////////////////////////////////////////////////////////////////////
//
// Setup
//
//
// DB
// NOTE(review): the two "root" arguments look like hard-coded user/password
// for a local development server - confirm and adjust before running elsewhere.
include_once "ezsql_core.php";
include_once "ezsql.php";
$db = new ezSQL_mysql("root", "root", $db_schema, "localhost");
//
// File IO
// Load the whole export into memory. Stop right away when the file cannot be
// read or is empty, instead of carrying on and importing nothing.
if(!is_file($filename) || !is_readable($filename))
    die("Cannot read export file: {$filename}\n");
$data = file_get_contents($filename);
if($data === FALSE || $data === "")
    die("Export file is empty or could not be read: {$filename}\n");
//
// XML
// Namespace URIs used to look up the prefixed elements of the export.
define("NS_WP", "http://wordpress.org/export/1.2/");
define("NS_DC", "http://purl.org/dc/elements/1.1/");
define("NS_CONTENT", "http://purl.org/rss/1.0/modules/content/");
$doc = new DOMDocument();
// loadXML() returns FALSE when the document is not well-formed.
if(!$doc->loadXML($data))
    die("Export file is not well-formed XML: {$filename}\n");
//
// Variables
$author_map = array();   // author login  => id of the user row created for it
$tag_map = array();      // tag slug      => id of the keyword row created for it
$category_map = array(); // category slug => id of the category row created for it
$titles = array();       // bookkeeping of post titles already used
////////////////////////////////////////////////////////////////////////////////
//
// Authors
//
// Creates one user row and one profile row per <author> element of the export
// and records "login => new user id" in $author_map.
// Every imported user gets the default password/salt from the config section.
//
$author_list = $doc->getElementsByTagName("author");
foreach($author_list as $author)
{
    //
    // Read the author fields; a missing element yields an empty string instead
    // of a fatal "member access on NULL".
    $author_fields = array();
    foreach(array("author_login", "author_email", "author_display_name", "author_first_name", "author_last_name") as $field_name)
    {
        $field_node = $author->getElementsByTagNameNS(NS_WP, $field_name)->item(0);
        $author_fields[$field_name] = $field_node !== NULL ? $field_node->nodeValue : "";
    }
    // The raw login is kept as the key of $author_map.
    $author_login = $author_fields["author_login"];
    //
    // Escape everything that goes into SQL (a name like O'Brien would
    // otherwise break the statement).
    // NOTE(review): $db->escape() is used instead of mysql_real_escape_string()
    // because no query has run yet at this point; this assumes the bundled
    // ezSQL's escape() opens the connection on demand - confirm in ezsql.php.
    $sql_login = $db->escape($author_login);
    $author_email = $db->escape($author_fields["author_email"]);
    $author_username = $db->escape($author_fields["author_display_name"]);
    $author_first_name = $db->escape($author_fields["author_first_name"]);
    $author_last_name = $db->escape($author_fields["author_last_name"]);
    $created_on = time();
    //
    // Create User
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_users` (email, password, salt, group_id, ip_address, active, activation_code, created_on, last_login, username, forgotten_password_code, remember_code) " .
        "VALUES('{$author_email}', '{$db_users_default_pass}', '{$db_users_default_salt}', 2, '127.0.0.1', 1, NULL, {$created_on}, 0, '{$sql_login}', NULL, NULL);");
    $user_id = mysql_insert_id();
    // mysql_insert_id() yields 0 when the INSERT produced no auto-increment id
    // (e.g. it failed). Do not create a profile for user 0 or map the author to it.
    if(!$user_id)
    {
        trigger_error("Could not create user for author '{$author_login}'", E_USER_WARNING);
        continue;
    }
    //
    // Create Profile
    $created_date = date("Y-m-d H:i:s");
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_profiles` (`created`,`updated`,`created_by`,`ordering_count`,`user_id`,`display_name`,`first_name`,`last_name`,`company`,`lang`,`bio`,`dob`) " .
        "VALUES('{$created_date}', NULL, '1', '1', '{$user_id}', '{$author_username}', '{$author_first_name}', '{$author_last_name}', NULL, 'en', NULL, '0');");
    $author_map[$author_login] = $user_id;
}
////////////////////////////////////////////////////////////////////////////////
//
// Posts
//
// Inserts every non-trashed <item> of the export into the blog table, creating
// the keywords (tags) and blog categories it references on first use.
//

/**
 * Returns the text of the first descendant element with the given name.
 *
 * @param DOMElement  $parent  element to search below
 * @param string|null $ns      namespace URI, or NULL to look up by tag name only
 * @param string      $name    element name (without prefix)
 * @param string      $default value returned when no such element exists
 * @return string
 */
function wp2pyro_first_value($parent, $ns, $name, $default = "")
{
    $nodes = $ns === NULL
        ? $parent->getElementsByTagName($name)
        : $parent->getElementsByTagNameNS($ns, $name);
    $node = $nodes->item(0);
    return $node !== NULL ? $node->nodeValue : $default;
}

$item_list = $doc->getElementsByTagName("item");
foreach($item_list as $item)
{
    //
    // Ignore Trash - checked first so nothing is parsed or inserted for it
    $post_status = wp2pyro_first_value($item, NS_WP, "status");
    if($post_status === "trash")
        continue;
    //
    // Handle Post
    // DOM text is always UTF-8; name the charset explicitly because
    // htmlentities() defaults to ISO-8859-1 before PHP 5.4.
    $post_title = mysql_real_escape_string(htmlentities(wp2pyro_first_value($item, NULL, "title"), ENT_COMPAT, "UTF-8"));
    // strlen() rather than empty(): a post titled "0" keeps its title.
    $post_title = strlen((string) $post_title) > 0 ? $post_title : "Untitled";
    $post_content = mysql_real_escape_string(htmlentities(wp2pyro_first_value($item, NS_CONTENT, "encoded"), ENT_COMPAT, "UTF-8"));
    // strtotime() returns FALSE for an unparsable date, which would leave a
    // hole in the INSERT below; fall back to the import time.
    $post_date = strtotime(wp2pyro_first_value($item, NS_WP, "post_date"));
    if($post_date === FALSE)
        $post_date = time();
    //
    // Posts by an author that was not imported are assigned to user 1.
    $post_creator = wp2pyro_first_value($item, NS_DC, "creator");
    $post_creator = isset($author_map[$post_creator]) ? (int) $author_map[$post_creator] : 1;
    //
    // Published posts keep their WordPress slug; everything else (and a
    // published post without a slug) gets a generated one.
    $post_slug = "";
    if($post_status === "publish")
        $post_slug = wp2pyro_first_value($item, NS_WP, "post_name");
    if($post_slug === "")
        $post_slug = uniqid("post_");
    $post_slug = mysql_real_escape_string($post_slug);
    //
    // Handle Tags/Categories
    $post_categories = array();
    $post_tags = array();
    foreach($item->getElementsByTagName("category") as $category)
    {
        $tag_domain = $category->getAttribute("domain");
        $tag_slug = $category->getAttribute("nicename");
        // Escaped copies for SQL; the raw slug stays the key of the maps.
        $sql_slug = mysql_real_escape_string($tag_slug);
        $sql_value = mysql_real_escape_string($category->nodeValue);
        if($tag_domain === "post_tag")
        {
            // Create the keyword the first time its slug is seen.
            if(!isset($tag_map[$tag_slug]))
            {
                $db->query("INSERT INTO `$db_schema`.`{$site_prefix}_keywords` (`name`) VALUES ('{$sql_value}');");
                $tag_map[$tag_slug] = mysql_insert_id();
            }
            $post_tags[] = $tag_map[$tag_slug];
        }
        else
        {
            // Any other domain is treated as a blog category.
            if(!isset($category_map[$tag_slug]))
            {
                $db->query("INSERT INTO `$db_schema`.`{$site_prefix}_blog_categories` (`slug`, `title`) VALUES ('{$sql_slug}', '{$sql_value}');");
                $category_map[$tag_slug] = mysql_insert_id();
            }
            $post_categories[] = $category_map[$tag_slug];
        }
    }
    //
    // Handle applied keywords (Tags)
    // All tags of one post share a generated hash, which is what the blog row
    // stores in its `keywords` column.
    $post_tags_hash = "";
    if(count($post_tags) > 0)
    {
        $post_tags_hash = uniqid("tags_");
        $applied_rows = array();
        foreach($post_tags as $tag_id)
            $applied_rows[] = "('{$post_tags_hash}', " . (int) $tag_id . ")";
        $db->query("INSERT INTO `$db_schema`.`{$site_prefix}_keywords_applied` (`hash`, `keyword_id`) VALUES " . implode(",", $applied_rows) . ";");
    }
    //
    // Check unique titles
    // Titles are compared trimmed and lower-cased. A repeated title gets
    // "_<n>" appended; the loop keeps counting until the result is itself
    // unused, and the final title is registered so later posts cannot collide
    // with it either.
    $clean_title = trim(strtolower($post_title));
    if(isset($titles[$clean_title]))
    {
        $base_title = $post_title;
        do
        {
            $post_title = $base_title . "_" . $titles[$clean_title]["count"];
            $titles[$clean_title]["count"]++;
        }
        while(isset($titles[trim(strtolower($post_title))]));
    }
    $titles[trim(strtolower($post_title))] = array("name" => $post_title, "value" => trim(strtolower($post_title)), "count" => 1);
    //
    // Defaults
    // Only the first category is kept; posts without one go to category 1.
    $post_category = isset($post_categories[0]) ? (int) $post_categories[0] : 1;
    $post_status = $post_status === "publish" ? "live" : "draft";
    $post_date = (int) $post_date;
    //
    // Insert
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_blog` (`title`,`slug`,`category_id`,`attachment`,`intro`,`body`,`parsed`,`keywords`,`author_id`,`created_on`,`updated_on`,`comments_enabled`,`status`,`type`,`preview_hash`) ".
        "VALUES('{$post_title}', '{$post_slug}', $post_category, '', '{$post_content}','{$post_content}', '', '{$post_tags_hash}', $post_creator, $post_date, $post_date, 1, '{$post_status}', 'wysiwyg-advanced', '');");
}
echo "Done!";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment