Last active
December 13, 2015 16:39
-
-
Save chrismeller/4941933 to your computer and use it in GitHub Desktop.
Crappy WXR Parser using PHP DOM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Minimal WXR (WordPress export) reader built on DOMDocument and DOMXPath.
 *
 * Loads one export file and collects, as plain arrays:
 *   - the channel metadata (title, link, description, ...),
 *   - the channel-level categories ($cats) and tags ($ts),
 *   - every <item> with its categories, comments and post meta ($is).
 * It then dumps the items and prints "<channel title>: <channel link>".
 *
 * Elements that are absent from the export yield null instead of aborting
 * the script.
 *
 * Usage (CLI): php <this-script> [path/to/export.xml]
 */

date_default_timezone_set( 'UTC' );
error_reporting( -1 );
ini_set( 'display_errors', true );

/**
 * Evaluate an XPath expression relative to a context node.
 *
 * @param DOMXPath $xpath      XPath evaluator bound to the loaded document.
 * @param string   $expression XPath expression.
 * @param DOMNode  $context    Node the expression is evaluated against.
 *
 * @return DOMNodeList|array The matched nodes; an empty array when the
 *                           expression could not be evaluated.
 */
function wxr_nodes( DOMXPath $xpath, $expression, DOMNode $context )
{
    $matches = $xpath->query( $expression, $context );

    // query() returns false rather than an empty list when the expression
    // cannot be evaluated (for instance, an unregistered namespace prefix).
    // Normalise that so callers can always iterate the result.
    return $matches === false ? array() : $matches;
}

/**
 * Return the first node matched by an XPath expression.
 *
 * @param DOMXPath $xpath      XPath evaluator bound to the loaded document.
 * @param string   $expression XPath expression.
 * @param DOMNode  $context    Node the expression is evaluated against.
 *
 * @return DOMNode|null The first match, or null when nothing matches.
 */
function wxr_first_node( DOMXPath $xpath, $expression, DOMNode $context )
{
    foreach ( wxr_nodes( $xpath, $expression, $context ) as $match ) {
        return $match;
    }

    return null;
}

/**
 * Return the text of the first node matched by an XPath expression.
 *
 * @param DOMXPath $xpath      XPath evaluator bound to the loaded document.
 * @param string   $expression XPath expression.
 * @param DOMNode  $context    Node the expression is evaluated against.
 *
 * @return string|null The first match's nodeValue, or null when nothing matches.
 */
function wxr_first_value( DOMXPath $xpath, $expression, DOMNode $context )
{
    $node = wxr_first_node( $xpath, $expression, $context );

    return $node === null ? null : $node->nodeValue;
}

/**
 * Read several single-valued children of one node in a single call.
 *
 * @param DOMXPath $xpath   XPath evaluator bound to the loaded document.
 * @param DOMNode  $context Node the expressions are evaluated against.
 * @param array    $fields  Map of result key => XPath expression.
 *
 * @return array Map of result key => string|null, in the order of $fields.
 */
function wxr_values( DOMXPath $xpath, DOMNode $context, array $fields )
{
    $values = array();
    foreach ( $fields as $key => $expression ) {
        $values[ $key ] = wxr_first_value( $xpath, $expression, $context );
    }

    return $values;
}

// The export path may be passed as the first CLI argument; the original
// hard-coded location remains the default. The override is limited to the CLI
// so a web request can never choose which file gets read.
$file = ( PHP_SAPI === 'cli' && isset( $argv[1] ) )
    ? $argv[1]
    : '/Users/chris/Downloads/wordpress-wxr-example.xml';

$contents = file_get_contents( $file );
if ( $contents === false || $contents === '' ) {
    throw new RuntimeException( 'Unable to read WXR file: ' . $file );
}

$dom = new DOMDocument( '1.0', 'utf-8' );
// LIBXML_NOCDATA merges CDATA sections into text nodes.
if ( ! $dom->loadXML( $contents, LIBXML_NOCDATA ) ) {
    throw new RuntimeException( 'Unable to parse WXR file as XML: ' . $file );
}

$xpath = new DOMXPath( $dom );

// <channel> is looked up as a direct child of the document's root element.
$channel = wxr_first_node( $xpath, './channel', $dom->documentElement );
if ( $channel === null ) {
    throw new RuntimeException( 'No <channel> element found in: ' . $file );
}

// Channel metadata.
$title         = wxr_first_value( $xpath, './title', $channel );
$link          = wxr_first_value( $xpath, './link', $channel );
$description   = wxr_first_value( $xpath, './description', $channel );
$pubdate       = wxr_first_value( $xpath, './pubDate', $channel );
$generator     = wxr_first_value( $xpath, './generator', $channel );
$language      = wxr_first_value( $xpath, './language', $channel );
$base_site_url = wxr_first_value( $xpath, './wp:base_site_url', $channel );
$base_blog_url = wxr_first_value( $xpath, './wp:base_blog_url', $channel );
$wxr_version   = wxr_first_value( $xpath, './wp:wxr_version', $channel );

// Channel-level categories.
$categories = wxr_nodes( $xpath, './wp:category', $channel );
$cats = array();
foreach ( $categories as $category ) {
    $cats[] = wxr_values( $xpath, $category, array(
        'nicename' => './wp:category_nicename',
        'parent'   => './wp:category_parent',
        'name'     => './wp:cat_name',
    ) );
}

// Channel-level tags.
$tags = wxr_nodes( $xpath, './wp:tag', $channel );
$ts = array();
foreach ( $tags as $tag ) {
    $ts[] = wxr_values( $xpath, $tag, array(
        'slug' => './wp:tag_slug',
        'name' => './wp:tag_name',
    ) );
}

// Single-valued children read from every <item>; key order defines the
// order of the dumped array.
$item_fields = array(
    'title'           => './title',
    'link'            => './link',
    'pubdate'         => './pubDate',
    'creator'         => './dc:creator',
    'description'     => './description',
    'content_encoded' => './content:encoded',
    'post_id'         => './wp:post_id',
    'post_date'       => './wp:post_date',
    'post_date_gmt'   => './wp:post_date_gmt',
    'comment_status'  => './wp:comment_status',
    'ping_status'     => './wp:ping_status',
    'post_name'       => './wp:post_name',
    'status'          => './wp:status',
    'post_parent'     => './wp:post_parent',
    'menu_order'      => './wp:menu_order',
    'post_type'       => './wp:post_type',
    'post_password'   => './wp:post_password',
    'excerpt_encoded' => './excerpt:encoded',
    'is_sticky'       => './wp:is_sticky',
);

// Single-valued children read from every <wp:comment>.
$comment_fields = array(
    'id'           => './wp:comment_id',
    'author'       => './wp:comment_author',
    'author_email' => './wp:comment_author_email',
    'author_url'   => './wp:comment_author_url',
    'author_ip'    => './wp:comment_author_IP',
    'date'         => './wp:comment_date',
    'date_gmt'     => './wp:comment_date_gmt',
    'content'      => './wp:comment_content',
    'approved'     => './wp:comment_approved',
    'type'         => './wp:comment_type',
    'parent'       => './wp:comment_parent',
    'user_id'      => './wp:comment_user_id',
);

$items = wxr_nodes( $xpath, './item', $channel );
$is = array();
foreach ( $items as $item ) {
    $i = wxr_values( $xpath, $item, $item_fields );

    // The guid carries both a value and an isPermaLink attribute.
    $guid = wxr_first_node( $xpath, './guid', $item );
    $i['guid_is_permalink'] = $guid === null ? null : $guid->getAttribute( 'isPermaLink' );
    $i['guid']              = $guid === null ? null : $guid->nodeValue;

    // Distinct loop variable names keep the channel-level $categories and
    // $category results above from being overwritten.
    $i['categories'] = array();
    foreach ( wxr_nodes( $xpath, './category', $item ) as $item_category ) {
        $i['categories'][] = array(
            'name'     => $item_category->nodeValue,
            'domain'   => $item_category->getAttribute( 'domain' ),
            'nicename' => $item_category->getAttribute( 'nicename' ),
        );
    }

    $i['comments'] = array();
    foreach ( wxr_nodes( $xpath, './wp:comment', $item ) as $comment ) {
        $i['comments'][] = wxr_values( $xpath, $comment, $comment_fields );
    }

    // Post meta is keyed by meta_key, so when a key repeats only the last
    // value is kept.
    $i['meta'] = array();
    foreach ( wxr_nodes( $xpath, './wp:postmeta', $item ) as $metar ) {
        $key = wxr_first_value( $xpath, './wp:meta_key', $metar );
        if ( $key === null ) {
            // An entry without a key cannot be indexed; skip it.
            continue;
        }
        $i['meta'][ $key ] = wxr_first_value( $xpath, './wp:meta_value', $metar );
    }

    $is[] = $i;
}

var_dump( $is );
echo $title . ': ' . $link;
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment