Last active
December 13, 2015 16:39
-
-
Save chrismeller/4941933 to your computer and use it in GitHub Desktop.
Crappy WXR Parser using PHP DOM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Minimal WXR (WordPress eXtended RSS) parser built on PHP's DOM extension.
 *
 * Usage: php <this-script> [path/to/export.xml]
 *
 * Reads the export file, extracts the channel header fields, the channel-level
 * categories and tags, and every <item> (with its categories, comments and
 * post meta), then dumps the items and prints "<title>: <link>".
 *
 * All XPath expressions use the namespace prefixes exactly as they appear in
 * the export (wp:, dc:, content:, excerpt:). No namespaces are registered
 * explicitly; DOMXPath::query() registers the in-scope namespaces of the
 * context node by default, so the prefixes declared in the document resolve.
 *
 * Elements that are absent from the export yield null instead of raising
 * "property of non-object" warnings or a fatal error.
 */

date_default_timezone_set( 'UTC' );
error_reporting( -1 );
ini_set( 'display_errors', true );

/**
 * Runs an XPath query and always returns something that is safe to foreach over.
 *
 * DOMXPath::query() returns false when the expression cannot be evaluated
 * (for example when it uses a namespace prefix the document never declares);
 * that case is mapped to an empty array.
 *
 * @param DOMXPath $xpath      XPath evaluator bound to the loaded document.
 * @param string   $expression XPath expression, relative to $context.
 * @param DOMNode  $context    Node the expression is evaluated against.
 * @return DOMNodeList|array Matching nodes, or an empty array on query failure.
 */
function wxr_nodes( DOMXPath $xpath, $expression, DOMNode $context ) {
    $nodes = $xpath->query( $expression, $context );

    return ( $nodes === false ) ? array() : $nodes;
}

/**
 * Returns the nodeValue of the first node matching $expression.
 *
 * @param DOMXPath    $xpath      XPath evaluator bound to the loaded document.
 * @param string      $expression XPath expression, relative to $context.
 * @param DOMNode     $context    Node the expression is evaluated against.
 * @param string|null $default    Value returned when nothing matches.
 * @return string|null Text of the first match, or $default when there is none.
 */
function wxr_text( DOMXPath $xpath, $expression, DOMNode $context, $default = null ) {
    foreach ( wxr_nodes( $xpath, $expression, $context ) as $node ) {
        return $node->nodeValue;
    }

    return $default;
}

/**
 * Extracts several text fields from one context node in a single call.
 *
 * @param DOMXPath $xpath   XPath evaluator bound to the loaded document.
 * @param DOMNode  $context Node every expression is evaluated against.
 * @param array    $map     Output key => XPath expression.
 * @return array Output key => text of the first match (null when missing),
 *               in the same key order as $map.
 */
function wxr_fields( DOMXPath $xpath, DOMNode $context, array $map ) {
    $fields = array();
    foreach ( $map as $key => $expression ) {
        $fields[ $key ] = wxr_text( $xpath, $expression, $context );
    }

    return $fields;
}

/**
 * Converts one <item> element into an associative array.
 *
 * @param DOMXPath $xpath XPath evaluator bound to the loaded document.
 * @param DOMNode  $item  The <item> element.
 * @return array Item fields plus 'guid', 'guid_is_permalink', 'categories',
 *               'comments' and 'meta'.
 */
function wxr_parse_item( DOMXPath $xpath, DOMNode $item ) {
    $post = wxr_fields( $xpath, $item, array(
        'title'           => './title',
        'link'            => './link',
        'pubdate'         => './pubDate',
        'creator'         => './dc:creator',
        'description'     => './description',
        'content_encoded' => './content:encoded',
        'post_id'         => './wp:post_id',
        'post_date'       => './wp:post_date',
        'post_date_gmt'   => './wp:post_date_gmt',
        'comment_status'  => './wp:comment_status',
        'ping_status'     => './wp:ping_status',
        'post_name'       => './wp:post_name',
        'status'          => './wp:status',
        'post_parent'     => './wp:post_parent',
        'menu_order'      => './wp:menu_order',
        'post_type'       => './wp:post_type',
        'post_password'   => './wp:post_password',
        'excerpt_encoded' => './excerpt:encoded',
        'is_sticky'       => './wp:is_sticky',
    ) );

    // The guid needs the element itself because one value lives in an attribute.
    $post['guid_is_permalink'] = null;
    $post['guid']              = null;
    foreach ( wxr_nodes( $xpath, './guid', $item ) as $guid ) {
        $post['guid_is_permalink'] = $guid->getAttribute( 'isPermaLink' );
        $post['guid']              = $guid->nodeValue;
        break; // only the first <guid> is used
    }

    $post['categories'] = array();
    foreach ( wxr_nodes( $xpath, './category', $item ) as $item_category ) {
        $post['categories'][] = array(
            'name'     => $item_category->nodeValue,
            'domain'   => $item_category->getAttribute( 'domain' ),
            'nicename' => $item_category->getAttribute( 'nicename' ),
        );
    }

    $post['comments'] = array();
    foreach ( wxr_nodes( $xpath, './wp:comment', $item ) as $comment ) {
        $post['comments'][] = wxr_fields( $xpath, $comment, array(
            'id'           => './wp:comment_id',
            'author'       => './wp:comment_author',
            'author_email' => './wp:comment_author_email',
            'author_url'   => './wp:comment_author_url',
            'author_ip'    => './wp:comment_author_IP',
            'date'         => './wp:comment_date',
            'date_gmt'     => './wp:comment_date_gmt',
            'content'      => './wp:comment_content',
            'approved'     => './wp:comment_approved',
            'type'         => './wp:comment_type',
            'parent'       => './wp:comment_parent',
            'user_id'      => './wp:comment_user_id',
        ) );
    }

    // NOTE: meta is keyed by meta_key, so when a key repeats within one item
    // the last value wins.
    $post['meta'] = array();
    foreach ( wxr_nodes( $xpath, './wp:postmeta', $item ) as $postmeta ) {
        $key = wxr_text( $xpath, './wp:meta_key', $postmeta );
        if ( $key === null ) {
            continue; // a postmeta entry without a key cannot be indexed
        }
        $post['meta'][ $key ] = wxr_text( $xpath, './wp:meta_value', $postmeta );
    }

    return $post;
}

// Input file: first CLI argument when given, otherwise the original default path.
$file = isset( $argv[1] ) ? $argv[1] : '/Users/chris/Downloads/wordpress-wxr-example.xml';

if ( ! is_file( $file ) || ! is_readable( $file ) ) {
    throw new RuntimeException( 'WXR file not found or not readable: ' . $file );
}

$contents = file_get_contents( $file );
if ( $contents === false || $contents === '' ) {
    throw new RuntimeException( 'WXR file could not be read or is empty: ' . $file );
}

$dom = new DOMDocument( '1.0', 'utf-8' );

// LIBXML_NOCDATA merges CDATA sections into plain text nodes.
if ( ! $dom->loadXML( $contents, LIBXML_NOCDATA ) ) {
    throw new RuntimeException( 'WXR file is not well-formed XML: ' . $file );
}

$xpath = new DOMXPath( $dom );

// Without a context node the query is evaluated relative to the root element.
$channels = $xpath->query( './channel' );
$channel  = ( $channels !== false ) ? $channels->item(0) : null;
if ( $channel === null ) {
    throw new RuntimeException( 'No <channel> element found in: ' . $file );
}

// Channel header fields.
$title         = wxr_text( $xpath, './title', $channel );
$link          = wxr_text( $xpath, './link', $channel );
$description   = wxr_text( $xpath, './description', $channel );
$pubdate       = wxr_text( $xpath, './pubDate', $channel );
$generator     = wxr_text( $xpath, './generator', $channel );
$language      = wxr_text( $xpath, './language', $channel );
$base_site_url = wxr_text( $xpath, './wp:base_site_url', $channel );
$base_blog_url = wxr_text( $xpath, './wp:base_blog_url', $channel );
$wxr_version   = wxr_text( $xpath, './wp:wxr_version', $channel );

// Channel-level category definitions.
$categories = wxr_nodes( $xpath, './wp:category', $channel );
$cats       = array();
foreach ( $categories as $category ) {
    $cats[] = wxr_fields( $xpath, $category, array(
        'nicename' => './wp:category_nicename',
        'parent'   => './wp:category_parent',
        'name'     => './wp:cat_name',
    ) );
}

// Channel-level tag definitions.
$tags = wxr_nodes( $xpath, './wp:tag', $channel );
$ts   = array();
foreach ( $tags as $tag ) {
    $ts[] = wxr_fields( $xpath, $tag, array(
        'slug' => './wp:tag_slug',
        'name' => './wp:tag_name',
    ) );
}

// Items: one <item> element per entry.
$items = wxr_nodes( $xpath, './item', $channel );
$is    = array();
foreach ( $items as $item ) {
    $is[] = wxr_parse_item( $xpath, $item );
}

var_dump( $is );

echo $title . ': ' . $link;

// The closing tag is kept on purpose: non-PHP text follows this script in the file.
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment