Skip to content

Instantly share code, notes, and snippets.

@ethangardner
Last active September 16, 2016 14:04
Show Gist options
  • Save ethangardner/2fe4c5b4b75ac008a2fab21da35def4b to your computer and use it in GitHub Desktop.
Save ethangardner/2fe4c5b4b75ac008a2fab21da35def4b to your computer and use it in GitHub Desktop.
Makes a tab-delimited file from a WordPress post export
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<!-- Serialize as plain text. Markup-only serialization settings (indent,
     cdata-section-elements, standalone) do not apply to the text method,
     so they are not declared here. -->
<xsl:output method="text" encoding="UTF-8" />
<!-- Language slug (the category's nicename attribute) of the posts to export.
     Declared as a stylesheet parameter so the caller can override it when
     invoking the transform; defaults to "en". -->
<xsl:param name="language" select="'en'" />
<!-- Value of category/@domain that identifies a category as a language marker. -->
<xsl:variable name="domain" select="'language'" />
<xsl:template match="/">
<!-- Header row: six tab-separated column names, terminated by a carriage return. -->
<xsl:text>post_id&#x9;pubDate&#x9;status&#x9;title&#x9;link&#x9;categories&#xd;</xsl:text>
<!-- Hand every exported item to the matching template, which decides
     whether a data row is written for it. -->
<xsl:apply-templates select="rss/channel/item" />
</xsl:template>
<!--
  Writes a row for an exported item only when it is a post in the selected
  language. The language test requires ONE category element to carry both the
  language domain and the requested nicename. Checking the two attributes in
  separate category predicates would also accept a post in another language
  whenever any unrelated category (for example a tag whose slug is "en")
  happened to have the requested nicename.
-->
<xsl:template match="item" priority="3">
<!-- contains() also admits post types whose name merely includes "post".
     NOTE(review): confirm whether only the exact type "post" is wanted. -->
<xsl:if test="wp:post_type[contains(., 'post')] and category[@domain = $domain and @nicename = $language]">
<xsl:call-template name="item" />
</xsl:if>
</xsl:template>
<!--
  Writes one tab-separated data row for the current item:
  post_id, pubDate, status, title, link, and an empty categories cell.
  No wrapper element is emitted: the stylesheet serializes with the text
  method, so only character data reaches the output.
-->
<xsl:template name="item">
<xsl:value-of select="wp:post_id" />
<xsl:text>&#x9;</xsl:text>
<xsl:value-of select="pubDate" />
<xsl:text>&#x9;</xsl:text>
<xsl:value-of select="wp:status" />
<xsl:text>&#x9;</xsl:text>
<!-- Strip tab characters from the title so it cannot split into extra cells. -->
<xsl:value-of select="replace(title, '&#x9;', '')" />
<xsl:text>&#x9;</xsl:text>
<!-- The link is written with a fixed "?flag=us" query string appended. -->
<xsl:value-of select="link" />
<xsl:text>?flag=us</xsl:text>
<!-- Trailing tab leaves the categories cell empty, so every data row has
     the same number of cells as the header row. -->
<xsl:text>&#x9;</xsl:text>
<!-- Row terminator: carriage return, the same terminator the header row uses. -->
<xsl:text>&#xd;</xsl:text>
</xsl:template>
</xsl:stylesheet>
post_id pubDate status title link categories
602 Mon, 14 Apr 2014 19:19:22 +0000 publish Example post http://127.0.0.1/sample-post/?flag=us
<?xml version="1.0" encoding="UTF-8" ?>
<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. -->
<!-- It contains information about your site's posts, pages, comments, categories, and other content. -->
<!-- You may use this file to transfer that content from one site to another. -->
<!-- This file is not intended to serve as a complete backup of your site. -->
<!-- To import this information into a WordPress site follow these steps: -->
<!-- 1. Log in to that site as an administrator. -->
<!-- 2. Go to Tools: Import in the WordPress admin panel. -->
<!-- 3. Install the "WordPress" importer from the list. -->
<!-- 4. Activate & Run Importer. -->
<!-- 5. Upload this file using the form provided on that page. -->
<!-- 6. You will first be asked to map the authors in this export file to users -->
<!-- on the site. For each author, you may choose to map to an -->
<!-- existing user on the site or to create a new user. -->
<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. -->
<!-- contained in this file into your site. -->
<!-- generator="WordPress/4.6.1" created="2016-09-15 18:45" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
<title>Sitename</title>
<link>http://127.0.0.1</link>
<description>another wp website</description>
<pubDate>Thu, 15 Sep 2016 18:45:29 +0000</pubDate>
<language>en-US</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:base_site_url>http://127.0.0.1</wp:base_site_url>
<wp:base_blog_url>http://127.0.0.1</wp:base_blog_url>
<wp:author><wp:author_id>3</wp:author_id><wp:author_login><![CDATA[admin]]></wp:author_login><wp:author_email><![CDATA[[email protected]]]></wp:author_email><wp:author_display_name><![CDATA[admin]]></wp:author_display_name><wp:author_first_name><![CDATA[]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
<generator>https://wordpress.org/?v=4.6.1</generator>
<item>
<title>Example post</title>
<link>http://127.0.0.1/sample-post/</link>
<pubDate>Mon, 14 Apr 2014 19:19:22 +0000</pubDate>
<dc:creator><![CDATA[admin]]></dc:creator>
<guid isPermaLink="false">http://127.0.0.1/?p=602</guid>
<description></description>
<content:encoded><![CDATA[<a href="/wp-content/uploads/2014/05/file">Sample post</a>]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>602</wp:post_id>
<wp:post_date><![CDATA[2014-04-14 14:19:22]]></wp:post_date>
<wp:post_date_gmt><![CDATA[2014-04-14 19:19:22]]></wp:post_date_gmt>
<wp:comment_status><![CDATA[closed]]></wp:comment_status>
<wp:ping_status><![CDATA[closed]]></wp:ping_status>
<wp:post_name><![CDATA[sample-post]]></wp:post_name>
<wp:status><![CDATA[publish]]></wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[post]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<category domain="language" nicename="en"><![CDATA[English]]></category>
<category domain="category" nicename="newsroom"><![CDATA[Newsroom]]></category>
<category domain="post_translations" nicename="pll_54454839cbd7f"><![CDATA[pll_54454839cbd7f]]></category>
<wp:postmeta>
<wp:meta_key><![CDATA[_edit_last]]></wp:meta_key>
<wp:meta_value><![CDATA[6]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key><![CDATA[slide_template]]></wp:meta_key>
<wp:meta_value><![CDATA[default]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key><![CDATA[newsroom-pdf]]></wp:meta_key>
<wp:meta_value><![CDATA[761]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key><![CDATA[_newsroom-pdf]]></wp:meta_key>
<wp:meta_value><![CDATA[field_53921515bf5a9]]></wp:meta_value>
</wp:postmeta>
</item>
</channel>
</rss>
java -jar c:\tools\saxon\saxon9he.jar -s:sample_input.xml -xsl:posts_by_language.xsl -o:results.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment