Last active
September 16, 2016 14:04
-
-
Save ethangardner/2fe4c5b4b75ac008a2fab21da35def4b to your computer and use it in GitHub Desktop.
Makes a tab-delimited file from a WordPress post export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<xsl:stylesheet version="2.0" | |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | |
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" | |
xmlns:content="http://purl.org/rss/1.0/modules/content/" | |
xmlns:wfw="http://wellformedweb.org/CommentAPI/" | |
xmlns:dc="http://purl.org/dc/elements/1.1/" | |
xmlns:wp="http://wordpress.org/export/1.2/"> | |
<!--
  Serialization: plain text in UTF-8 (the result is a tab-delimited file).
  The indent, cdata-section-elements and standalone settings only apply to
  the markup output methods, so they are not declared for method="text".
-->
<xsl:output method="text" encoding="UTF-8" />

<!--
  Filter settings used by the item template: a post is exported when it has
  a category whose domain attribute equals $domain and whose nicename
  attribute equals $language.
  Declared as stylesheet parameters so a caller can override them without
  editing this file; the defaults keep the previous hard-coded values.
-->
<xsl:param name="language" select="'en'" />
<xsl:param name="domain" select="'language'" />
<!--
  Entry point. Writes the header row (column names separated by tabs and
  terminated by a line feed), then processes every rss/channel/item.
-->
<xsl:template match="/">
  <xsl:value-of
      select="('post_id', 'pubDate', 'status', 'title', 'link', 'categories')"
      separator="&#x9;" />
  <xsl:text>&#xA;</xsl:text>
  <xsl:apply-templates select="rss/channel/item" />
</xsl:template>
<!--
  Decides whether an item is exported. A row is written (via the named
  template "item") only when both conditions hold:
    1. wp:post_type contains the string 'post';
    2. a single category element carries BOTH domain = $domain and
       nicename = $language.
  Condition 2 tests both attributes on the same category element. Testing
  them in two separate predicates would also accept an item whose language
  category differs from $language but which has some unrelated category
  with a matching nicename.
  NOTE(review): contains() is a substring match, so any post type whose
  name merely includes 'post' is accepted as well; confirm this is intended.
-->
<xsl:template match="item" priority="3">
  <xsl:if test="wp:post_type[contains(., 'post')]
                and category[@domain = $domain and @nicename = $language]">
    <xsl:call-template name="item" />
  </xsl:if>
</xsl:template>
<!--
  Writes one tab-delimited row for the current item, in this cell order:
  wp:post_id, pubDate, wp:status, title, link. The link cell has the
  literal suffix "?flag=us" appended. The row ends with a line feed.

  Tabs and line breaks are removed from the title so it cannot split the
  row. The pattern uses the regex escapes \t, \r and \n on purpose: a
  literal tab typed inside an attribute value is turned into a space by
  the XML parser, which would make replace() strip spaces instead.

  No wrapper element is emitted: with the text output method only text
  nodes reach the result, so a literal result element adds nothing.

  NOTE(review): the header row names a sixth "categories" column, but no
  value and no trailing tab is written for it here; confirm whether
  consumers expect an empty sixth cell.
-->
<xsl:template name="item">
  <xsl:value-of select="wp:post_id" />
  <xsl:text>&#x9;</xsl:text>
  <xsl:value-of select="pubDate" />
  <xsl:text>&#x9;</xsl:text>
  <xsl:value-of select="wp:status" />
  <xsl:text>&#x9;</xsl:text>
  <xsl:value-of select="replace(title, '[\t\r\n]', '')" />
  <xsl:text>&#x9;</xsl:text>
  <xsl:value-of select="link" /><xsl:text>?flag=us</xsl:text>
  <xsl:text>&#xA;</xsl:text>
</xsl:template>
</xsl:stylesheet> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
post_id pubDate status title link categories | |
602 Mon, 14 Apr 2014 19:19:22 +0000 publish Example post http://127.0.0.1/sample-post/?flag=us |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?> | |
<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. --> | |
<!-- It contains information about your site's posts, pages, comments, categories, and other content. --> | |
<!-- You may use this file to transfer that content from one site to another. --> | |
<!-- This file is not intended to serve as a complete backup of your site. --> | |
<!-- To import this information into a WordPress site follow these steps: --> | |
<!-- 1. Log in to that site as an administrator. --> | |
<!-- 2. Go to Tools: Import in the WordPress admin panel. --> | |
<!-- 3. Install the "WordPress" importer from the list. --> | |
<!-- 4. Activate & Run Importer. --> | |
<!-- 5. Upload this file using the form provided on that page. --> | |
<!-- 6. You will first be asked to map the authors in this export file to users --> | |
<!-- on the site. For each author, you may choose to map to an --> | |
<!-- existing user on the site or to create a new user. --> | |
<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. --> | |
<!-- contained in this file into your site. --> | |
<!-- generator="WordPress/4.6.1" created="2016-09-15 18:45" --> | |
<rss version="2.0" | |
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" | |
xmlns:content="http://purl.org/rss/1.0/modules/content/" | |
xmlns:wfw="http://wellformedweb.org/CommentAPI/" | |
xmlns:dc="http://purl.org/dc/elements/1.1/" | |
xmlns:wp="http://wordpress.org/export/1.2/" | |
> | |
<channel> | |
<title>Sitename</title> | |
<link>http://127.0.0.1</link> | |
<description>another wp website</description> | |
<pubDate>Thu, 15 Sep 2016 18:45:29 +0000</pubDate> | |
<language>en-US</language> | |
<wp:wxr_version>1.2</wp:wxr_version> | |
<wp:base_site_url>http://127.0.0.1</wp:base_site_url> | |
<wp:base_blog_url>http://127.0.0.1</wp:base_blog_url> | |
<wp:author><wp:author_id>3</wp:author_id><wp:author_login><![CDATA[admin]]></wp:author_login><wp:author_email><![CDATA[[email protected]]]></wp:author_email><wp:author_display_name><![CDATA[admin]]></wp:author_display_name><wp:author_first_name><![CDATA[]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author> | |
<generator>https://wordpress.org/?v=4.6.1</generator> | |
<item> | |
<title>Example post</title> | |
<link>http://127.0.0.1/sample-post/</link> | |
<pubDate>Mon, 14 Apr 2014 19:19:22 +0000</pubDate> | |
<dc:creator><![CDATA[admin]]></dc:creator> | |
<guid isPermaLink="false">http://127.0.0.1/?p=602</guid> | |
<description></description> | |
<content:encoded><![CDATA[<a href="/wp-content/uploads/2014/05/file">Sample post</a>]]></content:encoded> | |
<excerpt:encoded><![CDATA[]]></excerpt:encoded> | |
<wp:post_id>602</wp:post_id> | |
<wp:post_date><![CDATA[2014-04-14 14:19:22]]></wp:post_date> | |
<wp:post_date_gmt><![CDATA[2014-04-14 19:19:22]]></wp:post_date_gmt> | |
<wp:comment_status><![CDATA[closed]]></wp:comment_status> | |
<wp:ping_status><![CDATA[closed]]></wp:ping_status> | |
<wp:post_name><![CDATA[sample-post]]></wp:post_name> | |
<wp:status><![CDATA[publish]]></wp:status> | |
<wp:post_parent>0</wp:post_parent> | |
<wp:menu_order>0</wp:menu_order> | |
<wp:post_type><![CDATA[post]]></wp:post_type> | |
<wp:post_password><![CDATA[]]></wp:post_password> | |
<wp:is_sticky>0</wp:is_sticky> | |
<category domain="language" nicename="en"><![CDATA[English]]></category> | |
<category domain="category" nicename="newsroom"><![CDATA[Newsroom]]></category> | |
<category domain="post_translations" nicename="pll_54454839cbd7f"><![CDATA[pll_54454839cbd7f]]></category> | |
<wp:postmeta> | |
<wp:meta_key><![CDATA[_edit_last]]></wp:meta_key> | |
<wp:meta_value><![CDATA[6]]></wp:meta_value> | |
</wp:postmeta> | |
<wp:postmeta> | |
<wp:meta_key><![CDATA[slide_template]]></wp:meta_key> | |
<wp:meta_value><![CDATA[default]]></wp:meta_value> | |
</wp:postmeta> | |
<wp:postmeta> | |
<wp:meta_key><![CDATA[newsroom-pdf]]></wp:meta_key> | |
<wp:meta_value><![CDATA[761]]></wp:meta_value> | |
</wp:postmeta> | |
<wp:postmeta> | |
<wp:meta_key><![CDATA[_newsroom-pdf]]></wp:meta_key> | |
<wp:meta_value><![CDATA[field_53921515bf5a9]]></wp:meta_value> | |
</wp:postmeta> | |
</item> | |
</channel> | |
</rss> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
java -jar c:\tools\saxon\saxon9he.jar -s:sample_input.xml -xsl:posts_by_language.xsl -o:results.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment