Created April 4, 2014 09:49
Excel-importable XML that lists indexable Umbraco pages together with their metadata and page titles. It can be used to analyse duplicate page headers, which have a negative impact on SEO.
<?xml version="1.0"?>
<!DOCTYPE xsl:stylesheet [
<!-- MaxLevel: the highest @level value that is still listed; expanded inside the Page entity and in the level tests of the templates. -->
<!ENTITY MaxLevel "5">
<!-- Page: XPath step selecting child elements that have @isDoc and an @level of at most MaxLevel, excluding FormPage and ThankYouPage elements. -->
<!ENTITY Page "*[@isDoc and @level &lt;= &MaxLevel;] [not(self::FormPage | self::ThankYouPage )]">
xmlns:umb="urn:umbraco.library" xmlns:msxml="urn:schemas-microsoft-com:xslt"
exclude-result-prefixes="umb xsl msxml"
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" version="1.0" encoding="utf-8" />
<!-- currentPage: the node the templates navigate from; its value is supplied from outside this stylesheet (presumably by the Umbraco macro engine - TODO confirm). -->
<xsl:param name="currentPage" />
<!-- SEO Titles for Excel Analysis (via import XML), Laurie 2014 -->
<!-- Excel hints and tips! -->
<!-- urlPrefix: the literal 'http://' followed by the HTTP_HOST server variable; prepended to every URL that is output. -->
<!-- NOTE(review): the scheme is hard-coded, so a site served over HTTPS would still be listed with http:// URLs - confirm this is acceptable. -->
<xsl:variable name="urlPrefix" select="concat('http://' , umb:RequestServerVariables('HTTP_HOST'))" />
<xsl:template match="/">
<!-- Entry point: outputs the entry for the level-1 ancestor-or-self of $currentPage, then lists that node's child pages through renderSitemapArea. -->
<!-- To Import into Excel, everything must be inside one containing element -->
<!-- Process the homepage using the 'sitemap-entry' template -->
<!-- The level-1 node is skipped here when its sitemapNoIndex value equals 1. -->
<xsl:apply-templates select="$currentPage/ancestor-or-self::*[@level=1 and @isDoc and not(sitemapNoIndex = 1)]" mode="sitemap-entry">
<!-- recurse is switched off so the sitemap-entry template does not descend by itself; the call-template below walks the children instead. -->
<xsl:with-param name="recurse" select="false()" />
<!-- NOTE(review): unlike the select above, this parent is not filtered on sitemapNoIndex, so child pages are listed even when the level-1 node itself was skipped - confirm this is intended. -->
<xsl:call-template name="renderSitemapArea">
<xsl:with-param name="parent" select="$currentPage/ancestor-or-self::*[@level=1 and @isDoc]"/>
<xsl:template name="renderSitemapArea">
<!-- Applies the sitemap-entry template to every child of $parent that the Page entity selects, provided the access test below passes. -->
<!-- parent: the node whose child pages are to be listed. -->
<xsl:param name="parent"/>
<!-- The loop runs when umb:IsProtected returns 0 for $parent, or when it returns 1 and umb:IsLoggedOn returns 1. -->
<!-- NOTE(review): XPath 'and' binds tighter than 'or', so the trailing @level test only qualifies the second alternative; it also reads @level from the context node rather than from $parent - confirm intent. -->
<xsl:if test="umb:IsProtected($parent/@id, $parent/@path) = 0 or (umb:IsProtected($parent/@id, $parent/@path) = 1 and umb:IsLoggedOn() = 1) and @level &lt;= &MaxLevel;">
<xsl:for-each select="$parent/&Page;">
<!-- NOTE(review): nodeAncestorOrSelf is not referenced in the lines visible here; it may be unused. -->
<xsl:variable name="nodeAncestorOrSelf" select="node()/ancestor-or-self::*[@level = 2 and @isDoc]/@nodeName"/>
<!-- Output this child; no recurse parameter is passed, so the sitemap-entry template uses its default of true(). -->
<xsl:apply-templates select="self::*" mode="sitemap-entry" />
<!-- Templates for repetitive tasks -->
<!-- [1] Sitemap entry -->
<xsl:template match="* [@isDoc]" mode="sitemap-entry">
<!-- Outputs the data for one document node: element name, URL, last-modified stamp, SEO fields and heading, then (unless recurse is false) its child pages. -->
<!-- recurse: when true (the default) the child pages of this node are rendered as well. -->
<xsl:param name="recurse" select="true()" />
<!-- Local element name of the node. -->
<dt><xsl:value-of select="local-name(.)" /></dt>
<!-- URL of the node, written by the template that matches @id. -->
<xsl:apply-templates select="@id" />
<!-- NOTE(review): a fixed +00:00 offset is appended, which assumes @updateDate is stored in UTC - confirm. -->
<lastmod><xsl:value-of select="@updateDate" />+00:00</lastmod>
<metaTitle><xsl:value-of select="metaTitle" /></metaTitle>
<metaDescription><xsl:value-of select="metaDescription" /></metaDescription>
<canonicalUrl><xsl:value-of select="canonicalUrl" /></canonicalUrl>
<!-- Heading: contentHeader when it holds text, otherwise the node name. -->
<!-- The fallback test must look at the parent element (../contentHeader): a predicate on @nodeName runs with the attribute as context node, and an attribute has no children. -->
<!-- Without that, @nodeName is always part of the union and, because attributes precede child elements in document order, it is always the value that gets printed. -->
<xsl:value-of select="contentHeader[normalize-space()] | @nodeName[not(normalize-space(../contentHeader))]" />
<!-- Unless 'recurse' is false, keep adding entries for the child pages -->
<xsl:if test="$recurse != false() and count(./* [@isDoc and @level &lt;= &MaxLevel;]) &gt; 0">
<xsl:call-template name="renderSitemapArea">
<xsl:with-param name="parent" select="."/>
<!-- [2] URLs -->
<xsl:template match="@id">
<!-- Outputs $urlPrefix followed by the umb:NiceUrl value for this id, with a trailing slash removed. -->
<xsl:value-of select="$urlPrefix" />
<!-- Trailing-slash handling: when the last character of the NiceUrl is '/', everything except that character is output, so a URL consisting only of '/' becomes empty and just the prefix remains. -->
<!-- The only data that is available is @id, so we access it using '.', meaning this or self -->
<xsl:when test="substring(umb:NiceUrl(.) , string-length(umb:NiceUrl(.)) , 1) = '/'">
<!-- XPath string positions start at 1, so starting at 0 with the full length returns every character except the last one. -->
<xsl:value-of select="substring(umb:NiceUrl(.) , 0 , string-length(umb:NiceUrl(.)))" />
<!-- Outputs the NiceUrl unchanged; presumably the fallback branch for URLs without a trailing slash (its wrapper element is not visible here - TODO confirm). -->
<xsl:value-of select="umb:NiceUrl(.)" />
