-
-
Save dianariyanto/685a31e3aed7ad783d32d218d7d13510 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Serves a large plain-text URL list as XML sitemaps.
 *
 * This is a quick way to turn a simple text file holding a very long list
 * of urls (sitemap-urls.txt, one url per line) into valid XML Sitemaps:
 * http://en.wikipedia.org/wiki/Sitemaps
 * "Very long" means an expected url count greater than 10,000.
 *
 * - Loaded with a valid query parameter "page" (1..N) it outputs the
 *   individual XML site map holding that page's slice of up to 10,000 urls.
 * - Loaded without a valid "page" it outputs the Site Index site map that
 *   lists every page.
 *
 * Put this file (sitemap.xml.php) and sitemap-urls.txt at
 * the webroot, e.g. http://example.com/sitemap.xml.php
 * Then add the text in quotes below to your robots.txt file as a new line:
 * "Sitemap: http://example.com/sitemap.xml.php"
 *
 * Questions? email [email protected]
 *
 * Based on https://gist.github.com/artlung/210438
 */
$per_page = 10000;
$filename = 'sitemap-urls.txt';

// Fail with an error status rather than emitting a malformed XML document
// when the url list is missing or unreadable.
$urls = is_readable($filename) ? file($filename) : false;
if ($urls === false) {
    header('Content-Type: text/plain', true, 500);
    echo 'Sitemap source file is not readable.';
    exit;
}
$filectime = filectime($filename);

// Strip whitespace/newlines and drop blank lines ('strlen' keeps a literal "0").
$urls = array_filter(array_map('trim', $urls), 'strlen');

// Every entry shares the same metadata, so only the urls need chunking.
$pages = array_chunk($urls, $per_page);
$page_count = count($pages);

// range(1, 0) would yield array(1, 0), so an empty list needs an explicit empty set.
$page_numbers = $page_count > 0 ? range(1, $page_count) : array();

// A missing or non-scalar "page" parameter means "show the index".
$page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? (int)$_GET['page'] : 0;

// Escape only the XML-reserved characters; HTML named entities are not defined in XML.
$xml_flags = ENT_QUOTES | ENT_XML1;

$lastmod = date('Y-m-d', $filectime);
$changefreq = 'weekly';
$priority = '0.5';

header('Content-Type: text/xml');
echo '<?xml version=\'1.0\' encoding=\'UTF-8\'?>';
echo "\n";

// Rebuild this script's own absolute url (without its query string) for the index links.
$path = explode('?', $_SERVER['REQUEST_URI']);
$path = array_shift($path);
$url = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off' ? 'https' : 'http') . '://' . $_SERVER['HTTP_HOST'] . $path . "?page=";

if ($page < 1 || $page > $page_count) {
    // No valid page number requested: output the Site Index
    echo '<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">';
    echo "\n";
    foreach ($page_numbers as $pg_num) {
        echo "\t<sitemap>\n";
        echo "\t\t<loc>" . htmlspecialchars($url . $pg_num, $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$lastmod}</lastmod>\n";
        echo "\t</sitemap>\n";
    }
    echo '</sitemapindex>';
} else {
    // Valid page number: output the Site Map for that page
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">';
    echo "\n";
    foreach ($pages[$page - 1] as $loc) {
        echo "\t<url>\n";
        echo "\t\t<loc>" . htmlspecialchars($loc, $xml_flags, 'UTF-8') . "</loc>\n";
        echo "\t\t<lastmod>{$lastmod}</lastmod>\n";
        echo "\t\t<changefreq>{$changefreq}</changefreq>\n";
        echo "\t\t<priority>{$priority}</priority>\n";
        echo "\t</url>\n";
    }
    echo '</urlset>';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment