Last active
May 6, 2016 09:41
-
-
Save WinterSilence/2c1dfcc6b8df3c329c96a9c1ca4707dd to your computer and use it in GitHub Desktop.
Sitemap refactoring
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Sitemap; | |
/**
 * A class for generating sitemaps.
 *
 * Thin facade over a WriterInterface implementation: every call is forwarded
 * to the injected writer, which does the actual serialization.
 *
 * @see http://sitemaps.org
 */
class Sitemap
{
    /**
     * @var WriterInterface writer that receives every URL added to the sitemap
     */
    private $writer;

    /**
     * Sets writer.
     *
     * @param WriterInterface $writer
     */
    public function __construct(WriterInterface $writer)
    {
        $this->writer = $writer;
    }

    /**
     * Adds an URL to sitemap.
     *
     * @param string $location
     * @param array $optional optional elements\tags, passed to the writer unchanged
     * @return self
     */
    public function addUrl($location, array $optional = array())
    {
        $this->writer->writeUrl($location, $optional);

        return $this;
    }

    /**
     * Finishes writing and returns whatever the writer's save() returns.
     *
     * @return array
     */
    public function save()
    {
        return $this->writer->save();
    }

    /**
     * Returns an array of URLs written, keyed by the file path reported by the writer.
     *
     * A single "/" separator is guaranteed between the base URL and the file
     * name, so both "http://example.com" and "http://example.com/" produce
     * "http://example.com/sitemap.xml". An empty base URL yields bare file names.
     *
     * @param string $baseUrl base URL of all the sitemaps written
     * @return array URLs of sitemaps written
     */
    public function getSitemapUrls($baseUrl)
    {
        $baseUrl = (string)$baseUrl;
        // Normalize the trailing slash once, outside the loop.
        $prefix = $baseUrl === '' ? '' : rtrim($baseUrl, '/') . '/';

        $urls = array();
        foreach ($this->writer->getFiles() as $file) {
            $urls[$file] = $prefix . pathinfo($file, PATHINFO_BASENAME);
        }

        return $urls;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Sitemap; | |
/**
 * Writes sitemap XML through an in-memory XMLWriter that is periodically
 * appended to a file on disk. A new file is started whenever the per-file
 * URL count or the flushed byte size reaches its configured limit.
 *
 * @see http://www.sitemaps.org/protocol.html
 */
class Writer implements WriterInterface
{
    const MAX_URL_LENGTH = 2047;
    const MAX_URLS = 50000;
    const MAX_FILE_SIZE = 10475520;

    /**
     * Valid values of "changefreq" element.
     */
    const ALWAYS = 'always';
    const HOURLY = 'hourly';
    const DAILY = 'daily';
    const WEEKLY = 'weekly';
    const MONTHLY = 'monthly';
    const YEARLY = 'yearly';
    const NEVER = 'never';

    /**
     * @var array names of the child elements of "url" that writeUrl() accepts (lower case)
     */
    private static $urlElements = array('loc', 'lastmod', 'changefreq', 'priority');

    /**
     * @var integer Maximum allowed number of bytes per single file.
     */
    private $maxFileSize = self::MAX_FILE_SIZE;

    /**
     * @var integer Current file size written
     */
    private $fileSize = 0;

    /**
     * @var integer Maximum allowed number of URLs in a single file.
     */
    private $maxUrls = self::MAX_URLS;

    /**
     * @var integer number of URLs added
     */
    private $urlsCount = 0;

    /**
     * @var string path to the file to be written
     */
    private $filePath = './sitemap.xml';

    /**
     * @var integer number of files written
     */
    private $fileCount = 0;

    /**
     * @var array path of files written
     */
    private $writtenFilePaths = [];

    /**
     * @var integer number of URLs to be kept in memory before writing it to file
     */
    private $bufferSize = 500;

    /**
     * @var \XMLWriter|null writer of the file currently open, null when no file is open
     */
    private $writer;

    /**
     * Applies configuration through the matching private setters.
     *
     * Each key must name a property of this class that has a "set<Property>"
     * method (filePath, maxUrls, bufferSize, maxFileSize).
     *
     * @param array $config property name => value
     * @throws \InvalidArgumentException if a key is unknown, not writable, or its value is invalid
     */
    public function __construct(array $config = array())
    {
        foreach ($config as $property => $value) {
            if (!property_exists($this, $property)) {
                throw new \InvalidArgumentException(
                    'Property "' . $property . '" not defined.'
                );
            }
            $method = 'set' . ucfirst($property);
            if (!method_exists($this, $method)) {
                throw new \InvalidArgumentException(
                    'Property "' . $property . '" not writable.'
                );
            }
            $this->{$method}($value);
        }
    }

    /**
     * Sets path of the (first) file to write to.
     *
     * @param string $path path of the file to write to
     * @throws \InvalidArgumentException if the directory of the path does not exist
     */
    private function setFilePath($path)
    {
        $dir = dirname($path);
        if (!is_dir($dir)) {
            throw new \InvalidArgumentException(
                'Directory "' . $dir . '" not exists.'
            );
        }
        $this->filePath = $path;
    }

    /**
     * Sets maximum allowed number of URLs in a single file. No more than 50000 URLs.
     *
     * @param int $number
     * @throws \InvalidArgumentException if the value is not a number between 1 and MAX_URLS
     */
    private function setMaxUrls($number)
    {
        // The numeric check keeps a junk value from being cast to 0, which
        // would later cause a modulo by zero in writeUrl().
        if (!is_numeric($number) || $number < 1 || $number > self::MAX_URLS) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $number . ') of max URLs.'
            );
        }
        $this->maxUrls = (int)$number;
    }

    /**
     * Sets number of URLs to be kept in memory before writing it to file.
     *
     * @param int $number
     * @throws \InvalidArgumentException if the value is not a number greater than or equal to 1
     */
    private function setBufferSize($number)
    {
        if (!is_numeric($number) || $number < 1) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $number . ') of buffer size.'
            );
        }
        $this->bufferSize = (int)$number;
    }

    /**
     * Sets maximum allowed number of bytes per single file. No more than 10485760 bytes(10MB), 1 record ~ 10240 bytes(10KB).
     *
     * @param int $bytes
     * @throws \InvalidArgumentException if the value is not above 10240 or exceeds MAX_FILE_SIZE
     */
    private function setMaxFileSize($bytes)
    {
        if (!is_numeric($bytes) || $bytes <= 10240 || $bytes > self::MAX_FILE_SIZE) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $bytes . ') of max file size.'
            );
        }
        $this->maxFileSize = (int)$bytes;
    }

    /**
     * Creates new file: advances the file counter, removes a stale file at the
     * target path and opens a fresh in-memory document with the "urlset" root.
     *
     * @throws \RuntimeException if an existing file at the target path cannot be removed
     */
    private function createNewFile()
    {
        $this->fileSize = 0;
        $this->fileCount++;
        $filePath = $this->getCurrentFilePath();
        $this->writtenFilePaths[] = $filePath;

        // flush() appends, so a leftover file from a previous run must go first.
        if (is_file($filePath) && !unlink($filePath)) {
            throw new \RuntimeException(
                'Unable to remove existing file "' . $filePath . '".'
            );
        }

        $this->writer = new \XMLWriter();
        $this->writer->openMemory();
        $this->writer->startDocument('1.0', 'UTF-8');
        $this->writer->setIndent(true);
        $this->writer->startElement('urlset');
        $this->writer->writeAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
    }

    /**
     * Writes closing tags to current file and releases the writer.
     *
     * Does nothing when no file is open, so repeated calls (for example
     * save() followed by __destruct()) are harmless.
     */
    private function finishFile()
    {
        if ($this->writer === null) {
            return;
        }
        $this->writer->endElement();
        $this->writer->endDocument();
        $this->flush();
        $this->writer = null;
    }

    /**
     * Flushes buffer into file
     *
     * @throws \RuntimeException if the buffer cannot be appended to the current file
     */
    private function flush()
    {
        $buffer = $this->writer->flush(true);
        $filePath = $this->getCurrentFilePath();
        if (file_put_contents($filePath, $buffer, FILE_APPEND) === false) {
            throw new \RuntimeException(
                'Unable to write to file "' . $filePath . '".'
            );
        }
        // Byte length, not character count: the size limit is in bytes.
        $this->fileSize += mb_strlen($buffer, '8bit');
    }

    /**
     * Returns the configured path for the first file and "<name>_<N>[.<ext>]"
     * in the same directory for every following one.
     *
     * @return string path of currently opened file
     */
    private function getCurrentFilePath()
    {
        if ($this->fileCount < 2) {
            return $this->filePath;
        }
        $parts = pathinfo($this->filePath);
        // pathinfo() omits the "extension" key when the path has none.
        $extension = isset($parts['extension']) ? '.' . $parts['extension'] : '';

        return $parts['dirname'] . DIRECTORY_SEPARATOR . $parts['filename'] . '_' . $this->fileCount . $extension;
    }

    /**
     * Finishes writing.
     *
     * @return array paths of all files written
     */
    public function save()
    {
        $this->finishFile();

        return $this->writtenFilePaths;
    }

    /**
     * Sets "loc" element.
     *
     * @param string $location
     * @return self
     * @throws \InvalidArgumentException if the URL is invalid or longer than MAX_URL_LENGTH characters
     */
    private function writeLoc($location)
    {
        if (false === filter_var($location, FILTER_VALIDATE_URL)) {
            throw new \InvalidArgumentException(
                'The location URL "' . $location . '" is invalid.'
            );
        }
        if (mb_strlen($location, 'UTF-8') > self::MAX_URL_LENGTH) {
            throw new \InvalidArgumentException(
                'The location must be less than ' . (self::MAX_URL_LENGTH + 1) . ' characters.'
            );
        }
        $this->writer->writeElement('loc', $location);

        return $this;
    }

    /**
     * Sets "lastmod" element.
     *
     * Numeric values are treated as Unix timestamps, other strings are parsed
     * with strtotime(), and DateTimeInterface objects are formatted directly.
     *
     * @param string|int|\DateTimeInterface $date
     * @return self
     * @throws \InvalidArgumentException if the value cannot be interpreted as a date
     */
    private function writeLastmod($date)
    {
        if ($date instanceof \DateTimeInterface) {
            $formatted = $date->format('c');
        } else {
            if (is_int($date) || is_float($date) || (is_string($date) && is_numeric($date))) {
                $timestamp = (int)$date;
            } elseif (is_string($date)) {
                $timestamp = strtotime($date);
            } else {
                $timestamp = false;
            }
            if ($timestamp === false) {
                throw new \InvalidArgumentException(
                    'Invalid value (' . (is_scalar($date) ? $date : gettype($date)) . ') of "lastmod" element.'
                );
            }
            $formatted = date('c', $timestamp);
        }
        $this->writer->writeElement('lastmod', $formatted);

        return $this;
    }

    /**
     * Returns valid values of "changefreq" element.
     *
     * @return array
     */
    public function getValidFrequencies()
    {
        return array(
            self::ALWAYS,
            self::HOURLY,
            self::DAILY,
            self::WEEKLY,
            self::MONTHLY,
            self::YEARLY,
            self::NEVER
        );
    }

    /**
     * Sets "changefreq" element.
     *
     * @param string $frequency
     * @return self
     * @throws \InvalidArgumentException
     */
    private function writeChangefreq($frequency)
    {
        if (!in_array($frequency, $this->getValidFrequencies(), true)) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $frequency . ') of "changefreq" element.'
            );
        }
        $this->writer->writeElement('changefreq', $frequency);

        return $this;
    }

    /**
     * Sets "priority" element, formatted with one decimal place.
     *
     * @param string|float $priority
     * @return self
     * @throws \InvalidArgumentException
     */
    private function writePriority($priority)
    {
        if (!is_numeric($priority) || $priority < 0 || $priority > 1) {
            throw new \InvalidArgumentException(
                'Invalid value (' . $priority . ') of "priority" element.'
            );
        }
        $this->writer->writeElement('priority', number_format((float)$priority, 1, '.', ''));

        return $this;
    }

    /**
     * Writes one "url" entry, opening or rotating the output file as needed.
     *
     * @param string $url value of the "loc" element
     * @param array $optional element name => value; accepted names are
     *        "lastmod", "changefreq" and "priority" (a "loc" key is ignored in favour of $url)
     * @throws \InvalidArgumentException if an element name is unknown or a value is invalid
     * @throws \RuntimeException if a file cannot be prepared or written
     */
    public function writeUrl($url, array $optional = array())
    {
        if ($this->writer === null) {
            // First URL, or first URL after save(): open a fresh file.
            $this->createNewFile();
        } elseif ($this->urlsCount % $this->maxUrls == 0 || $this->fileSize >= $this->maxFileSize) {
            // NOTE(review): fileSize only counts flushed bytes, so the size limit
            // is checked against what is already on disk, not the buffered part.
            $this->finishFile();
            $this->createNewFile();
        }
        if ($this->urlsCount % $this->bufferSize == 0) {
            $this->flush();
        }

        $elements = array('loc' => $url) + $optional;

        // Resolve handlers before opening the element. The whitelist keeps a key
        // such as "url" from dispatching to writeUrl() itself.
        $handlers = array();
        foreach ($elements as $element => $value) {
            $name = strtolower($element);
            if (!in_array($name, self::$urlElements, true)) {
                throw new \InvalidArgumentException(
                    'Element "' . $element . '" not defined.'
                );
            }
            $handlers[] = array('write' . ucfirst($name), $value);
        }

        // NOTE(review): a value rejected by a handler still leaves the "url"
        // element open in the in-memory writer.
        $this->writer->startElement('url');
        foreach ($handlers as $handler) {
            $this->{$handler[0]}($handler[1]);
        }
        $this->writer->endElement();
        $this->urlsCount++;
    }

    /**
     * Returns paths of the files started so far.
     *
     * @return array
     */
    public function getFiles()
    {
        return $this->writtenFilePaths;
    }

    /**
     * Finishes writing.
     */
    public function __destruct()
    {
        $this->finishFile();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Sitemap; | |
/**
 * Contract between Sitemap and the object that serializes the URLs.
 */
interface WriterInterface
{
    /**
     * @param array $config implementation-specific settings
     */
    public function __construct(array $config = []);

    /**
     * Writes a single URL entry.
     *
     * @param string $url location of the page
     * @param array $optional optional elements of the entry
     */
    public function writeUrl($url, array $optional = []);

    /**
     * @return array files produced by the writer
     */
    public function getFiles();

    /**
     * Finishes writing.
     */
    public function save();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment