Last active
July 24, 2020 17:39
-
-
Save bzerangue/4957571 to your computer and use it in GitHub Desktop.
Recursively search through a directory (and its child directories) to find Markdown files, and convert the list of files and their content into an XML document.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
#
# Recursively scans a directory tree for Markdown files and prints an XML
# document: one <directory> element per folder that contains Markdown files,
# and inside it one <file> element per file holding the rendered content.
#
# INSPIRATION FROM Nick Dunn (in the Symphony CMS forum)
# "Convert a Directory of Markdown Text Files for Dynamic XML Datasource Use"
# http://getsymphony.com/discuss/thread/60701/#position-2
#
# AND FROM Stack Overflow
# http://stackoverflow.com/questions/8545010/php-reading-first-2-lines-of-file-into-variable-and-cylce-through-subfolders/8545451#8545451
#

#
# configuration
#
# $path       : root directory of the scan
# $fileFilter : regex a pathname must match to be treated as Markdown
# $pattern    : regex applied to the first two lines of a file to pull out
#               an optional "Title: ..." and "Description: ..." header
#
$path = '.';
$fileFilter = '~\.(md|markdown)$~';
$pattern = '~^(?:Title: (.*))?(?:(?:\r\n|\n)(?:Description: (.*)))?~u';

#
# main
#
# $result maps a directory name to a list of array(path, title, description).
$result = array();

# recursive iterator over everything below $path
$iterator = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($path, FilesystemIterator::KEY_AS_PATHNAME | FilesystemIterator::CURRENT_AS_FILEINFO),
    RecursiveIteratorIterator::SELF_FIRST);

# The loop variable is deliberately NOT called $path so that the configured
# root directory is not overwritten while iterating.
foreach ($iterator as $filePath => $info)
{
    # filter out entries that don't look like Markdown files
    if (!preg_match($fileFilter, $filePath)) continue;

    # A directory may itself be named "something.md"; openFile() on a
    # directory throws a LogicException, which the catch below would not
    # handle, so skip non-files up front.
    if (!$info->isFile()) continue;

    # read at most the first two lines
    try
    {
        $file = $info->openFile();
        $lines = '';
        for ($remaining = 2; !$file->eof() && $remaining > 0; $remaining--)
        {
            $lines .= $file->fgets();
        }
        $lines = rtrim($lines, "\n");
        if (!strlen($lines)) # skip empty files
            continue;
    }
    catch (RuntimeException $e)
    {
        continue; # files which are not readable are skipped
    }

    # parse the optional Title/Description header
    $r = preg_match($pattern, $lines, $matches);
    if (FALSE === $r)
    {
        throw new Exception('Regular expression failed.');
    }
    # pad so both capture groups always exist, even when they did not match
    list(, $title, $description) = $matches + array('', '', '');

    # "." also matches "\r", so files with CRLF line endings would otherwise
    # keep a trailing carriage return on the captured values.
    $title = rtrim($title, "\r");
    $description = rtrim($description, "\r");

    # grow result array, grouped by containing directory
    $result[dirname($filePath)][] = array($filePath, $title, $description);
}

#
# output
#
# ensure output is sent as XML
header('Content-Type: text/xml');
echo('<?xml version="1.0" encoding="utf-8" ?>');
include_once('includes/lib/markdown.php');

# "--" is illegal inside an XML comment, so break up any run of hyphens in
# the script directory before embedding it.
echo('<!-- ' . preg_replace('~-(?=-)~', '- ', dirname(__FILE__)) . ' -->');
echo('<files>');
foreach ($result as $name => $dirs)
{
    # names and paths come from the filesystem and may contain &, <, > or
    # quotes; escape them so the attributes stay well-formed
    printf("<directory name=\"%s\" path=\"%s\">\n",
        htmlspecialchars(basename($name), ENT_QUOTES, 'UTF-8'),
        htmlspecialchars($name, ENT_QUOTES, 'UTF-8'));
    foreach ($dirs as $entry)
    {
        # title and description are collected above but not emitted here
        list($filePath, $title, $description) = $entry;

        # a file may have become unreadable since the scan; emit it with
        # empty content rather than handing FALSE to the Markdown renderer
        $text = is_readable($filePath) ? file_get_contents($filePath) : FALSE;
        if (FALSE === $text)
        {
            $text = '';
        }

        # "\T" is escaped: a bare "T" in date() is the timezone abbreviation,
        # not the literal ISO-8601 date/time separator.
        $modified = date('Y-m-d\TH:i:s', filemtime($filePath));

        # NOTE(review): the output of Markdown() is inserted unescaped, so the
        # document is only well-formed XML if the renderer emits well-formed
        # markup - confirm against includes/lib/markdown.php.
        echo('<file path="' . htmlspecialchars($filePath, ENT_QUOTES, 'UTF-8')
            . '" name="' . htmlspecialchars(basename($filePath), ENT_QUOTES, 'UTF-8')
            . '" last-modified="' . $modified . '">'
            . Markdown($text)
            . '</file>');
    }
    echo('</directory>');
}
echo('</files>');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment