Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:15
Show Gist options
  • Save IcyApril/4d4f6ba6990201b40789 to your computer and use it in GitHub Desktop.
Save IcyApril/4d4f6ba6990201b40789 to your computer and use it in GitHub Desktop.
* A basic PHP script that allows you to export MediaWiki pages directly from a database to HTML.
* Perfect for running via a cronjob to create HTML emergency backups of MediaWiki pages.
* This only requires a database connection and no modification to MediaWiki.
* Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
* I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
* Backups are stored in a directory named after the day of the month, with one .html file per page. They overwrite the ones from the previous month when a new backup is made.
* @author: Junade Ali <[email protected]>
// Database connection details for the wiki (the host and database name are used to build the PDO DSN):
$dbHost = "";
$dbUser = "root";
$dbPassword = "";
$db = "wiki";
// Base directory for the HTML backups; each run writes into a sub-directory named after the day of the month:
$backupsDIR = "/var/www/work/wikiexports";
// Thin wrapper that opens a PDO connection to a MySQL database.
// The open handle is exposed through the public $dbh property so callers can
// run queries on it directly.
class dbFunc {
    // PDO handle for the open connection (only set when connecting succeeded).
    public $dbh;
    // DSN string built from the host and database name given to the constructor.
    public $dsn;

    // Connects to the given MySQL database.
    //
    // $host      MySQL host name placed in the DSN.
    // $user      Database user name.
    // $password  Password for $user.
    // $db        Name of the database to select.
    //
    // Throws PDOException when the connection cannot be established. The
    // message is still echoed first (as before), but the exception is then
    // re-thrown so the script stops instead of carrying on with an unset
    // $dbh and failing later with a confusing "member function on null" error.
    function __construct ($host, $user, $password, $db) {
        $this->dsn = 'mysql:dbname='.$db.';host='.$host;
        try {
            // Ask PDO to throw on query errors too, rather than returning false
            // silently (the default before PHP 8).
            $this->dbh = new PDO(
                $this->dsn,
                $user,
                $password,
                array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION)
            );
        } catch (PDOException $e) {
            echo 'Connection failed:'.$e->getMessage();
            throw $e;
        }
    }
}
// Export run: render the latest revision of every wiki page to a static HTML
// file inside a directory named after the current day of the month.
$todayBackupDIR = $backupsDIR."/".date("j")."/";

// Connect BEFORE touching the existing backups, so that a database outage
// never wipes the backup that is already on disk.
$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);
if (!($dbUtils->dbh instanceof PDO)) {
    exit(1);
}

$pages = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");
// The statements do not depend on the page, so prepare them once outside the loop.
$revisionSTMT = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
$pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");
if ($pages === false || $revisionSTMT === false || $pageContentSTMT === false) {
    echo 'Could not query the wiki database.';
    exit(1);
}

// Make sure the target directory exists, then clear out last month's files.
if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0755, true)) {
    echo 'Could not create backup directory: '.$todayBackupDIR;
    exit(1);
}
// glob() can return false on error; fall back to an empty list in that case.
array_map('unlink', glob($todayBackupDIR.'*.html') ?: array());

foreach ($pages as $page) {
    // Latest revision of the page -> id of the text row holding its content.
    $revisionSTMT->execute(array($page['page_latest']));
    $revisionNum = $revisionSTMT->fetch();
    $revisionSTMT->closeCursor();
    if ($revisionNum === false) {
        continue; // page has no matching revision row; nothing to export
    }

    $pageContentSTMT->execute(array($revisionNum['rev_text_id']));
    $pageContent = $pageContentSTMT->fetch();
    $pageContentSTMT->closeCursor();
    if ($pageContent === false) {
        continue; // revision has no matching text row; nothing to export
    }

    $wiky = new wiky;
    $pageContent = $wiky->parse($pageContent['old_text']);

    $pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
    $pageOutput .= $pageContent;
    $pageOutput .= "</body></html>";

    // File name: sanitised title plus the page id (the id keeps names unique
    // even when sanitising strips most of the title).
    $title = $page['page_title'].'_'.$page['page_id'];
    // Keep word characters, hyphen, double quote, space and asterisk. The
    // hyphen is escaped: an unescaped "-" right after \w is a compile error
    // under PCRE2 (PHP >= 7.3), which made preg_replace() return null.
    $title = preg_replace('/[^\w\-" *]/', '', $title);

    if (file_put_contents($todayBackupDIR.$title.".html", $pageOutput) === false) {
        echo 'Could not write backup file: '.$todayBackupDIR.$title.".html\n";
    }
}
/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
* Author: Toni Lähdekorpi <[email protected]>
* Modified by Junade Ali for usage in backupwiki.php <[email protected]>
* Code usage under any of these licenses:
* Apache License 2.0,
* Mozilla Public License 1.1,
* GNU Lesser General Public License 3.0,
* GNU General Public License 2.0,
* Creative Commons Attribution 3.0 Unported License,
// Converts wiki markup to HTML; the constructor lists regular expressions for
// headings, inline formatting, links/images, indentation and lists.
// NOTE(review): the array assignments and the method bodies are not fully
// visible in this view of the class -- confirm details against the complete file.
class wiky {
// Pattern list and replacement list; presumably parallel arrays (pattern N is
// replaced by replacement N) -- TODO confirm against parse().
private $patterns, $replacements;
// $analyze: when truthy, the constructor additionally loops over every entry
// of $this->patterns (see the end of this method).
public function __construct($analyze=false) {
// Headings
"/^==== (.+?) ====$/m", // Subsubheading
"/^====(.+?)====$/m", // Subsubheading without spaces
"/^=== (.+?) ===$/m", // Subheading
"/^===(.+?)===$/m", // Subheading without spaces
"/^== (.+?) ==$/m", // Subheading
"/^==(.+?)==$/m", // Subheading without spaces
"/^= (.+?) =$/m", // Heading
"/^=(.+?)=$/m", // Heading without space
// Formatting
"/\'\'\'\'\'(.+?)\'\'\'\'\'/s", // Bold-italic
"/\'\'\'(.+?)\'\'\'/s", // Bold
"/\'\'(.+?)\'\'/s", // Italic
// Special
"/^----+(\s*)$/m", // Horizontal line
"/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i", // (File|img):(http|https|ftp) aka image
"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i", // Other urls with text
"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i", // Other urls without text
// Indentations
"/[\n\r]: *.+([\n\r]:+.+)*/", // Indentation first pass
"/^:(?!:) *(.+)$/m", // Indentation second pass
"/([\n\r]:: *.+)+/", // Subindentation first pass
"/^:: *(.+)$/m", // Subindentation second pass
// Ordered list
"/[\n\r]?#.+([\n|\r]#.+)+/", // First pass, finding all blocks
"/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/", // List item with sub items of 2 or more
"/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/", // List item with sub items of 3 or more
"/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/", // List item with sub items of 4 or more
// Unordered list
"/[\n\r]?\*.+([\n|\r]\*.+)+/", // First pass, finding all blocks
"/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/", // List item with sub items of 2 or more
"/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/", // List item with sub items of 3 or more
"/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/", // List item with sub items of 4 or more
// List items
"/^[#\*]+ *(.+)$/m", // Wraps all list items to <li/>
// Newlines (TODO: make it smarter so that it groups paragraphs)
"/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi", // Ones with breakable elements (TODO: Fix this crap, the li|dd comparison here is just stupid)
"/^[^><\n\r]+$/m", // Ones with no elements
// Replacement strings follow, grouped under the same section headings as the
// patterns above; $N refers to capture group N of the matched pattern.
// Headings
// Special
// NOTE(review): if the <img> replacement pairs with the image pattern above,
// $6 is the group "( (.+))" and therefore includes the leading space; $7 is
// the text alone -- confirm which one is intended for the alt attribute.
"<img src=\"$2\" alt=\"$6\"/>",
"<a href=\"$1\">$7</a>",
"<a href=\"$1\">$1</a>",
// Indentations
"\n<dl>$0\n</dl>", // Newline is here to make the second pass easier
// Ordered list
// Unordered list
// List items
// Newlines
// Optional analysis pass over every pattern (loop body not visible here).
if($analyze) {
foreach($this->patterns as $k=>$v) {
// Takes the wiki markup in $input and returns $output; presumably the HTML
// produced by applying the patterns/replacements -- TODO confirm.
public function parse($input) {
return $output;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment