IcyApril · August 29, 2015 14:15
diff --git a/backupwiki.php b/backupwiki.php
 <?php 

 /*
 * A basic PHP script that exports MediaWiki pages directly from the database to HTML.
 * Perfect for running via a cronjob to create emergency HTML backups of MediaWiki pages.
 * This only requires a database connection and no modification to MediaWiki.
 * Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
 * I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
 * Backups are written to a directory named after the day of the month, one .html file per page. Each run overwrites the export made on the same day of the previous month.
 *
 * @author: Junade Ali <[email protected]>
 */

 // Connection settings for the wiki database; these are handed to dbFunc below.
 $dbHost = '127.0.0.1';
 $dbUser = 'root';
 $dbPassword = '';
 $db = 'wiki';

 // Base directory for the HTML exports; a per-day subdirectory is created inside it.
 $backupsDIR = '/var/www/work/wikiexports';

 class dbFunc {

 	/** PDO handle; stays null when the connection attempt fails. */
 	public $dbh;

 	/** DSN string assembled from the database name and host. */
 	public $dsn;

 	/**
 	 * Builds the MySQL DSN and attempts to open a PDO connection with it.
 	 *
 	 * A PDOException raised while connecting is not propagated: its message
 	 * is echoed with a "Connection failed:" prefix and $dbh is left unset.
 	 *
 	 * @param string $host     database server host
 	 * @param string $user     database user name
 	 * @param string $password database password
 	 * @param string $db       database name
 	 */
 	public function __construct ($host, $user, $password, $db) {

 		$this->dsn = sprintf('mysql:dbname=%s;host=%s', $db, $host);

 		try {
 			$connection = new PDO($this->dsn, $user, $password);
 			$this->dbh = $connection;
 		} catch (PDOException $connectError) {
 			echo 'Connection failed:', $connectError->getMessage();
 		}
 	}

 }

 // Connect and read the page list BEFORE touching the backup directory, so a
 // database outage cannot delete the previous export and leave nothing behind.
 $dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);

 if (!($dbUtils->dbh instanceof PDO)) {
 	// dbFunc has already printed the connection error.
 	exit(1);
 }

 $pages = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");

 if ($pages === false) {
 	echo 'Could not read the page table.';
 	exit(1);
 }

 // Exports live in a directory named after the day of the month.
 $todayBackupDIR = $backupsDIR."/".date("j")."/";

 // The directory normally survives from the previous month, so only create it
 // when it is missing (recursively, in case the base directory is absent too).
 if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
 	echo 'Could not create backup directory: '.$todayBackupDIR;
 	exit(1);
 }

 // Drop the export left over from the same day of the previous month.
 // glob() returns false on error, which must not be passed to array_map().
 $staleExports = glob($todayBackupDIR.'*.html');
 if (is_array($staleExports)) {
 	array_map('unlink', $staleExports);
 }

 // Prepared once and re-executed for every page.
 $revisionSTMT = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
 $pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");
 $wiky = new wiky;

 foreach ($pages as $page) {

 	// Latest revision of the page -> id of the row holding its text.
 	$revisionSTMT->execute(array($page['page_latest']));
 	$revisionNum = $revisionSTMT->fetch();
 	$revisionSTMT->closeCursor();

 	// A page whose revision or text row is missing is still exported as an
 	// empty HTML shell, exactly as before, but without reading offsets of false.
 	$wikitext = '';

 	if ($revisionNum !== false) {
 		$pageContentSTMT->execute(array($revisionNum['rev_text_id']));
 		$pageContent = $pageContentSTMT->fetch();
 		$pageContentSTMT->closeCursor();

 		if ($pageContent !== false) {
 			$wikitext = $pageContent['old_text'];
 		}
 	}

 	$pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
 	// parse() returns false for empty input; appending false adds nothing.
 	$pageOutput .= $wiky->parse($wikitext);
 	$pageOutput .= "</body></html>";

 	// File name: title plus page id, reduced to word characters, hyphen,
 	// double quote, space and asterisk (the same set as before). The hyphen is
 	// escaped because PCRE2 (PHP 7.3+) rejects an unescaped "-" directly after
 	// \w as an invalid range, which made preg_replace() return null.
 	$title = $page['page_title'].'_'.$page['page_id'];
 	$title = preg_replace('/[^\w\-" *]/', '', $title);

 	file_put_contents($todayBackupDIR.$title.".html", $pageOutput);
 }

 /* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
 * Author: Toni Lähdekorpi <[email protected]>
 * Modified by Junade Ali for usage in backupwiki.php <[email protected]>
 *
 * Code usage under any of these licenses:
 * Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0
 * Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/
 * GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html
 * GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html
 * Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/
 */

 class wiky {
 	private $patterns, $replacements;

 	/**
 	 * Builds the ordered list of regex rules used by parse().
 	 *
 	 * @param bool $analyze when true, the PCRE "S" modifier is appended to every pattern
 	 */
 	public function __construct($analyze=false) {
 		// Each pattern sits next to its replacement so the two can never drift
 		// out of step. Order matters: the rules are applied top to bottom and
 		// later rules rely on the output of earlier ones.
 		$rules=array(
 			"/\r\n/" => "\n",

 			// Headings (longest marker first, each with and without inner spaces)
 			"/^==== (.+?) ====$/m" => "<h4>$1</h4>",
 			"/^====(.+?)====$/m" => "<h4>$1</h4>",
 			"/^=== (.+?) ===$/m" => "<h3>$1</h3>",
 			"/^===(.+?)===$/m" => "<h3>$1</h3>",
 			"/^== (.+?) ==$/m" => "<h2>$1</h2>",
 			"/^==(.+?)==$/m" => "<h2>$1</h2>",
 			"/^= (.+?) =$/m" => "<h1>$1</h1>",
 			"/^=(.+?)=$/m" => "<h1>$1</h1>",

 			// Formatting
 			"/\'\'\'\'\'(.+?)\'\'\'\'\'/s" => "<strong><em>$1</em></strong>",	// Bold-italic
 			"/\'\'\'(.+?)\'\'\'/s" => "<strong>$1</strong>",			// Bold
 			"/\'\'(.+?)\'\'/s" => "<em>$1</em>",					// Italic

 			// Special
 			"/^----+(\s*)$/m" => "<hr/>",									// Horizontal line
 			"/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i" => "<img src=\"$2\" alt=\"$6\"/>",	// (File|img):(http|https|ftp) aka image
 			"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i" => "<a href=\"$1\">$7</a>",		// Other urls with text
 			"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i" => "<a href=\"$1\">$1</a>",			// Other urls without text

 			// Indentations
 			"/[\n\r]: *.+([\n\r]:+.+)*/" => "\n<dl>$0\n</dl>",	// First pass; the leading newline makes the second pass easier
 			"/^:(?!:) *(.+)$/m" => "<dd>$1</dd>",			// Second pass
 			"/([\n\r]:: *.+)+/" => "\n<dd><dl>$0\n</dl></dd>",	// Subindentation first pass
 			"/^:: *(.+)$/m" => "<dd>$1</dd>",			// Subindentation second pass

 			// Ordered list
 			"/[\n\r]?#.+([\n|\r]#.+)+/" => "\n<ol>\n$0\n</ol>",					// First pass, finding all blocks
 			"/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/" => "\n<li>$1\n<ol>$2\n</ol>\n</li>",		// List item with sub items of 2 or more
 			"/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/" => "\n<li>$1\n<ol>$2\n</ol>\n</li>",	// List item with sub items of 3 or more
 			"/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/" => "\n<li>$1\n<ol>$2\n</ol>\n</li>",	// List item with sub items of 4 or more

 			// Unordered list
 			"/[\n\r]?\*.+([\n|\r]\*.+)+/" => "\n<ul>\n$0\n</ul>",					// First pass, finding all blocks
 			"/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/" => "\n<li>$1\n<ul>$2\n</ul>\n</li>",	// List item with sub items of 2 or more
 			"/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/" => "\n<li>$1\n<ul>$2\n</ul>\n</li>",	// List item with sub items of 3 or more
 			"/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/" => "\n<li>$1\n<ul>$2\n</ul>\n</li>",	// List item with sub items of 4 or more

 			// List items
 			"/^[#\*]+ *(.+)$/m" => "<li>$1</li>",	// Wraps all list items to <li/>

 			// Newlines (TODO: make it smarter so that it groups paragraphs)
 			"/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi" => "$0<br/>",	// Ones with breakable elements (TODO: the li|dd comparison here needs a proper fix)
 			"/^[^><\n\r]+$/m" => "$0<br/>",					// Ones with no elements
 		);

 		// preg_replace() takes the patterns and replacements as parallel lists.
 		$this->patterns=array_keys($rules);
 		$this->replacements=array_values($rules);

 		if($analyze) {
 			foreach($this->patterns as $k=>$v) {
 				$this->patterns[$k].="S";
 			}
 		}
 	}

 	/**
 	 * Converts wiki markup to HTML by applying every rule in order.
 	 *
 	 * @param string $input wiki markup
 	 * @return string|false the converted markup, or false when $input is
 	 *                      null, false, an empty string or an empty array
 	 */
 	public function parse($input) {
 		// empty() was used here before, but it also rejects the string "0",
 		// which is legitimate page text; test for "no content" explicitly.
 		$isBlank=($input===null || $input===false || $input==='' || $input===array());
 		if($isBlank)
 			return false;
 		return preg_replace($this->patterns,$this->replacements,$input);
 	}
 }
	<?php

	/*
	* A basic PHP script that exports MediaWiki pages directly from the database to HTML.
	* Perfect for running via a cronjob to create emergency HTML backups of MediaWiki pages.
	* This only requires a database connection and no modification to MediaWiki.
	* Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
	* I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
	* Backups are written to a directory named after the day of the month, one .html file per page. Each run overwrites the export made on the same day of the previous month.
	*
	* @author: Junade Ali <[email protected]>
	*/

	// Connection settings for the wiki database; these are handed to dbFunc below.
	$dbHost = '127.0.0.1';
	$dbUser = 'root';
	$dbPassword = '';
	$db = 'wiki';

	// Base directory for the HTML exports; a per-day subdirectory is created inside it.
	$backupsDIR = '/var/www/work/wikiexports';

	class dbFunc {

		/** PDO handle; stays null when the connection attempt fails. */
		public $dbh;

		/** DSN string assembled from the database name and host. */
		public $dsn;

		/**
		 * Builds the MySQL DSN and attempts to open a PDO connection with it.
		 *
		 * A PDOException raised while connecting is not propagated: its message
		 * is echoed with a "Connection failed:" prefix and $dbh is left unset.
		 *
		 * @param string $host     database server host
		 * @param string $user     database user name
		 * @param string $password database password
		 * @param string $db       database name
		 */
		public function __construct ($host, $user, $password, $db) {

			$this->dsn = sprintf('mysql:dbname=%s;host=%s', $db, $host);

			try {
				$connection = new PDO($this->dsn, $user, $password);
				$this->dbh = $connection;
			} catch (PDOException $connectError) {
				echo 'Connection failed:', $connectError->getMessage();
			}
		}

	}

	// Connect and read the page list BEFORE touching the backup directory, so a
	// database outage cannot delete the previous export and leave nothing behind.
	$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);

	if (!($dbUtils->dbh instanceof PDO)) {
		// dbFunc has already printed the connection error.
		exit(1);
	}

	$pages = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");

	if ($pages === false) {
		echo 'Could not read the page table.';
		exit(1);
	}

	// Exports live in a directory named after the day of the month.
	$todayBackupDIR = $backupsDIR."/".date("j")."/";

	// The directory normally survives from the previous month, so only create it
	// when it is missing (recursively, in case the base directory is absent too).
	if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
		echo 'Could not create backup directory: '.$todayBackupDIR;
		exit(1);
	}

	// Drop the export left over from the same day of the previous month.
	// glob() returns false on error, which must not be passed to array_map().
	$staleExports = glob($todayBackupDIR.'*.html');
	if (is_array($staleExports)) {
		array_map('unlink', $staleExports);
	}

	// Prepared once and re-executed for every page.
	$revisionSTMT = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
	$pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");
	$wiky = new wiky;

	foreach ($pages as $page) {

		// Latest revision of the page -> id of the row holding its text.
		$revisionSTMT->execute(array($page['page_latest']));
		$revisionNum = $revisionSTMT->fetch();
		$revisionSTMT->closeCursor();

		// A page whose revision or text row is missing is still exported as an
		// empty HTML shell, exactly as before, but without reading offsets of false.
		$wikitext = '';

		if ($revisionNum !== false) {
			$pageContentSTMT->execute(array($revisionNum['rev_text_id']));
			$pageContent = $pageContentSTMT->fetch();
			$pageContentSTMT->closeCursor();

			if ($pageContent !== false) {
				$wikitext = $pageContent['old_text'];
			}
		}

		$pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
		// parse() returns false for empty input; appending false adds nothing.
		$pageOutput .= $wiky->parse($wikitext);
		$pageOutput .= "</body></html>";

		// File name: title plus page id, reduced to word characters, hyphen,
		// double quote, space and asterisk (the same set as before). The hyphen is
		// escaped because PCRE2 (PHP 7.3+) rejects an unescaped "-" directly after
		// \w as an invalid range, which made preg_replace() return null.
		$title = $page['page_title'].'_'.$page['page_id'];
		$title = preg_replace('/[^\w\-" *]/', '', $title);

		file_put_contents($todayBackupDIR.$title.".html", $pageOutput);
	}

	/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
	* Author: Toni Lähdekorpi <[email protected]>
	* Modified by Junade Ali for usage in backupwiki.php <[email protected]>
	*
	* Code usage under any of these licenses:
	* Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0
	* Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/
	* GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html
	* GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html
	* Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/
	*/

	class wiky {
		private $patterns, $replacements;

		/**
		 * Builds the parallel pattern/replacement lists used by parse().
		 *
		 * The two arrays are matched by position and applied in order, so every
		 * entry in $patterns needs its counterpart at the same index in
		 * $replacements.
		 *
		 * @param bool $analyze when true, the PCRE "S" modifier is appended to every pattern
		 */
		public function __construct($analyze=false) {
			$this->patterns=array(
				"/\r\n/",

				// Headings
				"/^==== (.+?) ====$/m",		// Subsubheading
				"/^====(.+?)====$/m",		// Subsubheading without spaces
				"/^=== (.+?) ===$/m",		// Subheading
				"/^===(.+?)===$/m",		// Subheading without spaces
				"/^== (.+?) ==$/m",		// Subheading
				"/^==(.+?)==$/m",		// Subheading without spaces
				"/^= (.+?) =$/m",		// Heading
				"/^=(.+?)=$/m",			// Heading without space

				// Formatting
				"/\'\'\'\'\'(.+?)\'\'\'\'\'/s",	// Bold-italic
				"/\'\'\'(.+?)\'\'\'/s",		// Bold
				"/\'\'(.+?)\'\'/s",		// Italic

				// Special
				"/^----+(\s*)$/m",						// Horizontal line
				"/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i",	// (File|img):(http|https|ftp) aka image
				"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i",		// Other urls with text
				"/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i",			// Other urls without text

				// Indentations
				"/[\n\r]: *.+([\n\r]:+.+)*/",	// Indentation first pass
				"/^:(?!:) *(.+)$/m",		// Indentation second pass
				"/([\n\r]:: *.+)+/",		// Subindentation first pass
				"/^:: *(.+)$/m",		// Subindentation second pass

				// Ordered list
				"/[\n\r]?#.+([\n|\r]#.+)+/",			// First pass, finding all blocks
				"/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/",	// List item with sub items of 2 or more
				"/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/",	// List item with sub items of 3 or more
				"/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/",	// List item with sub items of 4 or more

				// Unordered list
				"/[\n\r]?\*.+([\n|\r]\*.+)+/",			// First pass, finding all blocks
				"/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/",	// List item with sub items of 2 or more
				"/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/",	// List item with sub items of 3 or more
				"/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/",	// List item with sub items of 4 or more

				// List items
				"/^[#\*]+ *(.+)$/m",		// Wraps all list items to <li/>

				// Newlines (TODO: make it smarter so that it groups paragraphs)
				"/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi",	// Ones with breakable elements (TODO: the li|dd comparison here needs a proper fix)
				"/^[^><\n\r]+$/m",				// Ones with no elements
			);
			$this->replacements=array(
				"\n",

				// Headings
				"<h4>$1</h4>",
				"<h4>$1</h4>",
				"<h3>$1</h3>",
				"<h3>$1</h3>",
				"<h2>$1</h2>",
				"<h2>$1</h2>",
				"<h1>$1</h1>",
				"<h1>$1</h1>",

				// Formatting
				"<strong><em>$1</em></strong>",
				"<strong>$1</strong>",
				"<em>$1</em>",

				// Special
				"<hr/>",
				"<img src=\"$2\" alt=\"$6\"/>",
				"<a href=\"$1\">$7</a>",
				"<a href=\"$1\">$1</a>",

				// Indentations
				"\n<dl>$0\n</dl>", // Newline is here to make the second pass easier
				"<dd>$1</dd>",
				"\n<dd><dl>$0\n</dl></dd>",
				"<dd>$1</dd>",

				// Ordered list
				"\n<ol>\n$0\n</ol>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",
				"\n<li>$1\n<ol>$2\n</ol>\n</li>",

				// Unordered list
				"\n<ul>\n$0\n</ul>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",
				"\n<li>$1\n<ul>$2\n</ul>\n</li>",

				// List items
				"<li>$1</li>",

				// Newlines
				"$0<br/>",
				"$0<br/>",
			);
			if($analyze) {
				foreach($this->patterns as $k=>$v) {
					$this->patterns[$k].="S";
				}
			}
		}

		/**
		 * Converts wiki markup to HTML by applying every rule in order.
		 *
		 * @param string $input wiki markup
		 * @return string|false the converted markup, or false when $input is
		 *                      null, false, an empty string or an empty array
		 */
		public function parse($input) {
			// empty() was used here before, but it also rejects the string "0",
			// which is legitimate page text; test for "no content" explicitly.
			$isBlank=($input===null || $input===false || $input==='' || $input===array());
			if($isBlank)
				return false;
			return preg_replace($this->patterns,$this->replacements,$input);
		}
	}