Last active
August 29, 2015 14:15
-
-
Save IcyApril/4d4f6ba6990201b40789 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* A basic PHP script that exports MediaWiki pages directly from the database to static HTML files.
* Perfect for running via a cronjob to create HTML emergency backups of MediaWiki pages. | |
* This only requires a database connection and no modification to MediaWiki.
* Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else. | |
* I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies. | |
* Backups are stored in a directory named after the day of the month, with one .html file per page. Each run replaces the files left there by the previous month's run.
* | |
* @author: Junade Ali <[email protected]> | |
*/ | |
// Connection settings for the MediaWiki database that will be exported.
$dbHost     = '127.0.0.1';
$dbUser     = 'root';
$dbPassword = '';
$db         = 'wiki';

// Base directory under which the HTML backups are written.
$backupsDIR = '/var/www/work/wikiexports';
/**
 * Minimal holder for the PDO connection to the wiki database.
 *
 * The DSN and the connection handle are public so calling code can use
 * $dbh->query() / $dbh->prepare() directly.
 */
class dbFunc {
    /** @var PDO|null Open connection; set only when the constructor succeeds. */
    public $dbh;

    /** @var string MySQL DSN built from the database name and host. */
    public $dsn;

    /**
     * Opens the MySQL connection.
     *
     * @param string $host     Database host name or IP address.
     * @param string $user     Database user.
     * @param string $password Password for $user.
     * @param string $db       Name of the database to select.
     *
     * @throws PDOException When the connection cannot be established. The
     *                      failure is reported and then re-thrown, because
     *                      continuing with a null handle only produces a
     *                      less helpful error at the first query.
     */
    function __construct ($host, $user, $password, $db) {
        $this->dsn = 'mysql:dbname='.$db.';host='.$host;

        try {
            $this->dbh = new PDO($this->dsn, $user, $password);
            // Make failed queries throw instead of silently returning false
            // (this is only the default from PHP 8.0 onwards).
            $this->dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        } catch (PDOException $e) {
            echo 'Connection failed:'.$e->getMessage().PHP_EOL;
            throw $e;
        }
    }
}
// ---------------------------------------------------------------------------
// Export run: write the latest revision of every page as a standalone HTML
// file into <backupsDIR>/<day of month>/.
// ---------------------------------------------------------------------------

// Connect before touching the file system, so that a database outage cannot
// wipe the export that is already on disk for this day of the month.
$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);
if (!($dbUtils->dbh instanceof PDO)) {
    echo 'Aborting export: no database connection.'.PHP_EOL;
    exit(1);
}

// date("j") is the day of the month without leading zero, so each directory
// is reused (and its old contents replaced) roughly once a month.
$todayBackupDIR = $backupsDIR."/".date("j")."/";

// The directory already exists on every run after the first month; only
// create it when missing, and re-check afterwards to tolerate a concurrent
// creation between the two calls.
if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
    echo 'Aborting export: cannot create '.$todayBackupDIR.PHP_EOL;
    exit(1);
}

// Remove the HTML files left over from the previous run in this directory.
// glob() returns false on error, which must not be handed to array_map().
$staleFiles = glob($todayBackupDIR.'*.html');
if (is_array($staleFiles)) {
    array_map('unlink', $staleFiles);
}

// Prepare the per-page lookups once; only the bound value changes per page.
// NOTE(review): this relies on the `revision`.`rev_text_id` / `text` schema;
// confirm that the target MediaWiki version still has these columns.
$revisionSTMT    = $dbUtils->dbh->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
$pageContentSTMT = $dbUtils->dbh->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");
$pages           = $dbUtils->dbh->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");

if ($revisionSTMT === false || $pageContentSTMT === false || $pages === false) {
    echo 'Aborting export: could not query the wiki tables.'.PHP_EOL;
    exit(1);
}

// The converter holds no per-page state, so one instance serves all pages.
$wiky = new wiky;

foreach ($pages as $page) {
    // Resolve the page's latest revision to the id of its text row.
    $revisionSTMT->execute(array($page['page_latest']));
    $revisionNum = $revisionSTMT->fetch();
    $revisionSTMT->closeCursor();

    // Fetch the wiki markup. A missing revision or text row yields an empty
    // page body rather than an invalid array access on `false`.
    $wikiText = '';
    if ($revisionNum !== false) {
        $pageContentSTMT->execute(array($revisionNum['rev_text_id']));
        $pageContent = $pageContentSTMT->fetch();
        $pageContentSTMT->closeCursor();
        if ($pageContent !== false) {
            $wikiText = $pageContent['old_text'];
        }
    }

    // parse() returns false for empty input; string concatenation turns that
    // into an empty body.
    $pageOutput  = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
    $pageOutput .= $wiky->parse($wikiText);
    $pageOutput .= "</body></html>";

    // File name: page title plus page id (the id keeps names unique once
    // characters have been stripped). The hyphen is escaped because an
    // unescaped "-" directly after \w is rejected by PCRE2 (PHP 7.3+) as an
    // invalid range, which made preg_replace() return null.
    $title = $page['page_title'].'_'.$page['page_id'];
    $title = preg_replace('/[^\w\-" *]/', '', $title);

    if (file_put_contents($todayBackupDIR.$title.".html", $pageOutput) === false) {
        echo 'Could not write backup for page '.$page['page_id'].PHP_EOL;
    }
}
/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML | |
* Author: Toni Lähdekorpi <[email protected]> | |
* Modified by Junade Ali for usage in backupwiki.php <[email protected]> | |
* | |
* Code usage under any of these licenses: | |
* Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0 | |
* Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/ | |
* GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html | |
* GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html | |
* Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/ | |
*/ | |
/**
 * Regex-based converter from a subset of wiki markup to HTML.
 *
 * Two parallel lists are built in the constructor: $patterns[i] is replaced
 * by $replacements[i]. preg_replace() applies the pairs in list order, each
 * one operating on the output of the previous one, so the order of entries
 * is significant (e.g. longer heading markers must be tried before shorter
 * ones, and list blocks are wrapped before single items).
 */
class wiky {
    private $patterns, $replacements;

    /**
     * Builds the pattern and replacement tables.
     *
     * @param bool $analyze When true, the PCRE "S" modifier is appended to
     *                      every pattern.
     */
    public function __construct($analyze=false) {
        $this->patterns=array(
            "/\r\n/",

            // Headings
            "/^==== (.+?) ====$/m",                     // Subsubheading
            "/^====(.+?)====$/m",                       // Subsubheading without spaces
            "/^=== (.+?) ===$/m",                       // Subheading
            "/^===(.+?)===$/m",                         // Subheading without spaces
            "/^== (.+?) ==$/m",                         // Subheading
            "/^==(.+?)==$/m",                           // Subheading without spaces
            "/^= (.+?) =$/m",                           // Heading
            "/^=(.+?)=$/m",                             // Heading without space

            // Formatting
            "/\'\'\'\'\'(.+?)\'\'\'\'\'/s",             // Bold-italic
            "/\'\'\'(.+?)\'\'\'/s",                     // Bold
            "/\'\'(.+?)\'\'/s",                         // Italic

            // Special
            "/^----+(\s*)$/m",                          // Horizontal line
            "/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i", // (File|img):(http|https|ftp) aka image
            "/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i",      // Other urls with text
            "/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i",             // Other urls without text

            // Indentations
            "/[\n\r]: *.+([\n\r]:+.+)*/",               // Indentation first pass
            "/^:(?!:) *(.+)$/m",                        // Indentation second pass
            "/([\n\r]:: *.+)+/",                        // Subindentation first pass
            "/^:: *(.+)$/m",                            // Subindentation second pass

            // Ordered list
            // (The separator class is [\n\r]; a stray "|" in it used to make
            // "|#" in the middle of a line count as a new list item.)
            "/[\n\r]?#.+([\n\r]#.+)+/",                 // First pass, finding all blocks
            "/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/",   // List item with sub items of 2 or more
            "/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/", // List item with sub items of 3 or more
            "/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/", // List item with sub items of 4 or more

            // Unordered list
            "/[\n\r]?\*.+([\n\r]\*.+)+/",               // First pass, finding all blocks
            "/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/", // List item with sub items of 2 or more
            "/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/", // List item with sub items of 3 or more
            "/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/", // List item with sub items of 4 or more

            // List items
            "/^[#\*]+ *(.+)$/m",                        // Wraps all list items to <li/>

            // Newlines (TODO: make it smarter and so that it groups paragraphs)
            "/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi", // Ones with breakable elements (TODO: the li|dd comparison here is crude)
            "/^[^><\n\r]+$/m",                          // Ones with no elements
        );
        $this->replacements=array(
            "\n",

            // Headings
            "<h4>$1</h4>",
            "<h4>$1</h4>",
            "<h3>$1</h3>",
            "<h3>$1</h3>",
            "<h2>$1</h2>",
            "<h2>$1</h2>",
            "<h1>$1</h1>",
            "<h1>$1</h1>",

            // Formatting
            "<strong><em>$1</em></strong>",
            "<strong>$1</strong>",
            "<em>$1</em>",

            // Special
            "<hr/>",
            // $7 is the caption text; $6 is the enclosing group, which also
            // contains the separating space and produced alt=" caption".
            "<img src=\"$2\" alt=\"$7\"/>",
            "<a href=\"$1\">$7</a>",
            "<a href=\"$1\">$1</a>",

            // Indentations
            "\n<dl>$0\n</dl>", // Newline is here to make the second pass easier
            "<dd>$1</dd>",
            "\n<dd><dl>$0\n</dl></dd>",
            "<dd>$1</dd>",

            // Ordered list
            "\n<ol>\n$0\n</ol>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",

            // Unordered list
            "\n<ul>\n$0\n</ul>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",

            // List items
            "<li>$1</li>",

            // Newlines
            "$0<br/>",
            "$0<br/>",
        );
        if($analyze) {
            foreach($this->patterns as $k=>$v) {
                $this->patterns[$k].="S";
            }
        }
    }

    /**
     * Converts wiki markup to HTML.
     *
     * @param string $input Wiki markup.
     *
     * @return string|false The converted HTML, or false when $input is
     *                      null, false, an empty string or an empty array.
     */
    public function parse($input) {
        // Explicit checks rather than empty(): empty() also treats the
        // string "0" as empty, which is a legitimate page text.
        if($input===null || $input===false || $input==='' || $input===array()) {
            return false;
        }
        return preg_replace($this->patterns,$this->replacements,$input);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment