Skip to content

Instantly share code, notes, and snippets.

@IcyApril
Last active August 29, 2015 14:15
Show Gist options
  • Save IcyApril/4d4f6ba6990201b40789 to your computer and use it in GitHub Desktop.
Save IcyApril/4d4f6ba6990201b40789 to your computer and use it in GitHub Desktop.
<?php
/*
* A basic PHP script that exports MediaWiki pages directly from the database to HTML.
* Perfect for running via a cronjob to create HTML emergency backups of MediaWiki pages.
* This only requires a database connection and no modification to MediaWiki.
* Code is terrible, I know, I wrote most of it in nano over SSH. It does the job and not much else.
* I built this while working at Freestone Creative so our emergency documentation was ready as static HTML in emergencies.
* Backups are written to a directory named after the day of the month, one .html file per page. Each run overwrites the export made on the same day of the previous month.
*
* @author: Junade Ali <[email protected]>
*/
// Connection settings for the wiki's database:
$dbHost = '127.0.0.1';
$dbUser = 'root';
$dbPassword = '';
$db = 'wiki';
// Directory that receives the exported HTML files:
$backupsDIR = '/var/www/work/wikiexports';
/**
 * Thin wrapper that opens a PDO connection to a MySQL database.
 *
 * The open handle is exposed as $dbh and the DSN used to create it as $dsn.
 */
class dbFunc {
    /** @var PDO Open database handle; always set once the constructor returns. */
    public $dbh;
    /** @var string DSN built from the database name and host. */
    public $dsn;

    /**
     * Connects to the database.
     *
     * @param string $host     Database host.
     * @param string $user     Database user.
     * @param string $password Database password.
     * @param string $db       Database name.
     *
     * @throws RuntimeException When the connection cannot be established. The
     *                          original PDOException is attached as "previous".
     */
    function __construct ($host, $user, $password, $db) {
        $this->dsn = 'mysql:dbname='.$db.';host='.$host;
        try {
            // Exception mode makes failed queries throw instead of returning
            // false, which callers iterating a query result would not notice.
            $this->dbh = new PDO(
                $this->dsn,
                $user,
                $password,
                array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION)
            );
        } catch (PDOException $e) {
            // Previously the error was only echoed and $dbh stayed null, so the
            // caller crashed later on a null handle. Fail here instead.
            // PDOException codes may be SQLSTATE strings, hence the fixed 0.
            throw new RuntimeException('Connection failed:'.$e->getMessage(), 0, $e);
        }
    }
}
// Exports are grouped by day of the month, so each run replaces the export
// written on the same day of the previous month.
$todayBackupDIR = $backupsDIR."/".date("j")."/";
// A bare mkdir() warns once the directory exists (every run after the first
// month) and cannot create missing parent directories.
if (!is_dir($todayBackupDIR) && !mkdir($todayBackupDIR, 0777, true) && !is_dir($todayBackupDIR)) {
    throw new RuntimeException('Unable to create backup directory: '.$todayBackupDIR);
}
// Clear the previous export for this day. glob() returns false on error, which
// is not a valid array_map() argument.
$staleFiles = glob($todayBackupDIR.'*.html');
if ($staleFiles) {
    array_map('unlink', $staleFiles);
}

$dbUtils = new dbFunc($dbHost, $dbUser, $dbPassword, $db);
if (!($dbUtils->dbh instanceof PDO)) {
    throw new RuntimeException('No database connection available; aborting export.');
}
$pdo = $dbUtils->dbh;

// Prepared once and re-executed per page instead of being rebuilt on every iteration.
$revisionSTMT = $pdo->prepare("SELECT `rev_text_id` FROM `revision` WHERE `rev_id` = ? LIMIT 1");
$pageContentSTMT = $pdo->prepare("SELECT `old_text` FROM `text` WHERE `old_id` = ? LIMIT 1");
$pages = $pdo->query("SELECT `page_id`, `page_title`, `page_latest` FROM `page` ORDER BY `page_id` ASC");
if ($revisionSTMT === false || $pageContentSTMT === false || $pages === false) {
    // Only reachable when PDO is not in exception mode.
    throw new RuntimeException('Unable to query the wiki database.');
}

// parse() only reads the rule tables, so one converter serves every page.
$wiky = new wiky;

foreach ($pages as $page) {
    // Resolve page -> latest revision -> text row. A missing row leaves the
    // text empty, so the page still gets an (empty) HTML shell as before,
    // without indexing into a false fetch() result.
    $wikiText = '';
    $revisionSTMT->execute(array($page['page_latest']));
    $revisionNum = $revisionSTMT->fetch();
    $revisionSTMT->closeCursor();
    if ($revisionNum !== false) {
        $pageContentSTMT->execute(array($revisionNum['rev_text_id']));
        $pageContent = $pageContentSTMT->fetch();
        $pageContentSTMT->closeCursor();
        if ($pageContent !== false) {
            $wikiText = $pageContent['old_text'];
        }
    }

    // parse() returns false for empty input; emit an empty body in that case.
    $pageHtml = $wiky->parse($wikiText);
    if ($pageHtml === false) {
        $pageHtml = '';
    }

    $pageOutput = "<!doctype html><html lang='en'><head><meta charset='utf-8'></head><body>";
    $pageOutput .= $pageHtml;
    $pageOutput .= "</body></html>";

    // The page id is appended so titles that collapse to the same sanitised
    // name do not overwrite each other.
    $title = $page['page_title'].'_'.$page['page_id'];
    // NOTE(review): this class keeps word characters, '-', '"', ' ' and '*'.
    // The quotes and asterisk look unintended, but the pattern is left as is
    // so existing backup filenames do not change.
    $title = preg_replace('/[^\w-" *"]/', '', $title);
    if (file_put_contents($todayBackupDIR.$title.".html", $pageOutput) === false) {
        trigger_error('Failed to write backup for page id '.$page['page_id'], E_USER_WARNING);
    }
}
/* Wiky.php - A tiny PHP "library" to convert Wiki Markup language to HTML
* Author: Toni Lähdekorpi <[email protected]>
* Modified by Junade Ali for usage in backupwiki.php <[email protected]>
*
* Code usage under any of these licenses:
* Apache License 2.0, http://www.apache.org/licenses/LICENSE-2.0
* Mozilla Public License 1.1, http://www.mozilla.org/MPL/1.1/
* GNU Lesser General Public License 3.0, http://www.gnu.org/licenses/lgpl-3.0.html
* GNU General Public License 2.0, http://www.gnu.org/licenses/gpl-2.0.html
* Creative Commons Attribution 3.0 Unported License, http://creativecommons.org/licenses/by/3.0/
*/
/**
 * Minimal wiki-markup-to-HTML converter driven by an ordered table of regular
 * expressions.
 *
 * $patterns and $replacements are parallel arrays: preg_replace() applies them
 * pairwise and in order, so later rules operate on the output of earlier ones
 * (list blocks are wrapped first, their items become <li> afterwards, and the
 * line-break rules run last).
 */
class wiky {
    private $patterns, $replacements;

    /**
     * Builds the rule tables.
     *
     * @param bool $analyze When true, the PCRE "S" modifier is appended to
     *                      every pattern.
     */
    public function __construct($analyze=false) {
        $this->patterns=array(
            "/\r\n/",
            // Headings
            "/^==== (.+?) ====$/m", // Subsubheading
            "/^====(.+?)====$/m", // Subsubheading without spaces
            "/^=== (.+?) ===$/m", // Subheading
            "/^===(.+?)===$/m", // Subheading without spaces
            "/^== (.+?) ==$/m", // Subheading
            "/^==(.+?)==$/m", // Subheading without spaces
            "/^= (.+?) =$/m", // Heading
            "/^=(.+?)=$/m", // Heading without space
            // Formatting
            "/\'\'\'\'\'(.+?)\'\'\'\'\'/s", // Bold-italic
            "/\'\'\'(.+?)\'\'\'/s", // Bold
            "/\'\'(.+?)\'\'/s", // Italic
            // Special
            "/^----+(\s*)$/m", // Horizontal line
            "/\[\[(file|img):((ht|f)tp(s?):\/\/(.+?))( (.+))*\]\]/i", // (File|img):(http|https|ftp) aka image
            "/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))( (.+))\]/i", // Other urls with text
            "/\[((news|(ht|f)tp(s?)|irc):\/\/(.+?))\]/i", // Other urls without text
            // Indentations
            "/[\n\r]: *.+([\n\r]:+.+)*/", // Indentation first pass
            "/^:(?!:) *(.+)$/m", // Indentation second pass
            "/([\n\r]:: *.+)+/", // Subindentation first pass
            "/^:: *(.+)$/m", // Subindentation second pass
            // Ordered list
            // NOTE(review): "[\n|\r]" also matches a literal "|"; probably a typo
            // for "[\n\r]", left untouched to keep the output unchanged.
            "/[\n\r]?#.+([\n|\r]#.+)+/", // First pass, finding all blocks
            "/[\n\r]#(?!#) *(.+)(([\n\r]#{2,}.+)+)/", // List item with sub items of 2 or more
            "/[\n\r]#{2}(?!#) *(.+)(([\n\r]#{3,}.+)+)/", // List item with sub items of 3 or more
            "/[\n\r]#{3}(?!#) *(.+)(([\n\r]#{4,}.+)+)/", // List item with sub items of 4 or more
            // Unordered list (same "[\n|\r]" note as above)
            "/[\n\r]?\*.+([\n|\r]\*.+)+/", // First pass, finding all blocks
            "/[\n\r]\*(?!\*) *(.+)(([\n\r]\*{2,}.+)+)/", // List item with sub items of 2 or more
            "/[\n\r]\*{2}(?!\*) *(.+)(([\n\r]\*{3,}.+)+)/", // List item with sub items of 3 or more
            "/[\n\r]\*{3}(?!\*) *(.+)(([\n\r]\*{4,}.+)+)/", // List item with sub items of 4 or more
            // List items
            "/^[#\*]+ *(.+)$/m", // Wraps all list items to <li/>
            // Newlines (TODO: make it smarter so that it groups paragraphs)
            "/^(?!<li|dd).+(?=(<a|strong|em|img)).+$/mi", // Lines containing inline elements (TODO: the li|dd exclusion is a crude heuristic)
            "/^[^><\n\r]+$/m", // Lines with no elements
        );
        $this->replacements=array(
            "\n",
            // Headings
            "<h4>$1</h4>",
            "<h4>$1</h4>",
            "<h3>$1</h3>",
            "<h3>$1</h3>",
            "<h2>$1</h2>",
            "<h2>$1</h2>",
            "<h1>$1</h1>",
            "<h1>$1</h1>",
            // Formatting
            "<strong><em>$1</em></strong>",
            "<strong>$1</strong>",
            "<em>$1</em>",
            // Special
            "<hr/>",
            "<img src=\"$2\" alt=\"$6\"/>",
            "<a href=\"$1\">$7</a>",
            "<a href=\"$1\">$1</a>",
            // Indentations
            "\n<dl>$0\n</dl>", // Newline is here to make the second pass easier
            "<dd>$1</dd>",
            "\n<dd><dl>$0\n</dl></dd>",
            "<dd>$1</dd>",
            // Ordered list
            "\n<ol>\n$0\n</ol>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",
            "\n<li>$1\n<ol>$2\n</ol>\n</li>",
            // Unordered list
            "\n<ul>\n$0\n</ul>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",
            "\n<li>$1\n<ul>$2\n</ul>\n</li>",
            // List items
            "<li>$1</li>",
            // Newlines
            "$0<br/>",
            "$0<br/>",
        );
        if($analyze) {
            foreach($this->patterns as $k=>$v) {
                $this->patterns[$k].="S";
            }
        }
    }

    /**
     * Converts wiki markup to HTML by applying every rule in order.
     *
     * @param string $input Wiki markup.
     *
     * @return string|false The converted HTML, or false when $input is empty
     *                      or the regex engine reports an error.
     */
    public function parse($input) {
        // empty() also treats the string "0" as empty, which used to discard a
        // page whose entire text is "0". Every other empty value still yields false.
        if(empty($input) && $input !== '0')
            return false;
        $output=preg_replace($this->patterns,$this->replacements,$input);
        if($output===null) {
            // preg_replace() returns null on a PCRE failure (e.g. backtrack
            // limit). Report it rather than passing null on silently.
            trigger_error('wiky::parse(): PCRE error code '.preg_last_error(), E_USER_WARNING);
            return false;
        }
        return $output;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment