Skip to content

Instantly share code, notes, and snippets.

@ilhooq
Last active September 26, 2018 15:28
Show Gist options
  • Save ilhooq/65c1e2490fdb616413dfede8647f2691 to your computer and use it in GitHub Desktop.
Save ilhooq/65c1e2490fdb616413dfede8647f2691 to your computer and use it in GitHub Desktop.
Import wiki from WikiSpaces to MediaWiki
<?php
/**
* Import pages from Wikispaces
*
* ======================================================================
*
* To use this script, put it in your MediaWiki 'maintenance' folder
* Then call the script on the command line.
*
* Examples of use :
*
* php importWikispaces.php --help
*
* php importWikispaces.php --overwrite -u username -p password -s wikiname
*
* ======================================================================
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Sylvain Philip <contact at sphilip.com>
* @see http://helpcenter.wikispaces.com/customer/portal/articles/1959131-wikitext
* @see http://helpcenter.wikispaces.com/customer/portal/articles/1964502-api-customizations
* @ingroup Maintenance
*/
use MediaWiki\MediaWikiServices;
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script which reads pages (optionally with their revision
* history) and attached files from a Wikispaces space and imports them
* into this wiki.
*
* @ingroup Maintenance
*/
class ImportWikispacePages extends Maintenance {
/**
* Base URL of the Wikispaces service; the constructor appends the WSDL
* paths of the site, space and page SOAP APIs to it.
*/
const WIKISPACES_URL = 'http://www.wikispaces.com';
/**
* SOAP client for the site API; execute() uses it to log in.
*
* @var SoapClient
*/
protected $siteApi = null;
/**
* SOAP client for the space API; execute() uses it to look up the space by name.
*
* @var SoapClient
*/
protected $spaceApi = null;
/**
* SOAP client for the page API (page lists, page versions and page content).
*
* @var SoapClient
*/
protected $pageApi = null;
/**
* cURL handle shared by the download methods. It is created on first use
* by initCurl() and closed in __destruct().
*
* @var resource
*/
protected $curlHandle = null;
/**
* Directory in which downloaded files are stored; set by the constructor.
*
* @var string
*/
protected $cacheDir = '';
/**
* Exit code handed to error() at the end of execute(); addRevision() sets
* it to 1 when a revision cannot be imported.
*/
protected $exit = 0;
/**
* Number of revisions imported successfully.
*/
protected $successCount = 0;
/**
* Number of revisions whose import failed.
*/
protected $failCount = 0;
/**
* Number of revisions that were skipped.
*/
protected $skipCount = 0;
/**
* Declares the command-line options, creates the three Wikispaces SOAP
* clients and prepares the local cache directory for downloaded files.
*/
public function __construct() {
    parent::__construct();
    $this->addDescription( 'Reads WikiSpaces pages and imports their content and files in the wiki' );
    $this->addOption('user', 'Wikispaces username.', true, true, 'u' );
    $this->addOption('password', 'Wikispaces password.', true, true, 'p' );
    $this->addOption('spacename', 'Wikispaces name.', true, true, 's' );
    $this->addOption('page', 'Import only one page. Specify the name of the page.', false, true);
    $this->addOption('overwrite', 'Overwrite existing pages. This will only overwrite pages if the remote page has been modified since the local page was last modified.' );
    $this->addOption('history', 'Preserve history importing wikispaces pages revisions.');
    $this->addOption( 'use-timestamp', 'Use the modification date of the page as the timestamp for the edit' );
    $this->addOption( 'force', 'Force overwrite. It will force overwrite even if old content is equal with new one and doesn\'t check timestamp for the edit.', false, false, 'f');

    $this->siteApi = new SoapClient(self::WIKISPACES_URL . '/site/api/?wsdl');
    $this->spaceApi = new SoapClient(self::WIKISPACES_URL . '/space/api/?wsdl');
    $this->pageApi = new SoapClient(self::WIKISPACES_URL . '/page/api/?wsdl');

    // realpath() returns false when ../cache does not exist; concatenating
    // that would silently turn the cache path into "/wikispaces".
    $cacheRoot = realpath(__DIR__ . '/../cache');
    if ($cacheRoot === false) {
        $cacheRoot = dirname(__DIR__) . '/cache';
    }
    $this->cacheDir = $cacheRoot . '/wikispaces';

    // Create missing parent directories as well. The second is_dir() check
    // covers the case where the directory appeared between the two calls.
    if (!is_dir($this->cacheDir) && !mkdir($this->cacheDir, 0777, true) && !is_dir($this->cacheDir)) {
        $this->error("Unable to create cache directory {$this->cacheDir}.\n");
    }
}
/**
* Closes the shared cURL handle if one was opened during the run.
*/
public function __destruct() {
    if ($this->curlHandle === null) {
        // No download ever happened, so there is nothing to release.
        return;
    }
    curl_close($this->curlHandle);
}
/**
* Entry point of the script: logs in to Wikispaces, resolves the space and
* imports either every page of the space or the single page named with
* --page. With --history every listed version of a page is imported,
* otherwise only the page itself.
*/
public function execute() {
    $session = $this->siteApi->login($this->getOption('user'), $this->getOption('password'));
    $space = $this->spaceApi->getSpace($session, $this->getOption('spacename'));
    $history = $this->hasOption('history');
    $pageName = $this->getOption('page');

    // Compare explicitly instead of using empty(): a page named "0" is a
    // legitimate --page value and must not trigger a full import.
    if ($pageName === null || $pageName === '') {
        // Import all pages
        $pages = $this->pageApi->listPages($session, $space->id);
        foreach ($pages as $page) {
            if ($history) {
                $this->importPageHistory($session, $space->id, $page->name);
            } else {
                $this->addRevision($session, $page);
            }
        }
    } elseif ($history) {
        $this->importPageHistory($session, $space->id, $pageName);
    } else {
        $page = $this->pageApi->getPage($session, $space->id, $pageName);
        $this->addRevision($session, $page);
    }

    $this->output( "Done! $this->successCount succeeded, $this->skipCount skipped.\n" );
    if ($this->exit) {
        $this->error( "Import failed with $this->failCount failed pages.\n", $this->exit);
    }
}

/**
* Imports every version the page API lists for one page, in the order the
* API returns them.
*
* @param mixed $session Session value returned by the site API login
* @param mixed $spaceId Identifier of the Wikispaces space
* @param string $pageName Name of the page whose versions are imported
*/
protected function importPageHistory($session, $spaceId, $pageName) {
    $versions = $this->pageApi->listPageVersions($session, $spaceId, $pageName);
    foreach ($versions as $version) {
        $this->addRevision($session, $version, $version->versionId);
    }
}
/**
* Imports one Wikispaces page, or one historical version of it, as a
* revision of the wiki page with the same name.
*
* The revision is skipped (and counted in $skipCount) when the title or the
* author name is invalid, when the page exists and --overwrite is not set,
* when --use-timestamp shows the local page is not older than the remote
* one, or when the converted text equals the current revision. Files
* referenced by the page are downloaded and imported before the revision.
*
* @param mixed $session Session value returned by the site API login
* @param object $page Page or page-version object returned by the page API
* @param int $version Wikispaces version id; 0 fetches the current page content
*/
protected function addRevision($session, $page, $version=0)
{
    $summary = 'Imported from Wikispaces';
    $overwrite = $this->hasOption('overwrite');
    $force = $this->hasOption('force');
    $useTimestamp = $this->hasOption( 'use-timestamp' );
    $pageName = $page->name;
    $timestamp = $useTimestamp ? wfTimestamp( TS_UNIX, (int) $page->date_created) : wfTimestampNow();

    // Validate the title before touching users, so that no account is
    // created for a revision that ends up being skipped.
    $title = Title::newFromText($pageName);
    // Have to check for # manually, since it gets interpreted as a fragment
    if (!$title || $title->hasFragment()) {
        $this->error("Invalid title $pageName. Skipping.\n");
        $this->skipCount++;
        return;
    }

    $user = User::newFromName($page->user_created_username);
    if (!$user) {
        // Retry with the name normalised through Title; that can itself be
        // null, in which case an empty name is tried and rejected below.
        $fallbackTitle = Title::newFromText($page->user_created_username);
        $username = $fallbackTitle ? (string) $fallbackTitle : '';
        $this->error( "User creation failed with this Wikispace user:{$page->user_created_username}. Try with {$username}.\n");
        $user = User::newFromName($username);
        if (!$user) {
            $this->error( "Invalid username : {$username}. Revision skipped (Page: {$page->name}, Version: {$version}).\n");
            $this->skipCount++;
            return;
        }
    }
    if (!$user->getId() ) {
        $user->addToDatabase();
    }

    $exists = $title->exists();
    $oldRevID = $title->getLatestRevID();
    $oldRev = $oldRevID ? Revision::newFromId( $oldRevID ) : null;
    $actualTitle = $title->getPrefixedText();

    // With --force the new revision is stamped after the current one. There
    // is no current revision for a new page, so guard against null. The
    // offset is added to a UNIX timestamp because adding it to the
    // YYYYMMDDHHMMSS form can produce an invalid minute value.
    if ($useTimestamp && $force && $oldRev) {
        $timestamp = (int) wfTimestamp( TS_UNIX, $oldRev->getTimestamp() ) + 1000;
    }

    if ( $exists ) {
        $touched = wfTimestamp( TS_UNIX, $title->getTouched() );
        if (!$overwrite) {
            $this->output( "Title $actualTitle already exists. Skipping.\n" );
            $this->skipCount++;
            return;
        } elseif ( $useTimestamp && !$force && intval( $touched ) >= intval( $timestamp ) ) {
            $this->output( "File for title $actualTitle has not been modified since the destination page was touched. Skipping.\n" );
            $this->skipCount++;
            return;
        }
    }

    try {
        if ($version) {
            $completePage = $this->pageApi->getPageWithVersion($session, $page->spaceId, $page->name, $version);
        } else {
            $completePage = $this->pageApi->getPage($session, $page->spaceId, $page->name);
        }
        $text = $completePage->content;
    } catch (\Exception $e) {
        // Fall back to the WebDAV download when the SOAP call fails.
        $this->error("Soap error: " . $e->getMessage() . " For page $page->name.");
        $this->output("Try to download $page->name using WebDAV.\n");
        $text = $this->downloadPage($page->name, $version);
    }

    foreach ($this->extractFiles($text) as $fileName) {
        $file = $this->downloadFile($fileName);
        // is_file() rather than file_exists(): a directory must never be
        // handed to the file importer.
        if (empty($file) || !is_file($file)) {
            continue;
        }
        // Only an explicit false from importFile() means "not imported";
        // any other result (including no return value) counts as imported.
        if ($this->importFile($file, $user) !== false) {
            $this->output('File imported : ' . $fileName . "\n");
        }
    }

    $this->convertToMediaWiki($text);

    $rev = new WikiRevision( MediaWikiServices::getInstance()->getMainConfig() );
    $rev->setText( rtrim( $text ) );
    $rev->setTitle( $title );
    $rev->setUserObj( $user );
    $rev->setComment( $summary );
    $rev->setTimestamp( $timestamp );

    if ($exists && $overwrite && !$force && $oldRev && $rev->getContent()->equals( $oldRev->getContent())) {
        $this->output( "Page for title $actualTitle contains no changes from the current revision. Skipping.\n" );
        $this->skipCount++;
        return;
    }

    $status = $rev->importOldRevision();
    if ($status) {
        $action = $exists ? 'updated' : 'created';
        $this->output( "Successfully $action $actualTitle\n" );
        $this->successCount++;
    } else {
        $action = $exists ? 'update' : 'create';
        $this->output( "Failed to $action $actualTitle\n" );
        $this->failCount++;
        $this->exit = 1;
    }
}
/**
* Collects the names of the files referenced by [[image:...]] and
* [[file:...]] tags in Wikispaces markup.
*
* References whose target starts with "http" are ignored. Leading and
* trailing slashes are stripped from each name. Empty names and repeated
* names are dropped so that every file is handled once; image references
* come first, then file references, each in order of first appearance.
*
* @param string $text Wikispaces markup
* @return string[] List of distinct, non-empty file names
*/
protected function extractFiles($text)
{
    $text = (string) $text;
    $files = array();
    foreach (array('image', 'file') as $tag) {
        // Cheap substring test first; most pages carry no attachments.
        if (strpos($text, '[[' . $tag . ':') === false) {
            continue;
        }
        if (!preg_match_all('/\[\[' . $tag . ':(?!http)([^]\s]*)/', $text, $matches)) {
            continue;
        }
        foreach ($matches[1] as $reference) {
            $name = trim($reference, '/');
            // "[[image:]]" yields an empty name, which would later resolve
            // to the cache directory itself.
            if ($name !== '') {
                $files[] = $name;
            }
        }
    }
    return array_values(array_unique($files));
}
/**
* Downloads a file of the space over WebDAV into the cache directory and
* returns the local path. A file already present in the cache is reused.
*
* @param string $fileName File name as referenced in the page markup
* @return string Path of the cached file, or '' when the name is unusable
*   or the download or the write failed
*/
protected function downloadFile($fileName)
{
    $fileName = (string) $fileName;
    // An empty name would resolve to the cache directory itself, and a ".."
    // segment would let a remote page write outside the cache directory.
    if ($fileName === '' || in_array('..', explode('/', $fileName), true)) {
        $this->error("Invalid file name '$fileName'. Download skipped.");
        return '';
    }

    $destination = $this->cacheDir . '/' . $fileName;
    if (is_file($destination)) {
        return $destination;
    }

    $spacename = $this->getOption('spacename');
    $this->initCurl();
    $url = "https://$spacename.wikispaces.com/space/dav/files/$fileName";
    curl_setopt($this->curlHandle, CURLOPT_URL,$url);
    $buffer = curl_exec($this->curlHandle);
    if ($buffer === false) {
        $this->error('Curl error : ' . curl_error($this->curlHandle));
        return '';
    }

    // A 4xx/5xx response still has a body; do not cache an error page as
    // if it were the requested file.
    $status = (int) curl_getinfo($this->curlHandle, CURLINFO_HTTP_CODE);
    if ($status >= 400) {
        $this->error("HTTP error $status while downloading $fileName");
        return '';
    }

    // The name may contain sub-directories that do not exist in the cache yet.
    $directory = dirname($destination);
    if (!is_dir($directory) && !mkdir($directory, 0777, true) && !is_dir($directory)) {
        $this->error("Unable to create directory $directory");
        return '';
    }
    if (file_put_contents($destination, $buffer) === false) {
        $this->error("Unable to write $destination");
        return '';
    }
    return $destination;
}
/**
* Downloads the raw content of a page, or of one of its versions, over
* WebDAV.
*
* @param string $pageName Name of the page
* @param int $version Version id; an empty value fetches the current page
* @return string Page content; '' after a transport or HTTP error (error()
*   is called with exit code 1 in both cases)
*/
protected function downloadPage($pageName, $version=0)
{
    $spacename = $this->getOption('spacename');
    $this->initCurl();

    // Percent-encode each path segment: page names may contain spaces and
    // other characters that are not valid in a URL path.
    $encodedName = implode('/', array_map('rawurlencode', explode('/', (string) $pageName)));
    if (empty($version)) {
        $url = "https://$spacename.wikispaces.com/space/dav/pages/$encodedName";
    } else {
        $url = "https://$spacename.wikispaces.com/space/dav/history/$encodedName/" . rawurlencode((string) $version);
    }

    curl_setopt($this->curlHandle, CURLOPT_URL,$url);
    $buffer = curl_exec($this->curlHandle);
    if ($buffer === false) {
        $this->error('Curl error : ' . curl_error($this->curlHandle), 1);
        return '';
    }

    // Do not import the body of an error response as page content.
    $status = (int) curl_getinfo($this->curlHandle, CURLINFO_HTTP_CODE);
    if ($status >= 400) {
        $this->error("HTTP error $status while downloading page $pageName", 1);
        return '';
    }
    return $buffer;
}
/**
* Uploads a local file into the wiki's File namespace.
*
* An existing file is only replaced when both --overwrite and --force are
* set. The upload is recorded only after the file was published
* successfully.
*
* @param string $file Path of the local file to import
* @param User $user User recorded as the uploader
* @return bool True when the file was published and recorded, false when
*   it was skipped or publishing failed
*/
protected function importFile($file, $user)
{
    $base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );
    $title = Title::makeTitleSafe( NS_FILE, $base );
    if ( !is_object( $title ) ) {
        $this->output("{$base} could not be imported; a valid title cannot be produced\n" );
        return false;
    }

    $localFile = wfLocalFile( $title );
    if ($localFile->exists()) {
        if ($this->hasOption('overwrite') && $this->hasOption('force')) {
            $this->output( "{$base} exists, overwriting..." );
        } else {
            $this->output( "{$base} exists, skipping\n" );
            return false;
        }
    }

    $mwProps = new MWFileProps( MimeMagic::singleton() );
    $props = $mwProps->getPropsFromPath( $file, true );
    $publishOptions = [];
    $handler = MediaHandler::getHandler($props['mime']);
    if ( $handler ) {
        $metadata = MediaWiki\quietCall( 'unserialize', $props['metadata'] );
        $publishOptions['headers'] = $handler->getContentHeaders( $metadata );
    } else {
        $publishOptions['headers'] = [];
    }

    $archive = $localFile->publish( $file, 0, $publishOptions );
    if ( !$archive->isGood() ) {
        $this->output( "failed. (" .
            $archive->getWikiText( false, false, 'en' ) .
            ")\n" );
        // Nothing was published, so there is no upload to record.
        return false;
    }

    $localFile->recordUpload2($archive->value, '', '', $props, false, $user);
    return true;
}
/**
* Converts Wikispaces markup to MediaWiki markup in place.
*
* Code blocks and ``escaped`` spans are converted first and parked behind
* placeholder tokens, so the inline rules (bold, italic, underline,
* monospace) cannot rewrite their content; they are put back at the end.
* Inline patterns are non-greedy so that several markers on one line are
* converted separately, and "//" that belongs to a URL scheme ("://") is
* not treated as an italic marker.
*
* @param string $text Markup to convert; modified by reference
*/
protected function convertToMediaWiki(&$text) {
    $text = (string) $text;

    // Parked markup, keyed by the placeholder token that stands in for it.
    $stash = array();
    $protect = function ($markup) use (&$stash) {
        $token = "\x02" . count($stash) . "\x03";
        $stash[$token] = $markup;
        return $token;
    };

    // Runs one regex step. preg_* return null on failure (for example when
    // the backtrack limit is hit); keep the current text in that case
    // instead of wiping the page.
    $apply = function ($pattern, $replacement) use (&$text) {
        $converted = $replacement instanceof Closure
            ? preg_replace_callback($pattern, $replacement, $text)
            : preg_replace($pattern, $replacement, $text);
        if ($converted !== null) {
            $text = $converted;
        }
    };

    // Convert code blocks to be compatible with SyntaxHighlight_GeSHi extension
    $apply('/\[\[code format="(.+)"\]\](.+)\[\[code\]\]/Us', function ($m) use ($protect) {
        return $protect('<syntaxhighlight lang="' . $m[1] . '">' . $m[2] . '</syntaxhighlight>');
    });
    $apply('/\[\[code\]\](.+)\[\[code\]\]/Us', function ($m) use ($protect) {
        return $protect('<syntaxhighlight>' . $m[1] . '</syntaxhighlight>');
    });
    // Escaping
    $apply('/``(.*?)``/', function ($m) use ($protect) {
        return $protect('<nowiki>' . $m[1] . '</nowiki>');
    });

    $replacements = array(
        '**' => "'''", // Bold
        '//**' => "'''''", // Bold and italic
        '**//' => "'''''", // Bold and italic
        '[[toc]]' => '',
        '[[file' => '[[:File',
    );
    $text = strtr($text, $replacements);

    // Italic; the lookbehinds skip the "//" of "http://" and similar.
    $apply('/(?<!:)\/\/(.+?)(?<!:)\/\//', '\'\'$1\'\'');
    // Underline
    $apply('/__(.+?)__/', '<u>$1</u>');
    // Monospace
    $apply('/\{\{(.+?)\}\}/', '<span style="font-family:monospace">$1</span>');

    // Convert tables; a table may also end at the very end of the text
    // rather than at a blank line.
    $apply('/(\|\|.+)(?=\n{2}|\n?\z)/Us', function ($m) {
        return self::convertTable($m);
    });

    // Images; neither the name nor the attribute list may run across "]]".
    $apply('/\[\[image:([^\]\[\s]+)\s*([^\]\[]*)\]\]/', function ($m) {
        return self::convertImage($m);
    });

    // Restore in reverse order: an escaped span parked later may contain
    // the token of a code block parked earlier.
    foreach (array_reverse($stash, true) as $token => $markup) {
        $text = str_replace($token, $markup, $text);
    }
}
/**
* Builds a MediaWiki [[File:...]] tag from a matched Wikispaces image tag.
*
* Recognised attributes are width, height, align, caption and link; other
* attributes are ignored. Attributes are parsed as name="value" pairs so
* that quoted values containing spaces (typically captions) are kept whole.
*
* @param array $matches Regex match: [1] is the file name, [2] the raw
*   attribute string
* @return string MediaWiki file tag
*/
protected static function convertImage($matches)
{
    $options = array(
        'width' => '',
        'height' => '',
        'align' => '',
        'caption' => '',
        'link' => '',
    );

    $attributes = isset($matches[2]) ? $matches[2] : '';
    // Group 2 holds a quoted value, group 3 an unquoted one.
    if (preg_match_all('/(\w+)=(?:"([^"]*)"|(\S*))/', $attributes, $pairs, PREG_SET_ORDER)) {
        foreach ($pairs as $pair) {
            if (!isset($options[$pair[1]])) {
                continue;
            }
            // Group 3 is only present for unquoted values; strip stray
            // quotes there, e.g. from an unterminated quoted value.
            $options[$pair[1]] = isset($pair[3]) ? trim($pair[3], '"') : $pair[2];
        }
    }

    $optionsStr = '';
    $hasWidth = !empty($options['width']);
    $hasHeight = !empty($options['height']);
    if ($hasWidth && $hasHeight) {
        $optionsStr .= '|' . $options['width'] . 'x' . $options['height'] . 'px';
    } elseif ($hasWidth) {
        $optionsStr .= '|' . $options['width'] . 'px';
    } elseif ($hasHeight) {
        $optionsStr .= '|x' . $options['height'] . 'px';
    }
    if (!empty($options['align'])) {
        $optionsStr .= '|' . $options['align'];
    }
    if (!empty($options['link'])) {
        $optionsStr .= '|link=' . $options['link'];
    }
    if (!empty($options['caption'])) {
        $optionsStr .= '|' . $options['caption'];
    }
    return "[[File:{$matches[1]}{$optionsStr}]]";
}
/**
* Converts a matched Wikispaces table (rows of "||cell||cell||") into a
* MediaWiki table.
*
* Header cells, marked with "~" directly after the cell separator, are
* rendered in bold. A "~" elsewhere in a cell is ordinary text and is left
* untouched.
*
* @param array $matches Regex match: [1] is the table source, one row per line
* @return string MediaWiki table markup
*/
protected static function convertTable($matches)
{
    $rows = array();
    foreach (explode("\n", $matches[1]) as $line) {
        // Drop a carriage return left over from CRLF line endings so the
        // trailing "||" can be trimmed.
        $line = rtrim($line, "\r");
        if ($line === '') {
            continue;
        }
        $row = '|' . trim($line, '|');
        // Use bold to convert headers
        $row = preg_replace('/(?<=\|)~([^|]*)/', '\'\'\'$1\'\'\'', $row);
        $rows[] = $row . "\n";
    }
    return "{|\n" . implode("|-\n", $rows) . "|}\n";
}
/**
* Creates the shared cURL handle on first use and configures it with the
* Wikispaces credentials. Later calls do nothing.
*/
protected function initCurl() {
    if ($this->curlHandle !== null) {
        return;
    }

    $handle = curl_init();
    if ($handle === false) {
        $this->error('Curl error : unable to initialise a cURL session', 1);
        return;
    }

    $user = $this->getOption('user');
    $password = $this->getOption('password');

    // The account password is sent with every request, so the server
    // certificate must be verified; otherwise anyone able to intercept the
    // connection could read the credentials.
    curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    // Make curl_exec() return false on HTTP status >= 400 so that callers'
    // error handling also covers missing files and failed logins.
    curl_setopt($handle, CURLOPT_FAILONERROR, true);
    curl_setopt($handle, CURLOPT_USERPWD, $user . ':' . $password);

    $this->curlHandle = $handle;
}
}
// Name the maintenance class for the framework, then hand control to the
// file referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = 'ImportWikispacePages';
require_once RUN_MAINTENANCE_IF_MAIN;
@ve3meo
Copy link

ve3meo commented May 8, 2018

Thank you for doing this. I'm a Wikispaces Organizer and founder of the Wikispaces-Refugees forum on Groups.io but am poorly versed in MediaWiki and PHP. That didn't stop me from trying and I did succeed somewhat with an import as described starting at this posting. It imported vanilla Wikispaces pages well but not a full plate of features and syntax.

I'm not sure if this comment area is where I should be posting this feedback. Let me know where you would like it...

Tom

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment