|
var fs = require('fs-extra'); |
|
var wiki = require('nodemw'); |
|
var q = require('q'); |
|
var glob = require('glob'); |
|
var execSync = require('child_process').execSync; |
|
var logger = require('winston'); |
|
var argv = require('minimist')(process.argv.slice(2)); |
|
|
|
// Connection settings for the wiki account this script logs in with.
// NOTE(review): the username and password are hard-coded here; consider
// loading them from the environment or a config file instead.
var clientOptions = {
  server: 'wiki.server.com',
  port: 8080,
  path: '/mediawiki',
  debug: true,
  username: 'uploadmonkey',
  password: 'lol',
  userAgent: 'UploadMonkey/1.1 (http://thissitedoesntexist.com/UploadMonkey/; [email protected]) Node/0.12'
};

// Single nodemw client used for logging in and for every upload below.
var client = new wiki(clientOptions);
|
|
|
// Deferred that is settled when the login attempt finishes. `watchMe` is the
// promise the upload code chains on, so nothing is uploaded before login.
var botLoggedIn = q.defer();
var watchMe = botLoggedIn.promise;

client.logIn(function(err) {
  if (err) {
    // Reject rather than return silently: a promise that never settles makes
    // the script finish without uploading anything and without saying why.
    logger.error('Login failed: ' + (err.message || err));
    botLoggedIn.reject(err);
    return;
  }

  botLoggedIn.resolve();
});
|
|
|
// Command-line options (parsed into `argv` by minimist):
//   --glob             pattern of documents to process (default 'docs/*.docx')
//   --tags             comma-separated names, each appended as [[Category:...]]
//   --ignore-article   do not upload the converted article text
//   --ignore-images    do not upload the extracted images
//   --ignore-original  do not upload the original document
var cglob = argv.glob || 'docs/*.docx';

// minimist does not always hand back a string: a bare `--tags` is `true`,
// `--tags 2015` is a number and a repeated `--tags` is an array, none of which
// can be split directly. Coerce real values to a string first, then drop
// surrounding whitespace and empty entries so "a, ,b" yields ['a', 'b'].
var tags = (argv.tags === undefined || argv.tags === null || typeof argv.tags === 'boolean')
  ? []
  : String(argv.tags).split(',').map(function(tag) {
      return tag.trim();
    }).filter(function(tag) {
      return tag.length > 0;
    });

var ignoreArticle = argv['ignore-article'];
var ignoreImages = argv['ignore-images'];
var ignoreOriginal = argv['ignore-original'];
|
|
|
// Return the last component of `path`: everything after the final backslash,
// and within that, everything after the final forward slash.
var stripPath = function(path) {
  var afterBackslash = path.slice(path.lastIndexOf('\\') + 1);
  return afterBackslash.slice(afterBackslash.lastIndexOf('/') + 1);
};
|
|
|
// Remove the final extension from `file`, leaving any directory part intact.
// Only the last dot of the last path component counts, so './docs/a.docx'
// becomes './docs/a' and 'docs/a.b.docx' becomes 'docs/a.b'. A name with no
// extension, or one whose only dot is its first character (e.g. '.hidden'),
// is returned unchanged.
var stripExtension = function(file) {
  var lastSeparator = Math.max(file.lastIndexOf('/'), file.lastIndexOf('\\'));
  var lastDot = file.lastIndexOf('.');

  // The dot must sit inside the last component and not be its first character.
  if (lastDot <= lastSeparator + 1) {
    return file;
  }

  return file.slice(0, lastDot);
};
|
|
|
/**
 * Build the wiki text for one converted document.
 *
 * Reads `<base>/word/document.xml`, `<base>/word/_rels/document.xml.rels` and
 * `<base>.txt`, where `<base>` is `fileName` without its extension. Every
 * `[[Image:]]` placeholder in the text is replaced, in order of appearance,
 * by `[[File:<article>_<image>.png]]`; the n-th placeholder gets the image
 * referenced by the n-th `r:embed` in document.xml. One `[[Category:...]]`
 * line per configured tag is appended at the end.
 *
 * @param {string} fileName path of the source document
 * @returns {string} the rewritten wiki text
 */
var readAndReplaceImagesInFile = function(fileName) {
  var basePath = stripExtension(fileName);
  var articleName = stripPath(basePath);

  // Load all three inputs up front.
  var documentXml = fs.readFileSync(basePath+'/word/document.xml').toString();
  var relsXml = fs.readFileSync(basePath+'/word/_rels/document.xml.rels').toString();
  var wikiSource = fs.readFileSync(basePath+'.txt').toString();

  // Call `visit` with each successive match of a global regex in `text`.
  var eachMatch = function(pattern, text, visit) {
    for (var hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      visit(hit);
    }
  };

  // Relationship ids in the order the document embeds them.
  var embedOrder = [];
  eachMatch(/r:embed="(rId[0-9]+)"/g, documentXml, function(hit) {
    embedOrder.push(hit[1]);
  });

  // Relationship id -> media file name without its extension.
  var imageByRelId = {};
  eachMatch(/Id="([\w]+)"\sType="[\w|\d|\.|\/|:]+"\sTarget="media\/([\w]+)\.\w{1,4}"/g, relsXml, function(hit) {
    imageByRelId[hit[1]] = hit[2];
  });

  // Naive pairing: the n-th placeholder takes the n-th embedded relationship.
  var nextEmbed = 0;
  var wikiText = wikiSource.replace(/\[\[Image:]]/g, function() {
    var relId = embedOrder[nextEmbed];
    nextEmbed += 1;
    return '[[File:' + articleName + '_' + imageByRelId[relId] + '.png]]';
  });

  // Categories go after a blank line, one per line.
  if (tags.length > 0) {
    wikiText += '\n\n' + tags.map(function(tag) {
      return '[[Category:'+tag+']]\n';
    }).join('');
  }

  return wikiText;
};
|
|
|
// Upload helpers. Each one starts an asynchronous wiki request and logs the
// outcome; failures are logged with logger.error instead of being dropped.
var upload = {

  // Send `text` to the wiki as the article named after `filename`
  // (its path and extension stripped).
  file: function(filename, text) {
    var realName = stripPath(stripExtension(filename));

    logger.info('[article] Uploading ' + realName);
    client.edit(realName, text, 'summary', function(e, ret) {
      if (e) {
        logger.error('[article] Failed to upload ' + realName + ': ' + (e.message || e));
        return;
      }
      logger.info('[article] Successfully uploaded ' + realName);
    });
  },

  // Upload every .png found in the document's extracted word/media directory.
  images: function(fileName) {
    var dirName = stripExtension(fileName);

    glob(dirName+'/word/media/*.png', {}, function(e, files) {
      if (e) {
        // Without this guard `files` is not usable and the loop below throws.
        logger.error('[image] Could not list images for ' + fileName + ': ' + (e.message || e));
        return;
      }

      files.forEach(function(filename) {
        var justFileName = stripPath(filename);
        var buffer = fs.readFileSync(filename);

        logger.info('[image] Uploading ' + justFileName);
        client.upload(justFileName, buffer, 'summary', function(e, data) {
          if (e) {
            logger.error('[image] Failed to upload ' + justFileName + ': ' + (e.message || e));
            return;
          }
          logger.info('[image] Successfully uploaded ' + justFileName);
        });
      });
    });
  },

  // Upload the source document itself under its bare file name.
  original: function(fileName) {
    var buffer = fs.readFileSync(fileName);

    logger.info('[original] Uploading ' + fileName);
    client.upload(stripPath(fileName), buffer, 'summary', function(e, data) {
      if (e) {
        logger.error('[original] Failed to upload ' + fileName + ': ' + (e.message || e));
        return;
      }
      logger.info('[original] Successfully uploaded ' + fileName);
    });
  }

};
|
|
|
// Local, synchronous preparation steps. Each shells out with execSync or
// touches the file system directly; execSync throws if the command fails.
var sys = {

  // Back up the document, unzip it into a directory named after it, delete
  // every numbering.* file found inside (the original author notes these break
  // the later processing), then rebuild the document at its original path.
  precheck: function(file) {
    var dirName = stripExtension(file);

    logger.info('backing up '+file);
    fs.copySync(file, file+'.backup');

    logger.info('unzipping '+file);
    execSync('unzip -o "' +file+ '" -d "' +dirName+ '"');

    var numberingFiles = glob.sync(dirName+'/**/numbering.*');
    numberingFiles.forEach(function(numberingFile) {
      logger.info('removing '+numberingFile);
      fs.unlinkSync(numberingFile);
    });

    logger.info('removing '+file);
    fs.unlinkSync(file);

    logger.info('rezipping '+file);
    execSync('cd "'+dirName+'" && zip -r "../' +stripPath(file)+ '" . && cd ../');
  },

  // Convert the document to txt:MediaWiki using libreoffice.
  convert: function(file) {
    // Write the .txt next to the source document: that is where it is read
    // back from, so a fixed output directory breaks any glob outside it.
    var outDir = file.slice(0, file.length - stripPath(file).length);
    if (outDir.length > 1) {
      outDir = outDir.slice(0, -1); // drop the trailing separator, but keep a bare '/'
    }
    outDir = outDir || '.';

    logger.info('converting '+file);
    execSync('soffice --headless --convert-to txt:MediaWiki "'+file+ '" --outdir "'+outDir+'"');
  },

  // Convert every jp* image of the document to png.
  fixImages: function(file) {
    var dirName = stripExtension(file);
    var jpgPattern = dirName+'/word/media/*.jp*';

    // Skip when nothing matches: the command is expected to exit non-zero on
    // an unmatched pattern, which would make execSync throw.
    if (glob.sync(jpgPattern).length === 0) {
      logger.info('no jpgs to convert for '+file);
      return;
    }

    logger.info('making all jpgs into pngs for '+file);
    execSync('mogrify -format png "'+jpgPattern+'"');
  },

  // Prefix every png with the document name so uploads from different
  // documents do not collide.
  renameImages: function(file) {
    var dirName = stripExtension(file);
    var fileName = stripPath(dirName);
    var pngPattern = dirName+'/word/media/*.png';

    // Same guard as fixImages: nothing to rename means nothing to run.
    if (glob.sync(pngPattern).length === 0) {
      logger.info('no pngs to rename for '+file);
      return;
    }

    logger.info('renaming all pngs (collision prevention) for '+file);
    execSync('mmv "'+pngPattern+'" "'+dirName+'/word/media/'+fileName+'_#1.png"');
  },

  // Delete every jp* image from the document's media directory.
  removeJpegs: function(file) {
    var dirName = stripExtension(file);
    logger.info('removing all jp*gs for '+file);

    var jpgFiles = glob.sync(dirName+'/word/media/*.jp*');
    jpgFiles.forEach(function(fileName) {
      logger.info('removing '+fileName);
      fs.unlinkSync(fileName);
    });
  }

};
|
|
|
// run a bunch of system calls |
|
// Run every local preparation step on `file`, one after another, in a fixed
// order. A step that throws stops the remaining ones.
var doSystemWorkOn = function(file) {
  var steps = ['precheck', 'convert', 'fixImages', 'renameImages', 'removeJpegs'];

  steps.forEach(function(step) {
    sys[step](file);
  });
};
|
|
|
// Every document matching the configured glob pattern.
var files = glob.sync(cglob);

files.forEach(function(file) {
  // Local preparation is synchronous and runs right away; only the uploads
  // wait for the login promise.
  doSystemWorkOn(file);
  var fileText = readAndReplaceImagesInFile(file);

  watchMe.then(function() {

    if(!ignoreArticle) {
      upload.file(file, fileText);
    }

    if(!ignoreImages) {
      upload.images(file);
    }

    if(!ignoreOriginal) {
      upload.original(file);
    }

  }).catch(function(err) {
    // Without a rejection handler, anything thrown above (for example a failed
    // file read) or a rejected login is swallowed by the promise chain.
    logger.error('Upload of ' + file + ' aborted: ' + ((err && err.message) || err));
  });

});
Hello!
Could you explain how to use Converter.js?
I am running Converter.js with 'node Converter.js' and there is no error, but how can I convert my .docx file into a MediaWiki article?
My MediaWiki runs on Ubuntu Server 14.04.4.
Thank you and sorry for my bad English.