Skip to content

Instantly share code, notes, and snippets.

@livenson
Last active August 29, 2015 13:57
Show Gist options
  • Save livenson/9446675 to your computer and use it in GitHub Desktop.
Save livenson/9446675 to your computer and use it in GitHub Desktop.
A script for converting docx files into dokuwiki syntax
#!/usr/bin/perl
# convert docx to dokuwiki
# based on https://www.dokuwiki.org/tips:docpdf2dokuwiki
# requirements: libreoffice (the soffice binary must be on PATH),
#               HTML::WikiConverter plus its DokuWiki dialect (HTML::WikiConverter::DokuWiki)
# running: ./todokuwiki.pl path/to/file.docx
use HTML::WikiConverter;
use File::Basename;
use File::Copy;
use strict;
use warnings;

# Convert a .doc/.docx file to DokuWiki markup.
#
# Steps:
#   1. If the argument ends in .doc/.docx, ask LibreOffice (soffice) to
#      convert it to HTML next to the source file.
#   2. If "<dir>/<stem>.html" exists, convert it with HTML::WikiConverter and
#      write the result to "<dir>/<lowercased stem>.txt".
#   3. Copy every *.txt from the source directory to $txtpath and every
#      *.png / *.jpg to $imgpath, creating those directories when missing.
#
# The argument may also be any other file (e.g. an .html file): step 1 is then
# skipped and an already existing "<stem>.html" is converted.

my $txtpath = "/home/ilja/pages"; # location of generated .txt files
my $imgpath = "/home/ilja/media"; # location of generated images

# Copy every non-hidden entry of $from_dir whose name ends in ".<ext>" (for
# each extension given, in order) into $to_dir.
#   $from_dir    directory to scan (not recursive)
#   $to_dir      destination directory; created when it does not exist
#   @extensions  extensions without the leading dot, matched case-sensitively
# Failures are reported with warn() and do not abort the script.
sub copy_by_extension {
    my ($from_dir, $to_dir, @extensions) = @_;

    if ( !-e $to_dir ) {
        unless ( mkdir $to_dir ) {
            warn "Cannot create $to_dir: $!\n";
            return;
        }
    }

    # readdir instead of glob(): glob splits its pattern on whitespace, so a
    # directory name containing spaces would silently match nothing.
    my $dh;
    unless ( opendir( $dh, $from_dir ) ) {
        warn "Cannot read directory $from_dir: $!\n";
        return;
    }
    # Skip dot-files and sort, mirroring what a "*.ext" glob would return.
    my @names = sort grep { !/\A\./ } readdir $dh;
    closedir $dh;

    for my $ext (@extensions) {
        for my $name ( grep { /\.\Q$ext\E\z/ } @names ) {
            copy( "$from_dir/$name", $to_dir )
                or warn "Cannot copy $from_dir/$name to $to_dir: $!\n";
        }
    }
    return;
}

my $file = $ARGV[0];
die "usage: $0 path/to/file.docx\n"
    unless defined $file && length $file;
print "Processing ". $file ."\n";

my $wc = HTML::WikiConverter->new( dialect => 'DokuWiki' );

my $dirname = dirname($file);
# Strip only the final extension of the file name. The suffix pattern is
# matched against the end of the base name, so dots in directory names
# ("./", "v1.2/") or earlier in the file name are left alone.
my ($stem) = fileparse( $file, qr/\.[^.]*/ );

if ( $file =~ /\.docx?\z/i ) {
    # convert docx to html
    # List-form system() bypasses the shell, so file names with spaces or
    # shell metacharacters are passed through verbatim. --outdir makes
    # soffice write the HTML beside the source file instead of into the
    # current working directory.
    my @cmd = (
        'soffice', '--headless',
        '--convert-to', 'html:HTML',
        '--outdir', $dirname,
        $file,
    );
    system(@cmd) == 0
        or warn "soffice failed to convert $file (exit status $?)\n";
}

# soffice names its output after the input file, so the HTML keeps the
# original case; only the generated wiki page gets a lowercased name.
my $htmfile = "$dirname/$stem.html";
my $newfile = "$dirname/" . lc($stem) . ".txt";

if ( -e $htmfile ) {
    # Convert before opening the output so a failing conversion does not
    # leave an empty page behind.
    my $input = $wc->html2wiki( file => $htmfile );

    # Truncate rather than append: re-running the script must not duplicate
    # the page content.
    open( my $out, '>', $newfile )
        or die "Cannot write $newfile: $!\n";
    print {$out} "$input";
    close($out)
        or die "Cannot close $newfile: $!\n";
}
else {
    warn "No HTML file found at $htmfile; nothing converted\n";
}

# copy parsed data to the prepared folders
copy_by_extension( $dirname, $txtpath, 'txt' );
# presumably these are the images LibreOffice extracted during conversion
copy_by_extension( $dirname, $imgpath, 'png', 'jpg' );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment