Created
September 5, 2011 02:09
-
-
Save silvioq/1193904 to your computer and use it in GitHub Desktop.
odf2pl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# vi: set cin sw=2: | |
use strict; | |
use warnings; | |
use constant mm => 25.4 / 72; | |
use constant in => 1 / 72; | |
use constant pt => 1; | |
use Archive::Extract; | |
use Data::Dumper; | |
use File::Temp qw/tempdir/; | |
use File::Spec::Functions; | |
use Mojo::DOM; | |
use DateTime; | |
# ----------------------------------------------------------------------
# Main program.
#
# Reads the OpenDocument text file named on the command line and prints,
# on standard output, a Perl script that uses PDF::API2::Lite to render
# the document.  The generated script saves its result next to the input,
# with the ".odt" extension replaced by ".pdf".
# ----------------------------------------------------------------------
my $file = $ARGV[0];
usage() unless $file;

# Validate the extension before doing any work: a file that is not named
# *.odt is rejected without trying to unpack it.
my $outfile = $file;
if( !( $outfile =~ s/\.odt$/\.pdf/ ) ){
  usage();
}

# An .odt file is a zip archive; unpack it into a self-cleaning temp dir.
my $ae = Archive::Extract->new( archive => $file, type => 'zip' )
  or die "Can't open $file as a zip archive\n";
my $tmpdir = tempdir( CLEANUP => 1 );
$ae->extract( to => $tmpdir )
  or die "Can't extract $file: " . ( $ae->error || "unknown error" ) . "\n";
my $content_file = catfile $tmpdir, 'content.xml';
my $styles_file = catfile $tmpdir, 'styles.xml';

# Generate styles
my $style_dom = Mojo::DOM->new( read_normalized_xml( $styles_file ) );

# Generate content
my $content_dom = Mojo::DOM->new( read_normalized_xml( $content_file ) );

# Init code
init_code();

# Print page properties
my %pp = ();
page_properties();

# Global styles
my %styles = (); # Text & Paragraph styles
my %gstyles = (); # Graphic styles
get_styles();

# $Data::Dumper::Pad = "# ";
# print "# \n# Paragraph styles:\n";
# print Dumper( \%styles );
# print "#\n";

# All elements: only top-level paragraphs are rendered.
my $office_text = $content_dom->at( "office-body office-text" )
  or die "Can't find office:body / office:text in content.xml\n";
$office_text->children->each( sub{
  my $e = shift;
  get_paragraph($e) if( $e->type eq "text-p" );
});

# End!
# The file name ends up inside a single-quoted literal of the generated
# script, so backslashes and single quotes have to be escaped.
( my $quoted_outfile = $outfile ) =~ s/([\\'])/\\$1/g;
print "\$pdf->saveas( '$quoted_outfile' );\n";
text_block_code();

# Reads an XML file line by line, passes every line through
# normalize_xml() and returns the concatenated result.  A "\n" is appended
# after every (unchomped) line, exactly as the parser input was built
# before.  Dies when the file cannot be opened.
sub read_normalized_xml{
  my $path = shift;
  open my $fh, '<', $path or die "Can't open $path: $!\n";
  my $xml = "";
  while( my $line = <$fh> ){
    $xml .= normalize_xml( $line ) . "\n";
  }
  close $fh;
  return $xml;
}
# Replaces the namespace colon by a dash in the names of opening and
# closing tags that belong to the draw, text, style and office namespaces
# (e.g. "<text:p>" becomes "<text-p>", "</text:p>" becomes "</text-p>").
# Attribute names and tags of other namespaces are left untouched.
# Returns the rewritten string.
sub normalize_xml{
  my ($xml) = @_;
  $xml =~ s{<(/?)(draw|text|style|office):}{<$1$2-}g;
  return $xml;
}
# Looks up the page layout referenced by the first master page found in
# $style_dom and returns the page geometry as a hash (a list of key/value
# pairs).  All values are what to_pixels() returns for the attributes:
#   pw, ph          page width and height
#   mt, ml, mr, mb  top, left, right and bottom margins
#   x1, y1          left margin and (page height - top margin)
#   ww, wh          width and height of the area inside the margins
# Dies when the master page, its layout name, the layout properties or the
# page width cannot be found.
sub get_page_properties{
  my $master = $style_dom->find( "office-master-styles style-master-page" )->first;
  die "Can't find master style page" unless $master;
  my $layout = $master->attrs( "style:page-layout-name" );
  die "Master page has no style:page-layout-name" unless defined $layout;
  my %pp;
  $style_dom->find( "style-page-layout" )->each( sub{
    my $e = shift;
    my $name = $e->attrs("style:name");
    # Only the layout referenced by the master page is of interest.
    return unless defined $name && $name eq $layout;
    my $prop = $e->find( "style-page-layout-properties" )->first;
    die ( "Can't find page layout properties" ) unless $prop;
    $pp{pw} = to_pixels( $prop->attrs( "fo:page-width" ) );
    $pp{ph} = to_pixels( $prop->attrs( "fo:page-height" ) );
    $pp{mt} = to_pixels( $prop->attrs( "fo:margin-top" ) );
    $pp{ml} = to_pixels( $prop->attrs( "fo:margin-left" ) );
    $pp{mr} = to_pixels( $prop->attrs( "fo:margin-right" ) );
    $pp{mb} = to_pixels( $prop->attrs( "fo:margin-bottom" ) );
    # Derived values used by the drawing code.
    $pp{y1} = $pp{ph} - $pp{mt};
    $pp{x1} = $pp{ml};
    $pp{ww} = $pp{pw} - $pp{ml} - $pp{mr};
    $pp{wh} = $pp{ph} - $pp{mb} - $pp{mt};
    # fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in"
    return;
  } );
  die "Can't find page style" unless $pp{pw};
  return %pp;
}
# Fills the file-level %styles (text and paragraph styles) and %gstyles
# (graphic styles) hashes.  Does nothing when %styles is already filled.
#
# The default styles found in $style_dom are stored under the name
# "Standard"; the automatic styles found in $content_dom are stored under
# their own style:name, and every property they do not define falls back
# to the value of the "Standard" style.
#
# Dies when a default paragraph/text style has no text properties or when
# a graphic style has no graphic properties.
sub get_styles{
  return if %styles;
  $style_dom->find( "style-default-style" )->each( sub{
    my $e = shift;
    my $family = $e->attrs( "style:family" );
    $family = "" unless defined $family;
    if( $family =~ /(paragraph)|(text)/ ){
      my $tp = $e->find( "style-text-properties" )->first;
      my $pp = $e->find( "style-paragraph-properties" )->first;
      die "Can't find style text properties for Standard style" unless $tp;
      # Paragraph properties are optional: without them the alignment
      # defaults to 'left'.
      $styles{Standard} = {
        font => $tp->attrs( "style:font-name" ),
        size => to_pixels( $tp->attrs( "fo:font-size" ) ),
        align => ( $pp && $pp->attrs( "fo:text-align" ) ) || 'left',
        color => $tp->attrs( "fo:color" ) || 'black',
      };
    } elsif( $family eq "graphic" ){
      my $gp = $e->at( "style-graphic-properties" );
      die "Can't find style graph properties for Standard stype" unless $gp;
      $gstyles{Standard} = graphic_style_from( $gp );
    }
  } );
  $content_dom->find( "office-automatic-styles style-style" )->each( sub{
    my $e = shift;
    my $name = $e->attrs("style:name");
    my $family = $e->attrs( "style:family" );
    $family = "" unless defined $family;
    if( $family =~ /(paragraph)|(text)/ ){
      my $tp = $e->find( "style-text-properties" )->first;
      my $pp = $e->find( "style-paragraph-properties" )->first;
      $styles{$name} = {
        font => ( $tp && $tp->attrs( "style:font-name" ) ) || $styles{Standard}->{font},
        size => $tp && $tp->attrs( "fo:font-size" ) ? to_pixels( $tp->attrs( "fo:font-size" ) ) :
                $styles{Standard}->{size},
        align => ( $pp && $pp->attrs( "fo:text-align" ) ) || $styles{Standard}->{align},
        color => ( $tp && $tp->attrs( "fo:color" ) ) || $styles{Standard}->{color},
      }
    } elsif( $family eq "graphic" ){
      my $gp = $e->at( "style-graphic-properties" );
      die "Can't find style graph properties for $name stype" unless $gp;
      $gstyles{$name} = graphic_style_from( $gp );
    }
  } );
}

# Builds the hash reference stored in %gstyles from a
# style-graphic-properties element, applying the defaults used by this
# script for attributes that are absent.
sub graphic_style_from{
  my $gp = shift;
  return {
    'stroke-color' => $gp->attrs( "svg:stroke-color" ) || "black",
    'draw-stroke' => $gp->attrs( "draw:stroke" ) || "none",
    'fill-color' => $gp->attrs( "draw:fill-color" ) || "cyan",
    'fill' => $gp->attrs( "draw:fill" ) || "true",
  };
}
# Converts an ODF length such as "0.7874in", "21cm", "210mm" or "12pt"
# into PDF points (1/72 inch), using the file-level "in" and "mm"
# constants.  Despite its name the result is in points, not pixels.
#
# Inch, centimetre and millimetre values are truncated to an integer;
# point values lose only their unit suffix.  A value with any other (or
# no) unit is returned unchanged.  An undefined value is returned as
# undef without raising warnings.
#
# The ($) prototype is kept so that existing call sites keep evaluating
# their argument in scalar context.
sub to_pixels($){
  my $val = shift;
  return $val unless defined $val;
  if( $val =~ s/in$// ){
    return int($val / in);
  } elsif ( $val =~ s/cm$// ) {
    # Centimetres are converted through the millimetre constant.
    return int($val * 10 / mm);
  } elsif ( $val =~ s/mm$// ) {
    return int($val / mm);
  } elsif ( $val =~ s/pt$// ) {
    return $val;
  } else {
    return $val;
  }
}
# Emits the code that renders the text held by a frame-like element (a
# draw-frame, or a draw-rect when called from get_rect).
#
# The first text-p element inside $frame is looked up; nothing is emitted
# when there is none.  The paragraph's own text and then the text of each
# of its child elements are printed through print_block(), positioned
# relative to the current paragraph using the element's svg:x / svg:y
# offsets (a missing offset counts as 0).
#
# Dies when a referenced text style is unknown, or when there is text to
# render and the anchor type is not "paragraph".
sub get_frame($){
  my $frame = shift;
  my $x = to_pixels( $frame->attrs( "svg:x" ) ) || 0;
  my $y = to_pixels( $frame->attrs( "svg:y" ) ) || 0;
  my $atype = $frame->attrs( "text:anchor-type" );
  $atype = "" unless defined $atype;
  my $text = $frame->at( 'text-p' );
  return unless $text;

  # Resolves the style name of a node, falling back to "Standard".
  my $style_of = sub{
    my $node = shift;
    my $style = $node->attrs( "text:style-name" ) || "Standard";
    die "Can't use style $style" if( !$styles{$style} );
    return $style;
  };

  # Emits font selection plus one text block for the given string.
  my $emit = sub{
    my( $string, $style ) = @_;
    die "Can't use frame when anchor type is $atype" unless $atype eq "paragraph";
    my $ss = $styles{$style};
    print "\$font = \$pdf->corefont( '". $ss->{font} . "' );\n";
    print "\$pdf->textstart;\n";
    print "\$pdf->textfont( \$font, " . $ss->{size} . " );\n";
    print_block( -t => $string, -x => $x + $pp{x1},
      -y => $y, -w => $pp{ww}, -h => $pp{wh},
      -align => $ss->{'align'}, -size => $ss->{'size'},
      -color => $ss->{color},
      -paragraph => 1 );
  };

  # The paragraph style is validated even when the paragraph has no text
  # of its own.
  my $style = $style_of->( $text );
  cr();
  my $own_text = $text->text;
  $emit->( $own_text, $style ) if $own_text;

  $text->children->each(sub{
    my $e = shift;
    my $child_text = $e->text;
    return unless $child_text;
    $emit->( $child_text, $style_of->( $e ) );
  } );
}
# Emits the generated-script code that draws one block of text through
# text_block() and then closes the text section with $pdf->textend.
#
# Named arguments:
#   -x         => x position (an offset added to $xpos when -paragraph)
#   -y         => y position (an offset subtracted from $ypos when -paragraph)
#   -w         => width of the block
#   -h         => height of the block
#   -advance   => when true, the generated code updates $ypos / $xpos
#   -t         => text to draw
#   -align     => alignment passed on to text_block()
#   -size      => text size, also used as the line lead
#   -color     => fill colour (optional)
#   -paragraph => when true, -x / -y are relative to the current paragraph
#   -parspace  => paragraph spacing (defaults to 0)
#
# When -t is undefined or empty only the debug line is emitted, plus a
# vertical advance by -size if -advance is set.
#
# Text, colour and alignment end up inside single-quoted literals of the
# generated script, so backslashes and single quotes in them are escaped.
sub print_block(%){
  my %h = @_;

  # Makes a value safe for a single-quoted literal of the generated code.
  my $quote = sub{
    my $s = shift;
    $s = "" unless defined $s;
    $s =~ s/([\\'])/\\$1/g;
    return $s;
  };

  my $text = $quote->( $h{'-t'} );
  print "print 'Printing: ". $text . "' . \"\\n\";\n";
  # Test for emptiness, not truthiness: a paragraph whose text is "0"
  # must still be drawn.
  if( $text eq "" ){
    print "\$ypos -= " . $h{'-size'} . ";\n" if $h{'-advance'};
    return;
  }
  # print "\$pdf->textstart;\n";
  print "do{\n";
  print " \$pdf->fillcolor( '" . $quote->( $h{'-color'} ) . "' );\n" if $h{'-color'};
  print " my (\$endw,\$newy) = text_block( \$pdf->{gfx}, '". $text . "', \n";
  if( $h{'-paragraph'} ){
    print " -y => \$ypos - " . $h{'-y'} . " - " . $h{'-size'} . ",\n";
    print " -x => \$xpos + " . $h{'-x'} . ", \n";
  } else {
    print " -y => " . $h{'-y'} . ",\n";
    print " -x => " . $h{'-x'} . ", \n";
  }
  print " -w => " . $h{'-w'} . ", \n";
  print " -h => " . $h{'-h'} . ", \n";
  print " -parspace => " . ( $h{'-parspace'} || 0 ) . ", \n";
  print " -align => '" . $quote->( $h{'-align'} ) ."', \n";
  print " -lead => " . $h{'-size'} . " );\n";
  print " \$ypos = \$newy;\n" if $h{'-advance'};
  print " \$xpos = \$endw || 0;\n" if $h{'-advance'};
  print "};\n";
  print "\$pdf->textend;\n";
}
# | |
# "Carriage return": emits the statement that resets the generated
# script's $xpos to 0.
sub cr{
  print '$xpos = 0;', "\n";
}
# Emits the code that strokes a draw-line element.  Only lines anchored
# to a paragraph are drawn; for any other anchor type nothing is emitted.
# The end points (svg:x1/y1 and svg:x2/y2) are taken relative to the left
# margin and to the current $ypos of the generated script.  A graphic
# style whose draw-stroke is 'dash' yields a dash length of 5, any other
# style an empty linedash() argument list.
# Dies when the line's graphic style is unknown.
sub get_line($){
  my $line = shift;
  my( $x1, $x2, $y1, $y2 ) =
    map { to_pixels( $line->attrs( "svg:$_" ) ) } qw(x1 x2 y1 y2);
  my $atype = $line->attrs( "text:anchor-type" );
  my $style = $line->attrs( "draw:style-name" );
  if( $atype eq "paragraph" ){
    my $gs = $gstyles{$style};
    die "Can't find $style graphic style" unless $gs;
    my $dash = $gs->{'draw-stroke'} eq 'dash' ? 5 : "";
    my $left = $pp{x1};
    print "\$pdf->strokecolor( '" . $gs->{'stroke-color'} . "') ;\n",
          "\$pdf->move( $x1 + $left, \$ypos - $y1 );\n",
          "\$pdf->linedash( $dash );\n",
          "\$pdf->line( $x2 + $left, \$ypos - $y2 );\n",
          "\$pdf->stroke;\n";
  }
}
# Emits the code that draws a draw-rect element and then hands the
# element to get_frame() so that any text inside it is rendered too.
# The rectangle itself is only drawn when it is anchored to a paragraph:
# it is stroked, and additionally filled unless the graphic style's fill
# is 'none'.  Dies when the rectangle's graphic style is unknown.
sub get_rect($){
  my $rect = shift;
  my $atype = $rect->attrs( "text:anchor-type" );
  my( $w, $h, $x, $y ) =
    map { to_pixels( $rect->attrs( "svg:$_" ) ) } qw(width height x y);
  my $style = $rect->attrs( "draw:style-name" );
  if( $atype eq "paragraph" ){
    my $gs = $gstyles{$style};
    die "Can't find $style graphic style" unless $gs;
    my $filled = $gs->{'fill'} ne 'none';
    my @code = ( "\$pdf->strokecolor( '" . $gs->{'stroke-color'} . "') ;\n" );
    push @code, "\$pdf->fillcolor( '" . $gs->{'fill-color'} . "') ;\n" if $filled;
    push @code, "\$pdf->rect( $x + $pp{x1}, \$ypos - $y - $h, $w, $h );\n";
    push @code, $filled ? "\$pdf->fillstroke;\n" : "\$pdf->stroke;\n";
    print @code;
  }
  return get_frame($rect);
}
# Emits the code for one text-p element of the document body.
# The paragraph's font is selected first; then every draw-frame, draw-line
# and draw-rect child is handed to its drawing routine; finally the
# paragraph's own text is printed as a block that advances the current
# position of the generated script.
# Dies when the paragraph's style (default "Standard") is unknown.
sub get_paragraph($){
  my $para = shift;
  my $style_name = $para->attrs( "text:style-name" ) || "Standard";
  my $style = $styles{$style_name};
  die "Can't find $style_name style " unless $style;

  print "\$font = \$pdf->corefont( '" . $style->{font} . "' );\n",
        "\$pdf->textstart;\n",
        "\$pdf->textfont( \$font, " . $style->{size} . " );\n";

  # Drawing routine for each kind of child element this script supports.
  my %drawer_for = (
    'draw-frame' => \&get_frame,
    'draw-line' => \&get_line,
    'draw-rect' => \&get_rect,
  );
  $para->children->each( sub{
    my $child = shift;
    my $draw = $drawer_for{ $child->type };
    $draw->( $child ) if $draw;
  } );

  my $own_text = $para->text;
  cr();
  print_block(
    -t => $own_text,
    -color => $style->{color},
    -align => $style->{align},
    -advance => 1,
    -size => $style->{size},
    -x => $pp{ml},
    -y => 0,
    -w => $pp{ww},
    -h => $pp{wh},
    -paragraph => 1,
  );
}
# Code section! | |
# Emits the preamble of the generated script: a banner carrying the
# generation time, the pragmas, the PDF::API2::Lite import and the
# declarations of the variables ($pdf, $font, $ypos, $xpos) used by all
# code emitted afterwards.
sub init_code{
  my $rule = "# ---------------------------------------------------\n";
  my $stamp = DateTime->now->strftime( "%Y-%m-%d %H:%M:%S" );
  my @preamble = (
    $rule,
    "# odt to pdf. Generated at " . $stamp . "\n",
    $rule,
    "# vi: set cin sw=2:\n",
    "use strict;\n",
    "use warnings;\n",
    "use utf8;\n",
    "use PDF::API2::Lite;\n\n\n",
    "\nmy \$pdf = PDF::API2::Lite->new;\n",
    "my \$font;\n",
    "my \$ypos;\n",
    "my \$xpos;\n",
  );
  print @preamble;
}
# Loads the page geometry into the file-level %pp hash (only when it is
# still empty) and emits the code that creates the page and places $ypos
# at the top of the printable area.
sub page_properties{
  unless( %pp ){
    %pp = get_page_properties();
  }
  print "\$pdf->page( $pp{pw}, $pp{ph});\n";
  print "\$ypos = " . $pp{y1} . ";\n";
  # print "\$pdf->transform( -translate => [" . $pp{x1} . ", " . $pp{y1} . " ] );\n";
}
# Prints a short usage message on standard output and terminates the
# program with exit status 1.
sub usage{
  print "Usage:\n", " $0 document.odt\n";
  exit 1;
}
# Prints the Perl source of the text_block() helper, so that the generated
# script carries its own copy of the routine that the code emitted by
# print_block() calls.
#
# The source is held in an interpolating heredoc (<<EOF); that is why every
# "$" and "@" that must reach the output literally is written with a
# leading backslash, and why "\\n" / "\\s" are used for the "\n" / "\s"
# escapes of the emitted code.  The text between the heredoc markers is
# program output: it must not be reformatted or commented in place.
#
# Takes no arguments.
sub text_block_code{ | |
print <<EOF | |
# COPYRIGHT Notice | |
# text_block() is © Rick Measham, 2004-2007. The latest version can be found in the tutorial located at http://rick.measham.id.au/pdf-api2/ | |
sub text_block { | |
my \$text_object = shift; | |
my \$text = shift; | |
my %arg = \@_; | |
my \$endw; | |
# Get the text in paragraphs | |
my \@paragraphs = split( /\\n/, \$text ); | |
# calculate width of all words | |
my \$space_width = \$text_object->advancewidth(' '); | |
my \@words = split( /\\s+/, \$text ); | |
my %width = (); | |
foreach (\@words) { | |
next if exists \$width{\$_}; | |
\$width{\$_} = \$text_object->advancewidth(\$_); | |
} | |
my \$ypos = \$arg{'-y'}; | |
my \@paragraph = split( / /, shift(\@paragraphs) ); | |
my \$first_line = 1; | |
my \$first_paragraph = 1; | |
# while we can add another line | |
while ( \$ypos >= \$arg{'-y'} - \$arg{'-h'} + \$arg{'-lead'} ) { | |
unless (\@paragraph) { | |
last unless scalar \@paragraphs; | |
\@paragraph = split( / /, shift(\@paragraphs) ); | |
\$ypos -= \$arg{'-parspace'} if \$arg{'-parspace'}; | |
last unless \$ypos >= \$arg{'-y'} - \$arg{'-h'}; | |
\$first_line = 1; | |
\$first_paragraph = 0; | |
} | |
my \$xpos = \$arg{'-x'}; | |
# while there's room on the line, add another word | |
my \@line = (); | |
my \$line_width = 0; | |
if ( \$first_line && exists \$arg{'-hang'} ) { | |
my \$hang_width = \$text_object->advancewidth( \$arg{'-hang'} ); | |
\$text_object->translate( \$xpos, \$ypos ); | |
\$text_object->text( \$arg{'-hang'} ); | |
\$xpos += \$hang_width; | |
\$line_width += \$hang_width; | |
\$arg{'-indent'} += \$hang_width if \$first_paragraph; | |
} | |
elsif ( \$first_line && exists \$arg{'-flindent'} ) { | |
\$xpos += \$arg{'-flindent'}; | |
\$line_width += \$arg{'-flindent'}; | |
} | |
elsif ( \$first_paragraph && exists \$arg{'-fpindent'} ) { | |
\$xpos += \$arg{'-fpindent'}; | |
\$line_width += \$arg{'-fpindent'}; | |
} | |
elsif ( exists \$arg{'-indent'} ) { | |
\$xpos += \$arg{'-indent'}; | |
\$line_width += \$arg{'-indent'}; | |
} | |
while ( \@paragraph | |
and \$line_width + ( scalar(\@line) * \$space_width ) + | |
\$width{ \$paragraph[0] } < \$arg{'-w'} ) | |
{ | |
\$line_width += \$width{ \$paragraph[0] }; | |
push( \@line, shift(\@paragraph) ); | |
} | |
# calculate the space width | |
my ( \$wordspace, \$align ); | |
if ( \$arg{'-align'} eq 'fulljustify' | |
or ( \$arg{'-align'} eq 'justify' and \@paragraph ) ) | |
{ | |
if ( scalar(\@line) == 1 ) { | |
\@line = split( //, \$line[0] ); | |
} | |
\$wordspace = ( \$arg{'-w'} - \$line_width ) / ( scalar(\@line) - 1 ); | |
\$align = 'justify'; | |
} | |
else { | |
\$align = ( \$arg{'-align'} eq 'justify' ) ? 'left' : \$arg{'-align'}; | |
\$wordspace = \$space_width; | |
} | |
\$line_width += \$wordspace * ( scalar(\@line) - 1 ); | |
if ( \$align eq 'justify' ) { | |
foreach my \$word (\@line) { | |
\$text_object->translate( \$xpos, \$ypos ); | |
\$text_object->text(\$word); | |
\$xpos += ( \$width{\$word} + \$wordspace ) if (\@line); | |
} | |
\$endw = \$arg{'-w'}; | |
} | |
else { | |
# calculate the left hand position of the line | |
if ( \$align eq 'right' ) { | |
\$xpos += \$arg{'-w'} - \$line_width; | |
} | |
elsif ( \$align eq 'center' ) { | |
\$xpos += ( \$arg{'-w'} / 2 ) - ( \$line_width / 2 ); | |
} | |
# render the line | |
\$text_object->translate( \$xpos, \$ypos ); | |
\$endw = \$text_object->text( join( ' ', \@line ) ); | |
} | |
\$ypos -= \$arg{'-lead'}; | |
\$first_line = 0; | |
} | |
unshift( \@paragraphs, join( ' ', \@paragraph ) ) if scalar(\@paragraph); | |
return ( \$endw, \$ypos, join( "\\n", \@paragraphs ) ) | |
} | |
EOF | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment