Skip to content

Instantly share code, notes, and snippets.

@silvioq
Created September 5, 2011 02:09
Show Gist options
  • Save silvioq/1193904 to your computer and use it in GitHub Desktop.
Save silvioq/1193904 to your computer and use it in GitHub Desktop.
odf2pl
#!/usr/bin/perl
# vi: set cin sw=2:
use strict;
use warnings;
use constant mm => 25.4 / 72;
use constant in => 1 / 72;
use constant pt => 1;
use Archive::Extract;
use Data::Dumper;
use File::Temp qw/tempdir/;
use File::Spec::Functions;
use Mojo::DOM;
use DateTime;
# ----------------------------------------------------------------------
# Main program.
#
# Takes the path of an .odt document as its only argument, unpacks it
# into a temporary directory, parses styles.xml and content.xml, and
# prints to STDOUT a Perl script (based on PDF::API2::Lite) that renders
# the document into a .pdf file named after the input.
# ----------------------------------------------------------------------
my $file = $ARGV[0];
usage() unless $file;

# The output name is the input name with .odt replaced by .pdf.  This is
# checked before anything is unpacked, so a wrong argument only produces
# the usage message.
my $outfile = $file;
if( !( $outfile =~ s/\.odt$/\.pdf/ ) ){
  usage();
}

# The document is unpacked as a zip archive into a directory that is
# removed automatically when the program exits (CLEANUP => 1).
my $ae = Archive::Extract->new( archive => $file, type => 'zip' )
  or die "Can't open $file as a zip archive\n";
my $tmpdir = tempdir( CLEANUP => 1 );
$ae->extract( to => $tmpdir )
  or die "Can't extract $file: " . ( $ae->error || 'unknown error' ) . "\n";
my $content_file = catfile $tmpdir, 'content.xml';
my $styles_file = catfile $tmpdir, 'styles.xml';

# Reads an XML file and returns its text with every line passed through
# normalize_xml().  As before, a newline is appended after each line.
# Three-argument open on a lexical handle, and failures are fatal.
my $read_xml = sub {
  my $path = shift;
  open( my $fh, '<', $path ) or die "Can't open $path: $!\n";
  my $xml = "";
  while( my $line = <$fh> ){
    $xml .= normalize_xml( $line ) . "\n";
  }
  close $fh;
  return $xml;
};

# Generate styles
my $text = $read_xml->( $styles_file );
my $style_dom = Mojo::DOM->new( $text );
# Generate content
$text = $read_xml->( $content_file );
my $content_dom = Mojo::DOM->new( $text );

# Init code
init_code();
# Print page properties
my %pp = ();
page_properties();
# Global styles
my %styles = (); # Text & Paragraph styles
my %gstyles = (); # Graphic styles
get_styles();
# $Data::Dumper::Pad = "# ";
# print "# \n# Paragraph styles:\n";
# print Dumper( \%styles );
# print "#\n";

# All elements: every top-level paragraph of the document body is
# translated; other element types are ignored.
my $body = $content_dom->at( "office-body office-text" )
  or die "Can't find the document body in content.xml\n";
$body->children->each( sub{
  my $e = shift;
  get_paragraph($e) if( $e->type eq "text-p" );
});

# End!
# The file name ends up inside a single-quoted literal of the generated
# script, so backslashes and apostrophes in it must be escaped.
( my $quoted_outfile = $outfile ) =~ s/([\\'])/\\$1/g;
print "\$pdf->saveas( '$quoted_outfile' );\n";
text_block_code();
# normalize_xml( $line )
#
# Rewrites the namespace separator of opening and closing tags in the
# draw, text, style and office namespaces from ':' to '-' (for example
# "<text:p" becomes "<text-p" and "</text:p" becomes "</text-p"), which
# is the spelling the selectors in this script use.  Only the tag name
# directly after "<" or "</" is touched; attribute names keep their
# colon.  Returns the rewritten line.
sub normalize_xml{
  my ( $xml ) = @_;
  # One pass: optional slash, one of the four prefixes, then the colon.
  $xml =~ s{<(/?)(draw|text|style|office):}{<$1$2-}g;
  return $xml;
}
# get_page_properties()
#
# Looks up the first master page in styles.xml, finds the page layout it
# refers to, and returns a hash (as a list) with the page geometry, all
# values as produced by to_pixels():
#   pw, ph          page width and height
#   mt, ml, mr, mb  top, left, right and bottom margins
#   x1, y1          left edge and top edge of the text area
#   ww, wh          width and height of the text area
# Dies when the master page, its layout name, the layout properties or
# the page size cannot be found.
sub get_page_properties{
  my $master = $style_dom->find( "office-master-styles style-master-page" )->first;
  die "Can't find master style page" unless $master;
  my $style = $master->attrs( "style:page-layout-name" );
  die "Master page has no page layout name" unless defined $style;

  # Reads a length attribute; a missing attribute counts as 0 so the
  # arithmetic below never sees an undefined value.
  my $length_of = sub {
    my ( $node, $attr ) = @_;
    my $raw = $node->attrs( $attr );
    return defined $raw ? to_pixels( $raw ) : 0;
  };

  my %pp;
  $style_dom->find( "style-page-layout" )->each( sub{
    my $e = shift;
    my $name = $e->attrs("style:name");
    # Only the layout named by the master page is of interest.
    return unless defined $name && $name eq $style;

    my $prop = $e->find( "style-page-layout-properties" )->first;
    die ( "Can't find page layout properties" ) unless $prop;
    $pp{pw} = $length_of->( $prop, "fo:page-width" );
    $pp{ph} = $length_of->( $prop, "fo:page-height" );
    $pp{mt} = $length_of->( $prop, "fo:margin-top" );
    $pp{ml} = $length_of->( $prop, "fo:margin-left" );
    $pp{mr} = $length_of->( $prop, "fo:margin-right" );
    $pp{mb} = $length_of->( $prop, "fo:margin-bottom" );
    # y1 is measured from the bottom of the page, so the top of the
    # text area is the page height minus the top margin.
    $pp{y1} = $pp{ph} - $pp{mt};
    $pp{x1} = $pp{ml};
    $pp{ww} = $pp{pw} - $pp{ml} - $pp{mr};
    $pp{wh} = $pp{ph} - $pp{mb} - $pp{mt};
    return;
  } );
  die "Can't find page style" unless $pp{pw};
  return %pp;
}
# get_styles()
#
# Fills the file-level %styles (paragraph and text styles) and %gstyles
# (graphic styles) hashes.  Does nothing when %styles is already filled.
#
# The default styles of styles.xml are stored under the name "Standard";
# the automatic styles of content.xml are stored under their own name
# and fall back to the "Standard" values for anything they do not set.
#
# Each %styles entry has the keys font, size, align and color; each
# %gstyles entry has stroke-color, fill-color, fill and draw-stroke.
# Dies when a default paragraph/text style has no text properties or a
# graphic style has no graphic properties.
sub get_styles{
  return if %styles;

  # Value of attribute $attr on $node, or $fallback when the node is
  # missing or the attribute is absent or empty.
  my $attr_or = sub {
    my ( $node, $attr, $fallback ) = @_;
    my $value = $node ? scalar $node->attrs( $attr ) : undef;
    return $value || $fallback;
  };

  # Builds a %gstyles entry from a graphic-properties element.
  my $graphic_style = sub {
    my $gp = shift;
    my %entry = (
      'stroke-color' => $attr_or->( $gp, "svg:stroke-color", "black" ),
      'fill-color' => $attr_or->( $gp, "draw:fill-color", "cyan" ),
      'fill' => $attr_or->( $gp, "draw:fill", "true" ),
      'draw-stroke' => $attr_or->( $gp, "draw:stroke", "none" ),
    );
    return \%entry;
  };

  $style_dom->find( "style-default-style" )->each( sub{
    my $e = shift;
    my $family = $e->attrs( "style:family" );
    $family = '' unless defined $family;
    if( $family =~ /paragraph|text/ ){
      my $tp = $e->find( "style-text-properties" )->first;
      my $pp = $e->find( "style-paragraph-properties" )->first;
      die "Can't find style text properties for Standard style" unless $tp;
      # Paragraph properties are optional: a default style without them
      # simply gets left alignment.
      $styles{Standard} = {
        font => scalar $tp->attrs( "style:font-name" ),
        size => to_pixels( scalar $tp->attrs( "fo:font-size" ) ),
        align => $attr_or->( $pp, "fo:text-align", 'left' ),
        color => $attr_or->( $tp, "fo:color", 'black' ),
      };
    } elsif( $family eq "graphic" ){
      my $gp = $e->at( "style-graphic-properties" );
      die "Can't find style graph properties for Standard style" unless $gp;
      $gstyles{Standard} = $graphic_style->( $gp );
    }
  } );

  $content_dom->find( "office-automatic-styles style-style" )->each( sub{
    my $e = shift;
    my $name = $e->attrs("style:name");
    return unless defined $name;
    my $family = $e->attrs( "style:family" );
    $family = '' unless defined $family;
    if( $family =~ /paragraph|text/ ){
      my $tp = $e->find( "style-text-properties" )->first;
      my $pp = $e->find( "style-paragraph-properties" )->first;
      # Fallback values; looked up without creating a Standard entry.
      my $std = $styles{Standard} || {};
      my $font_size = $attr_or->( $tp, "fo:font-size", undef );
      $styles{$name} = {
        # A style that has text properties but no font name of its own
        # inherits the Standard font, like the other keys do.
        font => $attr_or->( $tp, "style:font-name", $std->{font} ),
        size => $font_size ? to_pixels( $font_size ) : $std->{size},
        align => $attr_or->( $pp, "fo:text-align", $std->{align} ),
        color => $attr_or->( $tp, "fo:color", $std->{color} ),
      };
    } elsif( $family eq "graphic" ){
      my $gp = $e->at( "style-graphic-properties" );
      die "Can't find style graph properties for $name style" unless $gp;
      $gstyles{$name} = $graphic_style->( $gp );
    }
  } );
}
# to_pixels( $length )
#
# Converts a length string with a unit suffix into a number of points
# (1/72 inch), using the file-level constants "in" and "mm":
#   "<n>in"  inches       -> truncated to whole points
#   "<n>cm"  centimetres  -> truncated to whole points
#   "<n>mm"  millimetres  -> truncated to whole points
#   "<n>pt"  points       -> the number, unchanged
# Anything else is returned as given; an undefined value is returned as
# undef without a warning.
sub to_pixels($){
  my $val = shift;
  return $val unless defined $val;
  if( $val =~ s/in$// ){
    return int($val / in);
  } elsif ( $val =~ s/cm$// ) {
    # One centimetre is ten millimetres.
    return int($val * 10 / mm);
  } elsif ( $val =~ s/mm$// ) {
    return int($val / mm);
  } elsif ( $val =~ s/pt$// ) {
    return $val;
  } else {
    return $val;
  }
}
# get_frame( $frame )
#
# Emits the code that draws the text of a frame: the text of its first
# paragraph element, followed by the text of each child element of that
# paragraph that has text (each with its own style).  Returns without
# output when the frame contains no paragraph.
#
# Text is placed at the frame's svg:x / svg:y offset and wrapped to the
# page text area ($pp{ww} x $pp{wh}); the frame's own width and height
# are not used.  Only frames anchored to a paragraph are supported.
#
# Dies when a referenced style is unknown, or when there is text to draw
# and the anchor type is not "paragraph".
sub get_frame($){
  my $frame = shift;
  my $x = to_pixels( $frame->attrs( "svg:x" ) );
  my $y = to_pixels( $frame->attrs( "svg:y" ) );
  my $atype = $frame->attrs( "text:anchor-type" );
  $atype = '' unless defined $atype;
  my $text = $frame->at( 'text-p' );
  return unless $text;

  # Name of the style of $node ("Standard" when it names none); dies
  # when that style is not known.
  my $style_of = sub {
    my $node = shift;
    my $style = $node->attrs( "text:style-name" ) || "Standard";
    die "Can't use style $style" if( !$styles{$style} );
    return $style;
  };

  # Emits font selection and the text block for the text of $node.
  my $emit_text = sub {
    my ( $node, $style ) = @_;
    return unless $node->text;
    die "Can't use frame when anchor type is $atype" unless $atype eq "paragraph";
    my $st = $styles{$style};
    print "\$font = \$pdf->corefont( '". $st->{font} . "' );\n";
    print "\$pdf->textstart;\n";
    print "\$pdf->textfont( \$font, " . $st->{size} . " );\n";
    print_block( -t => $node->text, -x => $x + $pp{x1},
      -y => $y, -w => $pp{ww}, -h => $pp{wh},
      -align => $st->{'align'}, -size => $st->{'size'},
      -color => $st->{color},
      -paragraph => 1 );
  };

  # The paragraph's style is validated before anything is printed.
  my $style = $style_of->( $text );
  cr();
  $emit_text->( $text, $style );
  $text->children->each(sub{
    my $e = shift;
    return unless $e->text;
    $emit_text->( $e, $style_of->( $e ) );
  } );
}
# print_block( %options )
#
# Emits the code that draws one block of text through text_block().
# Options:
#   -t          text to draw
#   -x          x position
#   -y          y position
#   -w          width of the block
#   -h          height of the block
#   -align      alignment, passed on to text_block()
#   -size       text size; also used as the line lead
#   -color      fill colour (optional)
#   -paragraph  when true, -x / -y are offsets from the generated
#               script's $xpos / $ypos instead of absolute positions
#   -advance    when true, the generated code updates $ypos and $xpos
#               after drawing
#   -parspace   paragraph spacing passed on to text_block() (default 0)
#
# With empty or undefined text nothing is drawn; when -advance is set
# the generated code only moves $ypos down by -size.
#
# Text, colour and alignment end up inside single-quoted literals of the
# generated script, so backslashes and apostrophes in them are escaped.
sub print_block(%){
  my %h = @_;

  # Escapes a value for use inside a single-quoted Perl literal.
  my $sq = sub {
    my $value = shift;
    $value = '' unless defined $value;
    $value =~ s/([\\'])/\\$1/g;
    return $value;
  };

  my $text = $sq->( $h{'-t'} );
  print "print 'Printing: ". $text . "' . \"\\n\";\n";
  # Only undefined or empty text is skipped; a text of "0" is drawn.
  if( !( defined $h{'-t'} && length $h{'-t'} ) ){
    print "\$ypos -= " . $h{'-size'} . ";\n" if $h{'-advance'};
    return;
  }
  # print "\$pdf->textstart;\n";
  print "do{\n";
  print " \$pdf->fillcolor( '" . $sq->( $h{'-color'} ) . "' );\n" if $h{'-color'};
  print " my (\$endw,\$newy) = text_block( \$pdf->{gfx}, '". $text . "', \n";
  if( $h{'-paragraph'} ){
    # Relative to the running position kept by the generated script.
    print " -y => \$ypos - " . $h{'-y'} . " - " . $h{'-size'} . ",\n";
    print " -x => \$xpos + " . $h{'-x'} . ", \n";
  } else {
    print " -y => " . $h{'-y'} . ",\n";
    print " -x => " . $h{'-x'} . ", \n";
  }
  print " -w => " . $h{'-w'} . ", \n";
  print " -h => " . $h{'-h'} . ", \n";
  print " -parspace => " . ( $h{'-parspace'} || 0 ) . ", \n";
  print " -align => '" . $sq->( $h{'-align'} ) ."', \n";
  print " -lead => " . $h{'-size'} . " );\n";
  print " \$ypos = \$newy;\n" if $h{'-advance'};
  print " \$xpos = \$endw || 0;\n" if $h{'-advance'};
  print "};\n";
  print "\$pdf->textend;\n";
}
#
# cr(): emits the statement that resets the generated script's $xpos
# to 0.
sub cr{
  print q{$xpos = 0;} . "\n";
}
# get_line( $shape )
#
# Emits the code that strokes a line element: stroke colour, a move to
# (svg:x1, svg:y1), the dash setting, a line to (svg:x2, svg:y2), and
# the stroke itself.  x values are offset by the left edge of the text
# area, y values are subtracted from the generated script's $ypos.
# A 'dash' stroke becomes a dash length of 5; anything else emits an
# empty dash argument.  Nothing is emitted unless the element is
# anchored to a paragraph.  Dies when its graphic style is unknown.
sub get_line($){
  my $shape = shift;
  my $from_x = to_pixels( $shape->attrs( "svg:x1" ) );
  my $to_x = to_pixels( $shape->attrs( "svg:x2" ) );
  my $from_y = to_pixels( $shape->attrs( "svg:y1" ) );
  my $to_y = to_pixels( $shape->attrs( "svg:y2" ) );
  my $anchor = $shape->attrs( "text:anchor-type" );
  my $style_name = $shape->attrs( "draw:style-name" );
  if( $anchor eq "paragraph" ){
    die "Can't find $style_name graphic style" unless $gstyles{$style_name};
    my $gstyle = $gstyles{$style_name};
    my $dash = $gstyle->{'draw-stroke'} eq 'dash' ? 5 : "";
    my $left = $pp{x1};
    print join( "",
      "\$pdf->strokecolor( '", $gstyle->{'stroke-color'}, "') ;\n",
      "\$pdf->move( ", $from_x, " + ", $left, ", \$ypos - ", $from_y, " );\n",
      "\$pdf->linedash( ", $dash, " );\n",
      "\$pdf->line( ", $to_x, " + ", $left, ", \$ypos - ", $to_y, " );\n",
      "\$pdf->stroke;\n",
    );
  }
}
# get_rect( $shape )
#
# Emits the code that draws a rectangle element and then hands the same
# element to get_frame() so that any text inside it is drawn too.
# For a paragraph-anchored rectangle the stroke colour is set, the fill
# colour is set unless the style's fill is 'none', the rectangle path is
# added, and it is either filled and stroked or only stroked.  Dies when
# the graphic style of a paragraph-anchored rectangle is unknown.
sub get_rect($){
  my $shape = shift;
  my $anchor = $shape->attrs( "text:anchor-type" );
  my $width = to_pixels( $shape->attrs( "svg:width" ) );
  my $height = to_pixels( $shape->attrs( "svg:height" ) );
  my $left = to_pixels( $shape->attrs( "svg:x" ) );
  my $top = to_pixels( $shape->attrs( "svg:y" ) );
  my $style_name = $shape->attrs( "draw:style-name" );
  if( $anchor eq "paragraph" ){
    die "Can't find $style_name graphic style" unless $gstyles{$style_name};
    my $gstyle = $gstyles{$style_name};
    my $filled = $gstyle->{'fill'} ne 'none';
    print "\$pdf->strokecolor( '" . $gstyle->{'stroke-color'} . "') ;\n";
    if( $filled ){
      print "\$pdf->fillcolor( '" . $gstyle->{'fill-color'} . "') ;\n";
    }
    # The rectangle's origin is its lower-left corner, hence "- height".
    print "\$pdf->rect( " . $left . " + " . $pp{x1} . ", \$ypos - " . $top . " - " . $height . ", " . $width . ", " . $height . " );\n";
    print $filled ? "\$pdf->fillstroke;\n" : "\$pdf->stroke;\n";
  }
  get_frame($shape);
}
# get_paragraph( $p )
#
# Emits the code for one top-level paragraph: font selection for the
# paragraph's style ("Standard" when it names none), then the drawing
# elements it contains (frames, lines, rectangles), then the paragraph's
# own text as a block that advances the generated script's $ypos.
# Dies when the paragraph's style is unknown.
sub get_paragraph($){
  my $p = shift;
  my $s = $p->attrs( "text:style-name" ) || "Standard";
  my $ss = $styles{$s};
  die "Can't find $s style " unless $ss;
  print "\$font = \$pdf->corefont( '". $ss->{font} . "' );\n";
  print "\$pdf->textstart;\n";
  print "\$pdf->textfont( \$font, " . $ss->{size} ." );\n";

  # Drawing elements handled inside a paragraph; any other child element
  # is skipped.
  my %handler_for = (
    'draw-frame' => \&get_frame,
    'draw-line' => \&get_line,
    'draw-rect' => \&get_rect,
  );
  $p->children->each(sub{
    my $e = shift;
    my $handler = $handler_for{ $e->type };
    $handler->( $e ) if $handler;
  }
  );

  my $t = $p->text;
  my $size = $ss->{size};
  cr();
  # print_block() emits the text_block() call and, because -advance is
  # set, the update of $ypos / $xpos.
  print_block( -t => $t, -color => $ss->{color},
    -align => $ss->{align}, -advance => 1,
    -size => $size, -x => $pp{ml},
    -y => 0,
    -w => $pp{ww}, -h => $pp{wh},
    -paragraph => 1,
  );
}
# Code section!
# init_code()
#
# Emits the preamble of the generated script: a banner with the current
# time (from DateTime->now), the strictures, the PDF::API2::Lite import,
# the $pdf object and the declarations of $font, $ypos and $xpos.
sub init_code{
  my $stamp = DateTime->now->strftime( "%Y-%m-%d %H:%M:%S" );
  my $rule = "# ---------------------------------------------------";
  my @preamble = (
    $rule,
    "# odt to pdf. Generated at " . $stamp,
    $rule,
    "# vi: set cin sw=2:",
    "use strict;",
    "use warnings;",
    "use utf8;",
    "use PDF::API2::Lite;",
    # three empty lines separate the imports from the declarations
    "",
    "",
    "",
    "my \$pdf = PDF::API2::Lite->new;",
    "my \$font;",
    "my \$ypos;",
    "my \$xpos;",
  );
  print join( "", map { $_ . "\n" } @preamble );
}
# page_properties()
#
# Loads the page geometry into the file-level %pp (only when it is still
# empty) and emits the code that opens a page of that size and sets the
# generated script's $ypos to the top of the text area.
sub page_properties{
  if( !%pp ){
    %pp = get_page_properties();
  }
  my ( $width, $height ) = ( $pp{pw}, $pp{ph} );
  print "\$pdf->page( " . $width . ", " . $height . ");\n";
  print "\$ypos = " . $pp{y1} . ";\n";
  # print "\$pdf->transform( -translate => [" . $pp{x1} . ", " . $pp{y1} . " ] );\n";
}
# usage(): prints how to call the program (using $0 as its name) to
# STDOUT and exits with status 1.
sub usage{
  my $program = $0;
  print "Usage:\n " . $program . " document.odt\n";
  exit 1;
}
# text_block_code()
#
# Prints the source of the text_block() helper at the current output
# position of the generated script.  The generated script calls
# text_block() for the text it draws (see the "text_block( ..." line
# emitted by print_block).
#
# The here-document is an interpolating one (unquoted EOF terminator), so
# the sigils of scalars and arrays are backslash-escaped to reach the
# output literally, and "\\n" is written where the emitted code needs
# "\n".  The "#" lines inside the here-document are part of the emitted
# text, not comments of this file.
#
# As emitted, text_block( $text_object, $text, %arg ) lays the text out
# line by line inside a box given by -x, -y, -w and -h, stepping down by
# -lead per line, and returns the end width of the last rendered line,
# the final y position and the text that did not fit.
#
# NOTE(review): in the emitted code, a justified line that consists of a
# single one-character word leaves "scalar(@line) - 1" at zero, so the
# word-spacing computation would divide by zero -- confirm and guard.
sub text_block_code{
# The print has no trailing semicolon: it is the only statement here.
print <<EOF
# COPYRIGHT Notice
# text_block() is © Rick Measham, 2004-2007. The latest version can be found in the tutorial located at http://rick.measham.id.au/pdf-api2/
sub text_block {
my \$text_object = shift;
my \$text = shift;
my %arg = \@_;
my \$endw;
# Get the text in paragraphs
my \@paragraphs = split( /\\n/, \$text );
# calculate width of all words
my \$space_width = \$text_object->advancewidth(' ');
my \@words = split( /\\s+/, \$text );
my %width = ();
foreach (\@words) {
next if exists \$width{\$_};
\$width{\$_} = \$text_object->advancewidth(\$_);
}
my \$ypos = \$arg{'-y'};
my \@paragraph = split( / /, shift(\@paragraphs) );
my \$first_line = 1;
my \$first_paragraph = 1;
# while we can add another line
while ( \$ypos >= \$arg{'-y'} - \$arg{'-h'} + \$arg{'-lead'} ) {
unless (\@paragraph) {
last unless scalar \@paragraphs;
\@paragraph = split( / /, shift(\@paragraphs) );
\$ypos -= \$arg{'-parspace'} if \$arg{'-parspace'};
last unless \$ypos >= \$arg{'-y'} - \$arg{'-h'};
\$first_line = 1;
\$first_paragraph = 0;
}
my \$xpos = \$arg{'-x'};
# while there's room on the line, add another word
my \@line = ();
my \$line_width = 0;
if ( \$first_line && exists \$arg{'-hang'} ) {
my \$hang_width = \$text_object->advancewidth( \$arg{'-hang'} );
\$text_object->translate( \$xpos, \$ypos );
\$text_object->text( \$arg{'-hang'} );
\$xpos += \$hang_width;
\$line_width += \$hang_width;
\$arg{'-indent'} += \$hang_width if \$first_paragraph;
}
elsif ( \$first_line && exists \$arg{'-flindent'} ) {
\$xpos += \$arg{'-flindent'};
\$line_width += \$arg{'-flindent'};
}
elsif ( \$first_paragraph && exists \$arg{'-fpindent'} ) {
\$xpos += \$arg{'-fpindent'};
\$line_width += \$arg{'-fpindent'};
}
elsif ( exists \$arg{'-indent'} ) {
\$xpos += \$arg{'-indent'};
\$line_width += \$arg{'-indent'};
}
while ( \@paragraph
and \$line_width + ( scalar(\@line) * \$space_width ) +
\$width{ \$paragraph[0] } < \$arg{'-w'} )
{
\$line_width += \$width{ \$paragraph[0] };
push( \@line, shift(\@paragraph) );
}
# calculate the space width
my ( \$wordspace, \$align );
if ( \$arg{'-align'} eq 'fulljustify'
or ( \$arg{'-align'} eq 'justify' and \@paragraph ) )
{
if ( scalar(\@line) == 1 ) {
\@line = split( //, \$line[0] );
}
\$wordspace = ( \$arg{'-w'} - \$line_width ) / ( scalar(\@line) - 1 );
\$align = 'justify';
}
else {
\$align = ( \$arg{'-align'} eq 'justify' ) ? 'left' : \$arg{'-align'};
\$wordspace = \$space_width;
}
\$line_width += \$wordspace * ( scalar(\@line) - 1 );
if ( \$align eq 'justify' ) {
foreach my \$word (\@line) {
\$text_object->translate( \$xpos, \$ypos );
\$text_object->text(\$word);
\$xpos += ( \$width{\$word} + \$wordspace ) if (\@line);
}
\$endw = \$arg{'-w'};
}
else {
# calculate the left hand position of the line
if ( \$align eq 'right' ) {
\$xpos += \$arg{'-w'} - \$line_width;
}
elsif ( \$align eq 'center' ) {
\$xpos += ( \$arg{'-w'} / 2 ) - ( \$line_width / 2 );
}
# render the line
\$text_object->translate( \$xpos, \$ypos );
\$endw = \$text_object->text( join( ' ', \@line ) );
}
\$ypos -= \$arg{'-lead'};
\$first_line = 0;
}
unshift( \@paragraphs, join( ' ', \@paragraph ) ) if scalar(\@paragraph);
return ( \$endw, \$ypos, join( "\\n", \@paragraphs ) )
}
EOF
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment