-
-
Save makash/c70a04bb13eb0cc7113e4bd5b8bd8a31 to your computer and use it in GitHub Desktop.
Pandoc filter which converts LaTeX \newpage commands into appropriate pagebreak markup for other formats.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Pandoc filter which converts paragraphs containing only the LaTeX \newpage
# command into appropriate pagebreak markup for other formats.
# | |
# You will need perl version 5.10.1 or higher <https://www.perl.org/get.html> | |
# (Strawberry Perl recommended on Windows!) | |
# and a module installer <http://www.cpan.org/modules/INSTALL.html> | |
# and the Pandoc::Elements module version 0.33 or higher | |
# <https://metacpan.org/pod/Pandoc::Elements> | |
# | |
# Run with the -F option: | |
# | |
# $ pandoc -F pandoc-newpage.pl ... | |
# | |
# If you want to use an HTML class rather than an inline style | |
# set the value of the metadata key newpage_html_class to the | |
# name of the class and use CSS like this: | |
# | |
# @media all { | |
# .page-break { display: none; } | |
# } | |
# @media print { | |
# .page-break { display: block; page-break-after: always; } | |
# } | |
# | |
# | |
# Copyright 2017 Benct Philip Jonsson | |
# | |
# This is free software; you can redistribute it and/or modify it under | |
# the same terms as the Perl 5 programming language system itself. | |
# See <http://dev.perl.org/licenses/>. | |
use utf8; | |
use autodie 2.29; | |
use 5.010001; | |
use strict; | |
use warnings; | |
use warnings qw(FATAL utf8); | |
use Carp qw[ carp croak ]; | |
use Pandoc::Elements 0.33; | |
use Pandoc::Walker 0.27 qw[ action transform ]; | |
# The first command-line argument is taken as the name of the output format;
# the document itself arrives as JSON through <> (standard input once @ARGV
# has been emptied).
my $out_format = shift @ARGV;

# Slurp the complete input rather than only its first line, so that JSON
# spread over several lines (e.g. pretty-printed) is still parsed in full.
my $json = do { local $/; <> };
defined $json or croak "No JSON document to read";
my $doc = pandoc_json($json);

# The class for the HTML page-break <div> comes from the document metadata
# key newpage_html_class, falling back to the environment variable
# PANDOC_NEWPAGE_HTML_CLASS.  Anything that is not a plain string is an error.
my $html_class = $doc->meta->value('newpage_html_class')
    // $ENV{PANDOC_NEWPAGE_HTML_CLASS};
if ( ref $html_class ) {
    croak "Metadata>newpage_html_class must be string";
}

# Default markup: an inline style.  It is used whenever no class name was
# supplied (undefined, empty, or otherwise false).
my $html_break = qq[<div style="page-break-after: always;"></div>];
if ($html_class) {
    # Escape characters which would otherwise break out of the attribute value.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    ( my $escaped_class = $html_class ) =~ s/([&<>"])/$entity_for{$1}/g;
    $html_break = qq[<div class="$escaped_class"></div>];
}
# Page-break element for each supported output format, keyed by the format
# name given on the command line.  All HTML-based formats share the same
# markup, including the explicit html4/epub2/epub3 names next to their
# html/html5/epub counterparts.
## epub doesn't work, or only broken Linux readers?
my %break_for = (
    ( map { $_ => RawBlock( html => $html_break ) }
        qw[ html html4 html5 epub epub2 epub3 ] ),
    docx => RawBlock( openxml => '<w:p><w:r><w:br w:type="page" /></w:r></w:p>' ),
);

# Guard the lookup so that a missing format argument does not trigger an
# "uninitialized value" warning.
my $break = defined $out_format ? $break_for{$out_format} : undef;

# No page-break markup for this format (or no format given): pass the
# document through exactly as it was received.
unless ( defined $break ) {
    print $json;
    exit 0;
}
# Swap a raw TeX/LaTeX block whose entire content is \newpage for the
# page-break element selected for the output format.  Returning nothing
# leaves any other raw block as it is.
my $replace_newpage = sub {
    my ($raw_block) = @_;
    return unless $raw_block->format =~ /^(?:la)?tex$/;
    return unless $raw_block->content eq '\newpage';
    return $break;
};

my $filter = action( { 'RawBlock' => $replace_newpage } );

# The action is handed over a second time as an extra argument; per the
# original author's note this allows applying the action recursively.
$doc->transform( $filter, $filter );

# Emit the (possibly modified) document as JSON on standard output.
print $doc->to_json;
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment