Last active
October 13, 2016 09:43
-
-
Save bpj/0454621722eae86dad00947c1b5162d3 to your computer and use it in GitHub Desktop.
Pandoc filter to filter out stuff unless the output format is html/html4/html5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# exclude spans/divs marked html/html4/html5 unless output format is html/html4/html5 | |
use utf8; | |
use autodie 2.26; | |
use 5.010001; | |
use strict; | |
use warnings; | |
use warnings qw(FATAL utf8); | |
use Carp qw[ carp croak ]; | |
use Pandoc::Elements 0.25 qw[ attributes MetaBool Span ]; | |
use Pandoc::Filter 0.25 qw[ pandoc_filter ]; | |
# Cached truth value of the html_spans setting. It stays undefined until the
# first link with an applicable pseudo-URL has been processed.
my $html_spans;

# Decide whether material marked with $variant ('html', 'html4' or 'html5')
# has to be left out for the output format $format.
# Returns 1 when the material must be dropped, 0 when it may stay.
my $is_excluded = sub {
    my ( $variant, $format ) = @_;

    # Anything other than the html/html5 output formats drops the material.
    return 1 unless $format =~ /^html5?$/;

    # html4-only material does not go into html5 output ...
    return 1 if 'html4' eq $variant and 'html5' eq $format;

    # ... and html5-only material goes into html5 output only.
    return 1 if 'html5' eq $variant and 'html5' ne $format;

    return 0;
};

pandoc_filter +{

    # Spans and divs: removed when their marker excludes the output format,
    # otherwise left untouched.
    'Span|Div' => sub {
        my ( $node, $format ) = @_;

        # Elements without an html/html4/html5 class are not our business.
        my ($variant) = $node->class =~ /(?<!\S)(html[45]?)(?!\S)/
            or return;

        return [] if $is_excluded->( $variant, $format );
        return;
    },

    # Links with a pseudo-URL -html/-html4/-html5: removed, unwrapped to
    # their content, or turned into a span carrying the marker as a class.
    'Link' => sub {
        my ( $link, $format, $meta ) = @_;

        # Ordinary links pass through unchanged.
        my ($variant) = $link->url =~ /^-(html[45]?)$/
            or return;

        return [] if $is_excluded->( $variant, $format );

        # Resolve the html_spans setting once: the metadata value wins,
        # the PANDOC_HTML_SPANS environment variable is the fallback.
        unless ( defined $html_spans ) {
            my $setting = $meta->{html_spans}
                // MetaBool( $ENV{PANDOC_HTML_SPANS} // 0 );
            $html_spans = $setting->metavalue;
        }

        # Without html_spans the link is simply replaced by its content.
        return $link->content unless $html_spans;

        # Otherwise wrap the content in a span that keeps the link's
        # attributes and gets the marker added as a class.
        my $attr = $link->keyvals;
        $attr->add( class => $variant );
        return Span attributes $attr, $link->content;
    },
};
__END__ | |
# # DOCUMENTATION # # | |
=encoding UTF-8 | |
=head1 NAME | |
pandoc-html-only.pl - filter out stuff unless the format is html/html4/html5 | |
=head1 VERSION | |
0.01 | |
=head1 SYNOPSIS | |
pandoc [OPTIONS] -F pandoc-html-only.pl [-M html_spans] FILE_NAME ... | |
=head1 DESCRIPTION | |
C<< pandoc-html-only.pl >> is a pandoc filter which excludes stuff | |
marked as htmlE<0x2f>html4E<0x2f>html5 unless the format is | |
htmlE<0x2f>html5. | |
=head2 Examples | |
In F<< example.md >> | |
This is always included. | |
<span class=html>This is only included in HTML.</span> | |
<div class=html> | |
It said: "[moo](-html4){#html4 .cow}[meow](-html5){.cat}". | |
</div> | |
\text{This is only visible in \LaTeX} | |
On the command line: | |
$ pandoc -F pandoc-html-only.pl example.md -t html | |
<p>This is always included.</p> | |
<p><span class="html">This is only included in HTML.</span></p> | |
<div class="html"> | |
<p>It said: "moo".</p> | |
</div> | |
<p></p> | |
$ pandoc -F pandoc-html-only.pl example.md -t html5 | |
<p>This is always included.</p> | |
<p><span class="html">This is only included in HTML.</span></p> | |
<div class="html"> | |
<p>It said: "meow".</p> | |
</div> | |
<p></p> | |
$ pandoc -F pandoc-html-only.pl example.md -t html -M html_spans | |
<p>This is always included.</p> | |
<p><span class="html">This is only included in HTML.</span></p> | |
<div class="html"> | |
<p>It said: "<span id="html4" class="cow html4">moo</span>".</p> | |
</div> | |
<p></p> | |
$ pandoc -F pandoc-html-only.pl example.md -t html5 -M html_spans | |
<p>This is always included.</p> | |
<p><span class="html">This is only included in HTML.</span></p> | |
<div class="html"> | |
<p>It said: "<span class="cat html5">meow</span>".</p> | |
</div> | |
<p></p> | |
$ pandoc -F pandoc-html-only.pl example.md -t latex | |
This is always included. | |
\text{This is only visible in \LaTeX} | |
=head2 The details | |
When running pandoc with this filter enabled | |
=over | |
=item spans and divs | |
are removed from the document if | |
=over | |
=item * | |
they have a class C<< html >> and the output format is I<< not >> | |
C<< html >> or C<< html5 >>. | |
=item * | |
they have a class C<< html4 >> and the output format is I<< not >> | |
C<< html >> (sic!). | |
=item * | |
they have a class C<< html5 >> and the output format is I<< not >> | |
C<< html5 >>. | |
=back | |
=item links | |
=over | |
=item * | |
are replaced with their content if | |
=over | |
=item * | |
they have the pseudo-URL C<< -html >> and the output format is | |
C<< html >> or C<< html5 >>. | |
=item * | |
they have the pseudo-URL C<< -html4 >> and the output format is | |
C<< html >> (sic!). | |
=item * | |
they have the pseudo-URL C<< -html5 >> and the output format is | |
C<< html5 >>. | |
=back | |
=item * | |
are treated similarly but converted into a span with the class | |
C<< html >>E<0x2f>C<< html4 >>E<0x2f>C<< html5 >> added | |
=over | |
=item * | |
if the metadata key C<< html_spans >> has a (perlish or boolean) true | |
value | |
=item * | |
or the environment variable C<< PANDOC_HTML_SPANS >> has a (perlish) | |
true value | |
=back | |
with the metadata value taking precedence over the environment variable. | |
(A perlish true value is any non-empty value other than C<< 0 >> | |
(zero).) | |
=item * | |
are otherwise removed from the document if they have one of these | |
pseudo-URLs. | |
=back | |
=back | |
=head1 REQUIREMENTS | |
=over | |
=item * | |
pandoc version E<0x3e>= 1.16 | |
L<< http:E<0x2f>E<0x2f>pandoc.org|http://pandoc.org >> | |
Installing: | |
L<< http:E<0x2f>E<0x2f>pandoc.orgE<0x2f>installing.html|http://pandoc.org/installing.html >> | |
=item * | |
perl version E<0x3e>= 5.10.1. | |
Installing: | |
L<< https:E<0x2f>E<0x2f>www.perl.orgE<0x2f>get.html|https://www.perl.org/get.html >> | |
Version check: C<< perl --version >>. | |
=item * | |
Pandoc::Elements version E<0x3e>= 0.25 | |
L<< https:E<0x2f>E<0x2f>metacpan.orgE<0x2f>podE<0x2f>Pandoc::Elements|https://metacpan.org/pod/Pandoc::Elements >>. | |
Installing: | |
L<< http:E<0x2f>E<0x2f>www.cpan.orgE<0x2f>modulesE<0x2f>INSTALL.html|http://www.cpan.org/modules/INSTALL.html >> | |
=back | |
=head1 OPTIONS | |
=over | |
=item -h, -?, --help | |
Show the documentation. | |
=back | |
=head1 BUGS | |
If a paragraph or other element contains only conditional material you | |
may get empty HTML elements left around. | |
=head1 AUTHOR | |
Benct Philip Jonsson E<0x3c>[email protected]E<0x3e>, | |
L<< https:E<0x2f>E<0x2f>github.comE<0x2f>bpj|https://github.com/bpj >> | |
=head1 COPYRIGHT | |
Copyright 2016- Benct Philip Jonsson | |
=head1 LICENSE | |
This is free software; you can redistribute it andE<0x2f>or modify it | |
under the same terms as the Perl 5 programming language system itself. | |
See | |
L<< http:E<0x2f>E<0x2f>dev.perl.orgE<0x2f>licensesE<0x2f>|http://dev.perl.org/licenses/ >>. | |
=cut | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment