Created
March 27, 2015 09:54
-
-
Save bpj/2a5639a0005c0124a2d4 to your computer and use it in GitHub Desktop.
A pandoc filter to format poetry nicely for HTML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
#---------------------------------------------------------------------- | |
# pandoc-poetry.pl | |
# ================ | |
# | |
# A pandoc filter to format poetry nicely for HTML. | |
# | |
# See <https://groups.google.com/d/msg/pandoc-discuss/_JnTJnsSK3k/SkM9tjfYyg0J> | |
# | |
# USAGE | |
# ----- | |
# | |
# pandoc -F pandoc-poetry.pl [OPTIONS] verse.md -o verse.html | |
# | |
# INPUT FORMAT | |
# ------------ | |
# | |
# ````pandoc-markdown | |
# <div class="verse"> | |
# | |
# | Line of poetry goes here | |
# | Second line here | |
# | And so on, throughout the poem, | |
# | |
# even over stanza breaks. | |
# | |
# </div> | |
# ```` | |
# The contents of paragraphs inside divs with class "verse" | |
# are 'split' at hard line breaks and the pieces are wrapped | |
# in spans with class "line". The paragraph is then itself | |
# wrapped in a div with class "stanza", which allows | |
# formatting the layout of verse with CSS | |
# | |
# XXX:
# Multi-line stanzas should be written as line blocks,
# but single-line stanzas must be 'normal' paragraphs,
# because Pandoc doesn't recognise single-line line blocks
# (arguably a bug in Pandoc!).
# | |
# OUTPUT HTML | |
# ----------- | |
# | |
# ````html | |
# <div class="verse"> | |
# <div class="stanza"> | |
# <p> | |
# <span class="line">Line of poetry goes here</span> | |
# <span class="line">Second line here</span> | |
# <span class="line">And so on, throughout the poem,</span> | |
# </p> | |
# </div> | |
# <div class="stanza"> | |
# <p> | |
# <span class="line">even over stanza breaks.</span> | |
# </p> | |
# </div> | |
# </div> | |
# ```` | |
# Not *quite* as the OP wished, because | |
# you cannot put classes on `<p>` elements with Pandoc. | |
# Use a selector `div.stanza p` to style stanzas with CSS! | |
# | |
# Copyright 2015- Benct Philip Jonsson. | |
# | |
# This script is free software; you can redistribute it and/or | |
# modify it under the same terms as Perl itself. | |
#---------------------------------------------------------------------- | |
# use 5.014; | |
use strict; | |
use warnings; # FATAL => 'all'; | |
use utf8; # No UTF-8 I/O with JSON! | |
use autodie 2.12; | |
# no indirect; | |
# no autovivification; # Don't pollute the AST!
# use Getopt::Long qw[ GetOptionsFromArray :config no_ignore_case ]; | |
# The following two modules must be installed from CPAN; | |
# see <http://www.cpan.org/modules/INSTALL.html> | |
use JSON::MaybeXS qw[ decode_json encode_json ]; # Choose best available implementation. | |
use Data::Rmap qw[ rmap_hash ]; # Data structure traversal support. | |
# HELPER FUNCTIONS # {{{1}}} | |
# is_elem( $tag => $elem )
#
# Predicate for pandoc AST nodes.  Returns a true boolean only when
# $elem is an (unblessed) hash reference which has both a 't' key and
# a 'c' key and whose 't' value is string-equal to $tag; otherwise it
# returns a false boolean.
sub is_elem {
    my ( $tag, $elem ) = @_;

    # The tests short-circuit left to right, so $elem is never
    # dereferenced unless it really is a hash reference.
    my $matches =
           'HASH' eq ref $elem
        && exists $elem->{t}
        && exists $elem->{c}
        && $tag eq $elem->{t};

    return $matches ? !!1 : !!0;
}    ## end sub is_elem
# _mk_elem( $type => $contents )
#
# Build a fresh hash reference with the element type stored under the
# key 't' and the contents stored (uncopied) under the key 'c'.
sub _mk_elem {    # {{{2}}}
    my $type     = shift;
    my $contents = shift;

    my %elem = ( t => $type, c => $contents );
    return \%elem;
}
# NL is a single shared Str element whose contents is a literal
# newline.  It is built once, when this statement is compiled.
use constant { NL => _mk_elem( Str => "\n" ) };
# _mk_stanza( \@inlines )
#
# Wrap the given list of inlines in a Para element, and that Para in
# a Div element with an empty id, the single class "stanza" and no
# key-value attributes.  Returns the Div.
sub _mk_stanza {
    my ($inlines) = @_;

    my $para = _mk_elem( Para => $inlines );
    my $attr = [ "", ['stanza'], [] ];

    return _mk_elem( Div => [ $attr, [$para] ] );
}
# _mk_line( \@inlines )
#
# Wrap the given list of inlines in a Span element with an empty id,
# the single class "line" and no key-value attributes.  Returns a
# two-item list: the NL element followed by the new Span.
sub _mk_line {
    my ($inlines) = @_;

    my $attr = [ "", ['line'], [] ];
    my $span = _mk_elem( Span => [ $attr, $inlines ] );

    return ( NL, $span );
}
# Traverse document:    # {{{1}}}

# Take the first command-line argument off @ARGV so that the
# diamond operator below reads the JSON document from STDIN instead
# of trying to open that argument as a file.
my $to_format = shift @ARGV;

# Slurp the whole input and decode it.
my $doc = decode_json do { local $/; <>; };

# Change elements in-place:    # {{{2}}}
# Visit every hash in the document and hand each Div which carries
# the class "verse" over to verse().
rmap_hash {
    my $div = $_;
    return unless is_elem( Div => $div );

    # A Div's contents is expected to be [ attributes, blocks ] and the
    # attributes [ id, classes, key-values ].  Check the shape rather
    # than dereferencing blindly.
    my $contents = $div->{c};
    return unless 'ARRAY' eq ref $contents and @$contents >= 2;
    my $attr = $contents->[-2];
    return unless 'ARRAY' eq ref $attr and 'ARRAY' eq ref $attr->[1];

    # Accept "verse" anywhere in the class list, not only as the first
    # class.  Looking through the list also avoids an "uninitialized
    # value" warning for every Div which has no classes at all.
    return unless grep { defined($_) && 'verse' eq $_ } @{ $attr->[1] };

    verse($div);
    return;
} $doc;
# _stanza_lines( $block )
#
# Split a block into verse lines.  Returns a reference to a list of
# lines, each line being a reference to a list of inlines, or nothing
# (undef in scalar context) if $block is not something we treat as a
# stanza.  Lines may be empty; weeding those out is up to the caller.
sub _stanza_lines {
    my ($block) = @_;

    my $is_line_block = is_elem( LineBlock => $block );
    return unless $is_line_block or is_elem( Para => $block );
    return unless 'ARRAY' eq ref $block->{c};

    # Pandoc 1.18 and later parse "| " line blocks into a LineBlock,
    # the contents of which already is a list of lines.
    return [ @{ $block->{c} } ] if $is_line_block;

    # Otherwise it's a Para: start a new line at every LineBreak.
    my @lines = ( my $line = [] );
    for my $inline ( @{ $block->{c} } ) {

        # Only look at the tag here: pandoc 1.18 and later serialise a
        # LineBreak as {"t":"LineBreak"}, without any "c" key.
        my $is_break =
               'HASH' eq ref $inline
            && defined $inline->{t}
            && 'LineBreak' eq $inline->{t};

        if ($is_break) {
            # Discard the LineBreak and start a new line
            push @lines, ( $line = [] );
        }
        else {
            # Add a non-LineBreak to the current line
            push @$line, $inline;
        }
    }
    return \@lines;
}

# verse( $div )
#
# Takes a Div element (a hash reference whose 'c' is a list with the
# Div's blocks as its last item) and rewrites those blocks in place:
# every Para or LineBlock is replaced by a Div with class "stanza"
# containing a Para in which each non-empty line is a Span with class
# "line" preceded by a literal newline, with one more literal newline
# after the last line.  All other blocks are left untouched.
# Returns nothing.
sub verse {
    my ($verse) = @_;    # A Div with class "verse"

    # The loop variable is an alias for the list item, so assigning
    # to it replaces the block in the document.
  STANZA:
    for my $stanza ( @{ $verse->{c}[-1] } ) {

        # Skip anything which is not a stanza, but do go on with the
        # rest of the blocks: returning here would leave every stanza
        # after e.g. a heading unformatted.
        my $lines = _stanza_lines($stanza) or next STANZA;

        # Turn each non-empty line into a Span with class "line"
        # preceded by a literal newline!
        my @spans = map { @$_ ? _mk_line($_) : () } @$lines;
        push @spans, NL;    # add a final literal newline

        # Make a Para with the list of line-Spans as contents,
        # wrap it in a Div with class 'stanza'
        # and replace the original block with it!
        $stanza = _mk_stanza( \@spans );
    }
    return;
}
# Serialise the document back to JSON and write it to STDOUT.
my $json_out = encode_json($doc);
print {*STDOUT} $json_out;
# <section class="verse"> | |
# <p class="stanza"> | |
# <span class="line">Line of poetry goes here</span> | |
# <span class="line">Second line here</span> | |
# <span class="line">And so on, throughout the poem,</span> | |
# </p> | |
# <p class="stanza"> | |
# <span class="line">even over stanza breaks.</span> | |
# </p> | |
# </section> | |
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment