Last active
October 31, 2021 04:21
-
-
Save bpj/6f4c26215338e0a9775dbdfc9bb3ef58 to your computer and use it in GitHub Desktop.
Massage Markdown converted with Pandoc from OPML exported from Dynalist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
=head1 SYNOPSIS | |
pandoc -f opml -t markdown --atx-headers [PANDOC OPTIONS] input.opml \ | |
| perl md-head2dl.pl [OPTIONS] > output.md | |
=head1 DESCRIPTION | |
Massage Markdown converted with Pandoc L<http://pandoc.org> from OPML exported from Dynalist L<https://dynalist.io>.
There are options to remove backslashes before punctuation characters, since Pandoc doesn't see that there is
Markdown in the OPML and escapes all punctuation, and to specify a cutoff level, which may be 0. | |
Down to that level you get (actually preserve) headings. Below that level you get nested (unordered) | |
lists with notes as continuation paragraphs, converted from the unlimited number of heading levels which | |
Pandoc produces from OPML. You can also specify how many spaces there should be per tab/indentation level. | |
=head1 OPTIONS
=over | |
=item -u, --[no-]unescape | |
Remove backslashes in front of punctuation characters. | |
=item -t, --tab-stop, --tab-space I<INT> | |
The number of spaces per tab/indentation level (default 4). | |
=item -l, --max-heading-level I<INT>
Preserve headings down to this level (default 0). Below this level headings are converted into nested unordered lists.
A value of 0 (zero), which is the default, gives only nested lists.
=item -h, --help | |
Show a help message. | |
=item -m, --man, --manual | |
Show the manual. | |
=back | |
=cut | |
use utf8::all; | |
use autodie 2.29; | |
use 5.010001; | |
use strict; | |
use warnings; | |
use warnings qw(FATAL utf8); | |
use Carp qw[ carp croak ]; | |
use Text::Tabs; | |
use Getopt::Long qw[ GetOptionsFromArray :config no_ignore_case ]; | |
use Pod::Usage; | |
# --- Defaults and command-line handling -----------------------------------

# $tabstop is the package variable exported by Text::Tabs; expand() uses it
# as the number of columns per tab when indentation is turned into spaces.
$tabstop = 4;

# Leading run of ATX heading hashes (captured) plus the whitespace after it.
my $hashes_re = qr/^(\#+)\s+/;

# Indentation state shared with the line loop: the prefix put in front of
# body text below the current list item, and that item's nesting depth.
my $indent       = "";
my $indent_level = 0;

# Deepest heading level that is kept as a heading; with 0 every heading
# becomes a list item.
my $max_heading = 0;

# Strip Pandoc's backslash escapes unless --no-unescape is given.
my $unescape = 1;

my ( $help, $man );

GetOptionsFromArray(
    \@ARGV,
    'unescape|u!' => \$unescape,
    'tab_stop|tab-stop|tabstop|tab_space|tab-space|tabspace|t=i' => \$tabstop,
    'max_heading_level|max-heading-level|maxheadinglevel|l=i' => \$max_heading,

    # Every spec needs an explicit destination: without one Getopt::Long
    # stores the value in the global $opt_help / $opt_man instead, and the
    # $help / $man checks below would never fire.
    'help|h!'       => \$help,
    'man|manual|m!' => \$man,
) || pod2usage( 2 );

pod2usage( 1 ) if ( $help );
pod2usage( -exitval => 0, -verbose => 2 ) if ( $man );

# Text::Tabs cannot expand tabs to a non-positive width, so reject such a
# value here with a usage error instead of failing in the middle of the input.
pod2usage(
    -exitval => 2,
    -message => "The tab stop must be a positive integer, not '$tabstop'.",
) if ( $tabstop < 1 );

# First heading level that is converted into a list item.
my $min_list = $max_heading + 1;
# --- Line filter ------------------------------------------------------------
# Reads the Markdown from the files named on the command line (or STDIN),
# optionally removes backslash escapes, rewrites headings at or beyond
# $min_list as nested list items, and indents the text that follows such an
# item so that it stays attached to it as continuation paragraphs.
while ( defined( my $raw = <> ) ) {
    chomp $raw;
    my $text = $raw;

    if ( $unescape ) {
        # Double the backslash in front of an escaped hash first, so the
        # general pass below turns it back into "\#" rather than a bare "#".
        $text =~ s/(\\\#)/\\$1/g;
        $text =~ s/\\([[:punct:]])/$1/g;
    }

    # Heading detection looks at the line as read, before any unescaping.
    if ( $raw =~ /$hashes_re/ ) {
        my $depth = length $1;

        if ( $depth < $min_list ) {
            # Stays a real heading; the text below it is not indented.
            $indent       = "";
            $indent_level = 0;
        }
        else {
            # Becomes a list item: drop the hashes, nest by one tab per level
            # beyond the cutoff, then turn the tabs into spaces.
            $text =~ s/$hashes_re//;
            $indent_level = $depth - $min_list;
            my $tabs = "\t" x $indent_level;
            $text   = expand "$tabs-\t$text";
            $indent = expand "$tabs\t";
        }

        say $text;
        next;
    }

    # Body text gets the current continuation indent; blank (whitespace-only)
    # lines are passed through untouched.
    say( $text =~ /\S/ ? $indent . $text : $text );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment