Created
April 24, 2017 13:58
-
-
Save rmaicle/a3819006fd61b25d7b53eb9b3c8b508b to your computer and use it in GitHub Desktop.
Converts asciidoc generated HTML to markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Convert an asciidoc document to markdown.
#
# Usage: <this script> NAME
#
# Argument(s):
#   - NAME - document name without its extension. The input that is read is
#            "NAME.txt"; the markdown is written to "NAME.md" with every '-'
#            in that output name replaced by '_'.
#
# Intermediate files (tmp.html, original.html, ...) are written to the
# current directory.
#
# TODO: Lists

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s NAME   (reads NAME.txt, writes NAME.md)\n' "${0##*/}" >&2
  exit 2
fi

# Output name: NAME.md with dashes turned into underscores.
filename="$1.md"
filename="${filename//-/_}"

# Render the document to HTML; without it there is nothing to convert,
# so stop here when asciidoc fails.
asciidoc -b html5 -o tmp.html "$1".txt || exit

cp -f tmp.html original.html

# Store copy of HTML with deleted HTML header part for debugging
sed -i '/<head>/,/<\/head>/d' original.html
# HTML replacements
# =================

# strip_html_envelope FILE
# Edit FILE in place so that only the body content remains:
#   - delete every line from the one containing DOCTYPE through </head>
#   - remove the opening <body class="article"> tag
#   - delete every line from the one containing </body> to the end of file
strip_html_envelope() {
  sed -i \
    -e '/DOCTYPE/,/<\/head>/d' \
    -e 's/<body class="article">//g' \
    -e '/<\/body>/,$d' \
    "$1"
}

# Delete HTML header and closing elements
strip_html_envelope tmp.html

# Keep a copy of this stage for debugging.
cp -f tmp.html no_headers.html
# Definition lists

# mark_definition_lists FILE
# Replace, in place, the definition-list tags in FILE with single-character
# placeholders (first occurrence on each line only):
#   <dt class="hdlist1"> -> †      </dt> -> ‡
#   <dd>                 -> »      </dd> -> «
# The placeholders are turned into markdown once pandoc has run.
#
# Plain "-i" is used on purpose: "-ir" would be read by GNU sed as
# "edit in place and keep a backup with suffix r", leaving FILEr behind.
mark_definition_lists() {
  sed -i \
    -e 's/<dt class="hdlist1">/†/' \
    -e 's/<\/dt>/‡/' \
    -e 's/<dd>/»/' \
    -e 's/<\/dd>/«/' \
    "$1"
}

mark_definition_lists tmp.html
# Code blocks
# ===========

# fence_code_blocks FILE
# Rewrite, in place, the <pre> blocks of FILE as text fenced by "~~~" lines
# and delete the HTML wrapper lines around them.
# NOTE(review): assumes <pre> starts its line and that the wrapper is closed
# by a line starting with </div></div> -- confirm against the asciidoc output.
# The passes are kept as separate sed runs because each one relies on the
# line layout produced by the previous one.
fence_code_blocks() {
  local file=$1

  # Put <pre> on its own line, followed by an empty line and the opening fence
  sed -i 's/^<pre>\(.*\)/<pre>\n\n~~~\n\1/' "$file"

  # Put </pre> on its own line, preceded by the closing fence and an empty
  # line. The address range only governs the N command; the substitution in
  # braces is applied to every pattern space.
  sed -i '/<\/pre>$/,/^<\/div><\/div>$/ N; {s/<\/pre>/\n~~~\n\n<\/pre>/}' "$file"

  # Delete HTML block for code blocks: the opening wrapper up to and
  # including the <pre> line ...
  sed -i '/^<div class="listingblock">/,/^<pre>/d' "$file"
  sed -i '/^<div class="literalblock">/,/^<pre>/d' "$file"
  # ... and the </pre> line up to and including the closing </div></div>
  sed -i '/^<\/pre>/,/^<\/div><\/div>/d' "$file"
}

fence_code_blocks tmp.html
# mark_code_spans FILE
# Pandoc strips code span HTML elements
# so we pre-process them here, delimited using ͼ and ͽ
#   <span class="monospaced">  -> ͼ
#   </span>                    -> ͽ
# The edit is done in place. The pattern is greedy, so on a line holding
# several spans the match runs from the first opening tag to the last
# closing tag on that line.
#
# Plain "-i" is used on purpose: "-ir" would be read by GNU sed as
# "edit in place and keep a backup with suffix r", leaving FILEr behind.
mark_code_spans() {
  sed -i '/<span class="monospaced">/,/<\/span>/ s/<span class="monospaced">\(.*\)<\/span>/ͼ\1ͽ/g' "$1"
}

mark_code_spans tmp.html

# Keep a copy of what is handed to pandoc for debugging.
cp -f tmp.html pre_pandoc.html
# Convert the pre-processed HTML to markdown.
# The output format is markdown_github plus the extensions listed below,
# joined with '+' in this order.
pandoc_extensions=(
  blank_before_header
  all_symbols_escapable
  blank_before_blockquote
  definition_lists
  fenced_code_blocks
  footnotes
  pipe_tables
  yaml_metadata_block
)
pandoc_format="markdown_github$(printf '+%s' "${pandoc_extensions[@]}")"

# Every later step edits "$filename", so stop when pandoc fails.
pandoc -f html -t "$pandoc_format" tmp.html -o "$filename" || exit

# Keep the untouched pandoc output for debugging.
cp -f "$filename" no_fixes.md
# Fixes
# =====

# join_term_delimiters FILE
# Normalise, in place, the † ... ‡ placeholders that surround definition
# terms so that each term sits on one line together with its delimiters.
#
# Plain "-i" is used on purpose: "-ir" would be read by GNU sed as
# "edit in place and keep a backup with suffix r", leaving FILEr behind.
join_term_delimiters() {
  local file=$1

  # Put texts between † and ‡ on its own line
  #sed -i 's/\(†.*‡\)/\n\1\n/' "$file"
  sed -i 's/‡[[:space:]]†/‡\n†/' "$file"

  # Delete extra spaces before and after the delimiters
  #sed -i 's/†[[:space:]]/†/' "$file"
  #sed -i 's/[[:space:]]‡/‡/' "$file"

  # Re-combine lines: a line containing † is joined with the two lines that
  # follow it; when the result is "†", text, "‡" on three lines they are
  # merged into one.
  sed -i '/†/{N;N;s/†\n\(.*\)\n‡/†\1‡/}' "$file"
  # Drop any line that consists of ‡ only.
  sed -i '/^‡$/d' "$file"
}

# tidy_definition_delimiters FILE
# Clean up, in place, the whitespace around the » and « placeholders.
# These delimiters must be on its own line.
tidy_definition_delimiters() {
  local file=$1

  # Delete spaces between » and «: one whitespace character before a
  # delimiter that is alone on its line, or directly after a leading «.
  sed -i \
    -e 's/^[[:space:]]»$/»/' \
    -e 's/^[[:space:]]«$/«/' \
    -e 's/^«[[:space:]]/«/' \
    "$file"

  # Delete empty line before « and append an empty line after
  sed -i '/^$/{N;s/^\n«$/«\n/}' "$file"

  # Compress two empty lines to a single line
  sed -i '/^$/N;/^\n$/D' "$file"
}

join_term_delimiters "$filename"
cp -f "$filename" delimited.md
tidy_definition_delimiters "$filename"

# Paragraphs within » and « after the first one must be
# 'terminated' or formatted as another dd element
#sed -i '/»/,/«/ N; {s/^\n\(.*\)/\n: \1/ }' "$filename"
# -- the above delimits too many paragraphs which should not be
# yaml
# ====

# normalize_fences FILE
# Rewrite, in place, the code fence markers in FILE to a bare "~~~":
#   "``` content" -> "~~~"      "```"  -> "~~~"
#   "~~~~ content" -> "~~~"     "~~~~" -> "~~~"
# Only the first occurrence on each line is replaced.
normalize_fences() {
  sed -i \
    -e 's/``` content/~~~/' \
    -e 's/```/~~~/' \
    -e 's/~~~~ content/~~~/' \
    -e 's/~~~~/~~~/' \
    "$1"
}

# Code spans
# ==========
# Wrap code span between temporary delimiter characters
# Must consider successive code spans.
#sed -i 's/<span class="monospaced">/†/g' "$filename"
#sed -i 's/<\/span>/‡/g' "$filename"
#sed -ir 's/<span class="monospaced">\(.*\)<\/span>/†\1‡/g' "$filename"
#sed -ir '/<span class="monospaced">/,/<\/span>/ s/<span class="monospaced">\(.*\)<\/span>/†\1‡/g' "$filename"

# collapse_delimiters FILE
# Reduce, in place, doubled † or ‡ placeholders to a single one
# (first occurrence on each line only).
collapse_delimiters() {
  # Delete successive delimiters (separated by one whitespace character),
  # then delete consecutive delimiters (directly adjacent).
  sed -i \
    -e 's/†[[:space:]]†/†/' \
    -e 's/‡[[:space:]]‡/‡/' \
    -e 's/††/†/' \
    -e 's/‡‡/‡/' \
    "$1"
}

normalize_fences "$filename"
collapse_delimiters "$filename"
# Delete spaces
# =============
# Delete space before a definition list term
#sed -i 's/^ -'/-/ "$filename"
# Delete trailing whitespace at end of each line
#sed -in '/^---$/,/^---$/ !{ /\s*$/ s///g }' "$filename"

# Convert < and > characters within code spans
# ============================================

# escape_angle_brackets FILE
# Edit FILE in place: lines outside a †...‡ line range get every < and >
# prefixed with a backslash; lines inside such a range are handled by the
# first two passes.
# NOTE(review): a sed range whose start and end both occur on one line does
# not close on that line; it runs on to the next line containing ‡ (or to the
# end of the file). Confirm this is acceptable for terms that sit on a single
# line.
escape_angle_brackets() {
  local file=$1

  # Convert code span between custom delimiters containing > and <
  # to verbatim equivalent
  # NOTE(review): as written, the replacement "\<" / "\>" yields a plain
  # < / >, so these two passes leave the text unchanged. The search pattern
  # may originally have been an HTML entity -- confirm against the upstream
  # script before changing it.
  sed -i '/†/,/‡/ {s/</\</g}' "$file"
  sed -i '/†/,/‡/ {s/>/\>/g}' "$file"

  # Convert text containing > and < to escaped equivalent
  sed -i '/†/,/‡/ !{/</ s//\\</g}' "$file"
  sed -i '/†/,/‡/ !{/>/ s//\\>/g}' "$file"
}

escape_angle_brackets "$filename"
# finalize_delimiters FILE
# Turn, in place, the temporary placeholder characters in FILE into their
# final markdown form.
#
# Plain "-i" is used on purpose: "-ir" would be read by GNU sed as
# "edit in place and keep a backup with suffix r", leaving FILEr behind.
finalize_delimiters() {
  local file=$1

  # Convert temporary dt delimiters († ‡) and temporary code span
  # delimiters (ͼ ͽ): every one of them becomes a backtick.
  sed -i \
    -e 's/†/\`/g' \
    -e 's/‡/\`/g' \
    -e 's/ͼ/`/g' \
    -e 's/ͽ/`/g' \
    "$file"

  # Delete temporary dd delimiters » «
  # ==================================
  # A line consisting of » is joined with the following line and the pair
  # becomes ": <that line>".
  sed -i '/^»$/ N; s/^»\n\(.*\)$/: \1/' "$file"
  # A line consisting of « is dropped; the line after it is kept.
  # NOTE(review): when « is the very last line there is no next line to
  # join, so it looks like the « would stay -- confirm.
  sed -i '/^«$/N;/\n/D' "$file"
}

finalize_delimiters "$filename"
# front_matter TITLE PERMALINK
# Print the YAML front matter block, followed by one empty line.
front_matter() {
  cat <<EOF
---
title: $1
layout: documentation
categories: [documentation]
tags: [git]
draft: true
published: true
permalink: $2
group: git
---

EOF
}

# permalink_for DIRECTORY FILE
# Print "/doc/<DIRECTORY>/<FILE>" where DIRECTORY has everything up to and
# including its first "documentations/" removed (it is used unchanged when
# it does not contain that text) and a trailing ".md" is cut off the result.
permalink_for() {
  local relative=${1#*documentations/}
  local link="/doc/${relative}/${2}"
  printf '%s\n' "${link%.md}"
}

# prepend_front_matter FILE
# Put the front matter in front of the content of FILE and print the
# permalink on stdout.
#   title     - first line of FILE, cut at the first "("
#   permalink - derived from the current directory and FILE
# Returns non-zero, leaving FILE alone, when FILE does not exist.
prepend_front_matter() {
  local file=$1 title permalink merged

  if [[ ! -f "$file" ]]; then
    printf 'prepend_front_matter: no such file: %s\n' "$file" >&2
    return 1
  fi

  title=$(sed q "$file")
  title=${title%%(*}
  #prefix="/mnt/work/projects/_github/rmaicle/rmaicle.github.io.2017/_source/documentations/"
  permalink=$(permalink_for "$(pwd)" "$file")
  printf '%s\n' "$permalink"

  # Assemble the result in a uniquely named scratch file, so that a document
  # that happens to be called x.md or tmp.md is not clobbered, then copy it
  # back over FILE.
  merged=$(mktemp "${file}.XXXXXX") || return
  {
    front_matter "$title" "$permalink"
    cat "$file"
  } > "$merged" && cat "$merged" > "$file"
  local status=$?
  rm -f -- "$merged"
  return "$status"
}

prepend_front_matter "$filename"
# Cleanup
#rm -f ./tmp.html
rm -f x.md
# Remove backup copies left behind by sed runs that were given "-in" or
# "-ir": GNU sed reads the letter after -i as a backup suffix, so such a run
# leaves "<file>n" / "<file>r" next to the edited file. Nothing happens when
# no such file exists.
rm -f -- *.mdn
rm -f -- *.mdr
rm -f -- tmp.htmlr
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment