Skip to content

Instantly share code, notes, and snippets.

@rmaicle
Created April 24, 2017 13:58
Show Gist options
  • Save rmaicle/a3819006fd61b25d7b53eb9b3c8b508b to your computer and use it in GitHub Desktop.
Save rmaicle/a3819006fd61b25d7b53eb9b3c8b508b to your computer and use it in GitHub Desktop.
Converts asciidoc generated HTML to markdown
#!/bin/bash
# Convert an asciidoc document to markdown.
#
# Usage: <script> <name>
#
# Argument(s):
# - name - base name of the input document, given WITHOUT its extension.
#          The asciidoc source is read from "<name>.txt"; the markdown
#          output file is "<name>.md" with every '-' replaced by '_'.
#
# Intermediate files (tmp.html, original.html, ...) are written to the
# current directory.
#
# TODO: Lists

# Without a name the script would try to convert ".txt" into ".md".
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s <name>   (converts <name>.txt to markdown)\n' "${0##*/}" >&2
  exit 2
fi

# Name of the markdown output file; dashes become underscores.
filename="$1.md"
filename="${filename//-/_}"

# Render the asciidoc source to HTML. Stop on failure, otherwise every
# following step would work on a missing or stale tmp.html.
asciidoc -b html5 -o tmp.html "$1".txt || exit 1

# Store copy of HTML with deleted HTML header part for debugging
cp -f tmp.html original.html
sed -i '/<head>/,/<\/head>/d' original.html
# HTML replacements
# =================
# Remove the document wrapper from tmp.html in a single sed pass:
#   1. delete everything from the DOCTYPE line through </head>
#   2. remove the opening <body class="article"> tag
#   3. delete from the </body> line up to a following </head> line; no
#      such line is expected after step 1, in which case sed deletes
#      through the end of the file
sed -i \
  -e '/DOCTYPE/,/<\/head>/d' \
  -e 's/<body class="article">//g' \
  -e '/<\/body>/,/<\/head>/d' \
  tmp.html
# Keep a snapshot of the wrapper-less HTML
cp -f tmp.html no_headers.html
# Definition lists
# ================
# Replace the definition-list tags with single-character placeholders;
# they are converted to markdown after the pandoc run.
#   <dt class="hdlist1"> -> †      </dt> -> ‡
#   <dd>                 -> »      </dd> -> «
# Only the first occurrence on each line is replaced (no 'g' flag).
#
# Plain -i is used for every call: sed parses "-ir" as "-i" with the
# backup suffix "r" (not as "-i -r"), which left a stray tmp.htmlr behind.
sed -i 's/<dt class="hdlist1">/†/' tmp.html
sed -i 's/<\/dt>/‡/' tmp.html
sed -i 's/<dd>/»/' tmp.html
sed -i 's/<\/dd>/«/' tmp.html
# Code blocks
# ===========
# Turn every <pre>...</pre> block into a ~~~ fenced block and remove the
# HTML elements wrapped around it. The steps depend on each other and
# must run in this order.
#
# Put <pre> on its own line: a line starting with <pre> becomes "<pre>",
# an empty line, an opening "~~~" fence and then the rest of that line.
sed -i 's/^<pre>\(.*\)/<pre>\n\n~~~\n\1/' tmp.html
# Put </pre> on its own line: the first </pre> in the pattern space is
# replaced by a newline, a closing "~~~" fence, an empty line and "</pre>".
# NOTE(review): the address range only governs the N command; the braced
# s command is a separate, unaddressed command and runs for every line.
sed -i '/<\/pre>$/,/^<\/div><\/div>$/ N; {s/<\/pre>/\n~~~\n\n<\/pre>/}' tmp.html
# Delete HTML block for code blocks: drop everything from the opening
# listingblock/literalblock <div> through the line starting with <pre>,
# and from the line starting with </pre> through the line starting with
# </div></div>. The fences and the code between them stay.
sed -i '/^<div class="listingblock">/,/^<pre>/d' tmp.html
sed -i '/^<div class="literalblock">/,/^<pre>/d' tmp.html
sed -i '/^<\/pre>/,/^<\/div><\/div>/d' tmp.html
# Pandoc strips code span HTML elements
# so we pre-process them here, delimited using ͼ and ͽ
#   <span class="monospaced">  -> ͼ
#   </span>                    -> ͽ
#
# Two expressions are applied to every line:
#   1. spans whose content holds no nested tag are matched one at a time
#      ([^<]*), so several code spans on the same line stay separate
#      instead of being merged into one by a greedy ".*";
#   2. the original greedy rule then handles whatever is left, e.g. a
#      span that contains nested markup.
#
# Plain -i is used: sed parses "-ir" as "-i" with the backup suffix "r"
# (not as "-i -r"), which left a stray tmp.htmlr behind.
sed -i \
  -e 's/<span class="monospaced">\([^<]*\)<\/span>/ͼ\1ͽ/g' \
  -e '/<span class="monospaced">/,/<\/span>/ s/<span class="monospaced">\(.*\)<\/span>/ͼ\1ͽ/g' \
  tmp.html
cp -f tmp.html pre_pandoc.html

# Convert the prepared HTML to markdown. Stop on failure, otherwise the
# fix-up steps below would run on a missing or stale "$filename".
pandoc -f html -t markdown_github+blank_before_header+all_symbols_escapable+blank_before_blockquote+definition_lists+fenced_code_blocks+footnotes+pipe_tables+yaml_metadata_block tmp.html -o "$filename" || exit 1
# Keep the untouched pandoc output for debugging
cp -f "$filename" no_fixes.md
# Fixes
# =====
# Put texts between † and ‡ on its own line: when a closing ‡ and the
# next opening † are separated by a single whitespace character, break
# the line between them.
#sed -i 's/\(†.*‡\)/\n\1\n/' "$filename"
sed -i 's/‡[[:space:]]†/‡\n†/' "$filename"
# Delete extra spaces before and after the delimiters (disabled)
#sed -i 's/†[[:space:]]/†/' "$filename"
#sed -i 's/[[:space:]]‡/‡/' "$filename"
# Re-combine lines: on a line containing †, pull in the next two lines
# and, if they form "†", text, "‡" on three lines, join them into "†text‡".
# Afterwards delete lines that consist of a lone ‡.
#
# Plain -i is used: sed parses "-ir" as "-i" with the backup suffix "r"
# (not as "-i -r"), which created a stray "<file>.mdr" copy on every call.
sed -i '/†/{N;N;s/†\n\(.*\)\n‡/†\1‡/}' "$filename"
sed -i '/^‡$/d' "$filename"
# Keep a snapshot for debugging
cp -f "$filename" delimited.md
# Tidy up the » and « (dd) delimiters.
# Each of these delimiters must end up alone on its own line.
# Remove one leading whitespace character from a line that otherwise
# holds only » ...
sed -i 's/^[[:space:]]»$/»/' "$filename"
# ... or only « ...
sed -i 's/^[[:space:]]«$/«/' "$filename"
# ... and one whitespace character directly after a « at line start.
sed -i 's/^«[[:space:]]/«/' "$filename"
# Delete empty line before « and append an empty line after:
# on an empty line the next line is appended; if that line is exactly «
# the two are swapped.
sed -i '/^$/{N;s/^\n«$/«\n/}' "$filename"
# Compress two empty lines to a single line: on an empty line the next
# line is appended; if both are empty the first one is dropped and the
# check restarts with the remaining one.
sed -i '/^$/N;/^\n$/D' "$filename"
# Paragraphs within » and « after the first one must be
# 'terminated' or formatted as another dd element
#sed -i '/»/,/«/ N; {s/^\n\(.*\)/\n: \1/ }' "$filename"
# -- the above is disabled: it delimits too many paragraphs which should
#    not be delimited (NOTE(review): original remark was cut short)
# yaml
# ====
# NOTE(review): despite the section title, these commands only rewrite
# fence markers. On every line, in this order, the first occurrence of
#   "``` content", "```", "~~~~ content", "~~~~"
# is replaced by a plain "~~~".
sed -i \
  -e 's/``` content/~~~/' \
  -e 's/```/~~~/' \
  -e 's/~~~~ content/~~~/' \
  -e 's/~~~~/~~~/' \
  "$filename"
# Code spans
# ==========
# Disabled earlier attempts at wrapping code spans between temporary
# delimiter characters after the pandoc run (successive code spans must
# be considered):
#sed -i 's/<span class="monospaced">/†/g' "$filename"
#sed -i 's/<\/span>/‡/g' "$filename"
#sed -ir 's/<span class="monospaced">\(.*\)<\/span>/†\1‡/g' "$filename"
#sed -ir '/<span class="monospaced">/,/<\/span>/ s/<span class="monospaced">\(.*\)<\/span>/†\1‡/g' "$filename"
#
# Collapse doubled delimiters, first occurrence per line only:
#   - two identical delimiters separated by one whitespace character
#   - two identical delimiters directly next to each other
sed -i \
  -e 's/†[[:space:]]†/†/' \
  -e 's/‡[[:space:]]‡/‡/' \
  -e 's/††/†/' \
  -e 's/‡‡/‡/' \
  "$filename"
# Delete spaces
# =============
# Both commands in this section are disabled.
# Delete space before a definition list term
#sed -i 's/^ -'/-/ "$filename"
# Delete trailing whitespace at end of each line
#sed -in '/^---$/,/^---$/ !{ /\s*$/ s///g }' "$filename"
# Convert < and > characters within code spans
# ============================================
# Convert code span between custom delimiters containing &gt; and &lt;
# to verbatim equivalent, i.e. a literal < or >.
# NOTE(review): /†/,/‡/ is a line range, not a text span. It opens on a
# line containing † and closes on a LATER line containing ‡ (sed starts
# looking for the end on the following line), so it can cover more lines
# than the text between one †...‡ pair.
sed -i '/†/,/‡/ {s/&lt;/\</g}' "$filename"
sed -i '/†/,/‡/ {s/&gt;/\>/g}' "$filename"
# Convert text containing &gt; and &lt; to escaped equivalent: on all
# lines outside those ranges the entities become \< and \>.
sed -i '/†/,/‡/ !{/&lt;/ s//\\</g}' "$filename"
sed -i '/†/,/‡/ !{/&gt;/ s//\\>/g}' "$filename"
# Convert temporary dt delimiters
# ======================================
# Every † and ‡ becomes a backtick.
sed -i 's/†/\`/g' "$filename"
sed -i 's/‡/\`/g' "$filename"
# Convert temporary code span delimiters
# ======================================
# Every ͼ and ͽ becomes a backtick.
# Plain -i is used: sed parses "-ir" as "-i" with the backup suffix "r"
# (not as "-i -r"), which created a stray "<file>.mdr" copy on every call.
sed -i 's/ͼ/`/g' "$filename"
sed -i 's/ͽ/`/g' "$filename"
# Delete temporary dd delimiters » «
# ==================================
# A line holding only » is joined with the line after it and the pair is
# rewritten as ": <that line>". The address applies to N only; the s
# command runs on every line but can match only after such a join.
sed -i '/^»$/ N; s/^»\n\(.*\)$/: \1/' "$filename"
# A line holding only « is joined with the line after it, then D removes
# the « part and processing restarts with the remaining line.
sed -i '/^«$/N;/\n/D' "$filename"
# Front matter
# ============
# Prepend a front matter block to the generated markdown file.

#######################################
# Print the front matter block followed by one empty line.
# Arguments: $1 - page title
#            $2 - permalink
# Outputs:   the block on stdout
#######################################
front_matter() {
  cat <<EOF
---
title: $1
layout: documentation
categories: [documentation]
tags: [git]
draft: true
published: true
permalink: $2
group: git
---

EOF
}

# Title: first line of the markdown file, cut off at the first '('.
title=$(head -n 1 -- "$filename")
title=${title%%(*}
#prefix="/mnt/work/projects/_github/rmaicle/rmaicle.github.io.2017/_source/documentations/"
# Permalink: "/doc/" + current directory with everything up to and
# including the first "documentations/" removed + "/" + output file name
# without its ".md" suffix.
directory=$(pwd)
directory=${directory#*documentations/}
directory="/doc/${directory}/${filename}"
directory=${directory%%.md}
printf '%s\n' "$directory"

# Assemble front matter + document in a scratch file, then move it over
# the original so the file is never read and written at the same time.
{
  front_matter "$title" "$directory"
  cat -- "$filename"
} > tmp.md
mv -- tmp.md "$filename"
# Cleanup
# =======
# Removal of tmp.html is disabled, so it stays available for inspection.
#rm -f ./tmp.html
rm -f -- x.md
# sed parses "-in" / "-ir" as "-i" with the backup suffix "n" / "r", so
# such calls leave "<file>n" / "<file>r" copies behind. Remove them,
# including the one made for tmp.html. The ./ prefix and "--" keep file
# names that start with '-' from being read as options.
rm -f -- ./*.mdn ./*.mdr ./tmp.htmlr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment