Skip to content

Instantly share code, notes, and snippets.

@creativecoder
Forked from dsanson/any2pandoc.sh
Last active December 13, 2015 23:19
Show Gist options
  • Save creativecoder/4990796 to your computer and use it in GitHub Desktop.
Save creativecoder/4990796 to your computer and use it in GitHub Desktop.
#!/bin/sh
# any2markdown.sh
#
# A shell script that converts documents to markdown
#
# https://gist.github.com/creativecoder/4990796
#
# Depends on:
# pandoc: http://johnmacfarlane.net/pandoc/
# a utility for converting lots of things to lots of things
# html2text: https://github.com/aaronsw/html2text
# a utility for converting html to markdown text; used instead
# of pandoc for better handling of nested html lists
# textutil: a built-in OS X utility for converting lots of things to
# lots of things.
# pdftohtml: http://pdftohtml.sourceforge.net/
# a utility for converting pdf to html
#
# Forked from:
# https://gist.github.com/dsanson/1181510
# Convert one document to markdown, writing the result next to the input
# with its final extension replaced by ".md".
#
# Arguments: $1 - path of the document to convert
# Globals:   base, ext, name (written; plain sh has no local variables)
# Outputs:   "<input without final extension>.md"; diagnostics on stderr
# Exits:     terminates the whole script with status 1 when the converter
#            needed for this file type is not installed
convert_file() {
  # Split on the LAST dot of the file name only, so that dots in directory
  # names ("./a.doc", "v1.2/README") or extra dots in the name
  # ("report.final.docx") cannot corrupt the extension or the output path.
  name="${1##*/}"
  case "$name" in
    ?*.* )
      base="${1%.*}"
      ext="${name##*.}"
      ;;
    * )
      # No extension, or a bare dotfile: keep the whole path as the base.
      base="$1"
      ext=""
      ;;
  esac

  case "$ext" in
    doc | docx | webarchive | rtf | rtfd | odt )
      # command -v is the portable existence check (which is not POSIX).
      if ! command -v textutil >/dev/null 2>&1; then
        echo "textutil not found:" >&2
        echo " unable to process doc, docx, webarchive, rtf, rtfd, or odt files" >&2
        exit 1
      fi
      # Convert text with Courier New font to <code></code>
      # Convert ordered lists to unordered lists
      # The substitutions run per line, in this order, in a single perl.
      textutil -format "$ext" -convert "html" -stdout "$1" \
        | perl -p \
            -e 's:<span class="s1">(.*?)</span>:<code>$1</code>:g;' \
            -e 's:(<li class="li3">.*?<span.*?</span>)(.*?)<:$1<code>$2</code><:g;' \
            -e 's:<ol:<ul:g;' \
            -e 's:</ol>:</ul>:g;' \
        | html2text > "${base}.md"
      ;;
    pdf )
      if ! command -v pdftohtml >/dev/null 2>&1; then
        echo "pdftohtml not found: unable to process pdf files." >&2
        exit 1
      fi
      pdftohtml -noframes -stdout "$1" \
        | html2text > "${base}.md"
      ;;
    tex )
      pandoc -f latex -s "$1" -o "${base}.md"
      ;;
    * )
      # Everything else is handed to pandoc, which picks the input format.
      # NOTE(review): an input already named *.md gets the same path as its
      # output here -- confirm that overwriting in place is acceptable.
      pandoc -s "$1" -o "${base}.md"
      ;;
  esac
}

for file in "$@"; do
  convert_file "$file"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment