# Gist PatrickLerner/5871954 - last active December 19, 2015 00:58.
# Note from the GitHub page: this file contains bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To review
# it, open the file in an editor that reveals hidden Unicode characters.
#!/bin/bash
#
# Fetches the page given with -u, extracts its title and article text and
# builds a Kindle .mobi book from them (run with -h for the option list).

# Restart getopts at the first argument.
OPTIND=1

# Option defaults; each one is switched by the matching command-line flag.
ruby=0        # -r: keep furigana (<ruby> markup) in the output
url=""        # -u: page to convert (required)
open=0        # -o: open the result with Kindle for Mac
notvertical=0 # -n: horizontal instead of vertical text
download=0    # -d: also download the mp3
#######################################
# Prints the usage text: the required -u option, the optional flags, the
# required/optional external packages and the author line.
# Arguments: none
# Outputs:   the help text on stdout, one argument of printf per line
#######################################
show_help() {
  printf '%s\n' \
    " Usage:" \
    " -u URL" \
    "" \
    " Optional:" \
    " -n (not vertical, i.e. horizontal)" \
    " -o (open with Kindle for Mac after generation)" \
    " -r (include furigana)" \
    " -h (show help)" \
    " -d (download mp3)" \
    "" \
    "" \
    " This script was only tested on a Mac." \
    " (most of it should work on Linux just fine)" \
    "" \
    " Required packages:" \
    " - pandoc" \
    " - kindlegen" \
    " - wget" \
    "" \
    " Optional packages:" \
    " - Kindle for Mac" \
    "" \
    " Author: Patrick Lerner - [email protected]"
}
# Parse the command-line flags into the option variables.
# The leading ":" selects getopts' silent mode, so a missing argument (":")
# and an unknown flag ("?") can be told apart and reported with a non-zero
# exit status instead of printing the help and exiting 0.
while getopts ":hrondu:" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    r) ruby=1 ;;
    n) notvertical=1 ;;
    u) url=$OPTARG ;;
    o) open=1 ;;
    d) download=1 ;;
    :)
      echo "Option -${OPTARG} needs an argument." >&2
      show_help >&2
      exit 1
      ;;
    \?)
      # In silent mode OPTARG holds the offending flag; a literal "-?" is
      # still honoured as a request for help.
      if [[ "$OPTARG" == "?" ]]; then
        show_help
        exit 0
      fi
      echo "Unknown option: -${OPTARG}" >&2
      show_help >&2
      exit 1
      ;;
  esac
done

# The URL is the only mandatory input; report its absence on stderr.
if [[ -z "$url" ]]; then
  echo "Call me with a valid url, dude." >&2
  exit 1
fi
# Scratch directory for the intermediate files. Abort if it cannot be created:
# with an empty temp_dir the files below would be written to "/".
temp_dir=$(mktemp -d -t tmp.XXXXXXXXXX) || {
  echo "Could not create a temporary directory." >&2
  exit 1
}

# Removes the scratch directory; registered on EXIT so it runs on every exit
# path of the script.
finish() {
  rm -rf -- "${temp_dir}"
}
trap finish EXIT

# Base stylesheet for the generated page.
echo "body { font-family: serif; } h2 { font-size: 120%; font-weight: bold; padding-top: 2em; margin-right: 1em; margin-left: 1em; } p { text-indent: 1em; } #newsDate { font-size: 90%; font-weight:bold; line-height: 1.5; }" >"${temp_dir}/file.css"

# Vertical right-to-left writing is the default; -n (notvertical=1) skips it.
if [[ "$notvertical" -eq 0 ]]; then
  echo "body { -webkit-writing-mode: vertical-rl; } #newsDate { padding-top: 10em; text-indent: -4em;} " >>"${temp_dir}/file.css"
fi
#######################################
# Strips furigana markup: every <ruby>BASE<rt>READING</rt></ruby> group is
# replaced by BASE alone.
# Arguments: none
# Inputs:    HTML on stdin
# Outputs:   the filtered HTML on stdout
# Requires:  the external `utf8sed` command
#######################################
removeRuby() {
  utf8sed 's/<ruby>(.*?)<rt>(.*?)<\/rt><\/ruby>/$1/g'
}
#######################################
# Reduces a downloaded page to the markup inside <div id="newstitle"> and
# <div id="newsarticle">, separated by a line break.
# Pipeline stages, in order:
#   1. drop every newline so the page becomes a single line
#   2. keep only the contents of the two divs, as "title\narticle"
#   3. remove <a ...>/<span ...> tags and their closing tags; the \b keeps
#      longer tag names that merely start with "a" (<abbr>, <article>, ...)
#      from being removed as well
#   4. strip leading spaces from each line
#   5. turn every run of two or more spaces into a line break
#   6./7. as written these map the digits 0-9 and "h2" onto themselves, i.e.
#      they change nothing.
#      NOTE(review): they look like a digit-width conversion and its <h2>
#      repair whose non-ASCII characters were lost - confirm against the
#      original gist before relying on or removing them.
# Arguments: none
# Inputs:    page HTML on stdin
# Outputs:   title markup and article markup on stdout
# Requires:  the external `utf8sed` command
#######################################
getContentWithTitle() {
  utf8sed 's/\n//' |
    utf8sed 's/.*<div id="newstitle">(.*?)<\/div>.*<div id="newsarticle">(.*?)<\/div>.*/$1\n$2/s' |
    utf8sed 's/<[\/]?(a|span)\b.*?>//sg' |
    utf8sed 's/^[ ]+//g' |
    utf8sed "s/ [ ]+/\n/g" |
    utf8sed 'tr/0-9/0-9/' |
    utf8sed 's/h2/h2/g'
}
#######################################
# Extracts the text of the <h2> inside <div id="newstitle"> from a page.
# Pipeline stages, in order:
#   1. drop every newline so the page becomes a single line
#   2. keep only the contents of the <h2> in the title div
#   3. remove <a ...>/<span ...> tags and their closing tags; the \b keeps
#      longer tag names that merely start with "a" (<abbr>, <article>, ...)
#      from being removed as well
#   4. strip leading spaces
#   5. turn every run of two or more spaces into a line break
#   6./7. as written these map the digits 0-9 and "h2" onto themselves, i.e.
#      they change nothing.
#      NOTE(review): they look like a digit-width conversion and its <h2>
#      repair whose non-ASCII characters were lost - confirm against the
#      original gist before relying on or removing them.
# Arguments: none
# Inputs:    page HTML on stdin
# Outputs:   the title on stdout (without a trailing newline)
# Requires:  the external `utf8sed` command
#######################################
getTitle() {
  utf8sed 's/\n//' |
    utf8sed 's/.*<div id="newstitle">.*?<h2>(.*?)<\/h2>.*?<\/div>.*/$1/s' |
    utf8sed 's/<[\/]?(a|span)\b.*?>//sg' |
    utf8sed 's/^[ ]+//g' |
    utf8sed "s/ [ ]+/\n/g" |
    utf8sed 'tr/0-9/0-9/' |
    utf8sed 's/h2/h2/g'
}
# Download the page once; the title and the article are both cut from it.
if ! content="$(wget -q -O - "${url}")"; then
  echo "Could not download ${url}" >&2
  exit 1
fi

title="$(printf '%s\n' "$content" | getTitle | removeRuby)"
# getTitle turns runs of spaces into line breaks; keep the title on one line
# so it can be used in the sed expressions and file names below.
title=${title//$'\n'/ }
# Title with "\", "/" and "&" escaped so it is taken literally on the
# replacement side of the sed s/// commands further down.
sed_title=$(printf '%s' "$title" | sed 's|[\/&]|\\&|g')
# A "/" in the title would otherwise be read as a directory separator.
out_name=${title//\//-}

# opf file: package description handed to kindlegen (metadata, manifest of
# the stylesheet and the page, right-to-left page progression).
cat >"${temp_dir}/file.opf" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<package version="3.0" xmlns="http://www.idpf.org/2007/opf"
unique-identifier="BookId">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/">
<dc:title>${title}</dc:title>
<dc:contributor>NHK</dc:contributor>
<dc:language>ja</dc:language>
<dc:publisher>NHK</dc:publisher>
</metadata>
<manifest>
<item id="style" href="file.css" media-type="text/css" />
<item id="titlepage" href="file.html" media-type="application/xhtml+xml" />
</manifest>
<spine toc="tocncx" page-progression-direction="rtl">
<itemref idref="titlepage" />
</spine>
</package>
EOF
# opf file end

# Article markup; furigana is kept only when -r was given.
fileContent="$(printf '%s\n' "$content" | getContentWithTitle)"
if [[ "$ruby" -eq "0" ]]; then
  fileContent=$(printf '%s\n' "$fileContent" | removeRuby)
fi
printf '%s\n' "$fileContent" >"${temp_dir}/file.proto.md"

if ! pandoc "${temp_dir}/file.proto.md" -c "${temp_dir}/file.css" -o "${temp_dir}/file.html"; then
  echo "pandoc could not convert the article." >&2
  exit 1
fi

# Read the generated page into memory first: the pipeline below writes back to
# the same file, which is truncated as soon as the redirection is set up.
fileContent=$(<"${temp_dir}/file.html")
printf '%s\n' "$fileContent" |
  sed "s/<title>/<title>${sed_title}/" |
  sed "s/<\/head>/<link rel=\"Schema\.DC\" href=\"http:\/\/purl\.org\/dc\/elements\/1\.1\/\" \/><meta name=\"DC\.Title\" content=\"${sed_title}\" \/><meta name=\"DC\.Creator\" content=\"NHK\" \/><meta name=\"DC\.Publisher\" content=\"NHK\" \/><\/head>/" |
  sed "s/<html /<html xml:lang=\"ja\" /" >"${temp_dir}/file.html"

# kindlegen's exit status is not checked here (NOTE(review): it reportedly
# returns non-zero for mere warnings - confirm); the presence of the output
# file decides whether the build worked.
kindlegen "${temp_dir}/file.opf"
if [[ ! -f "${temp_dir}/file.mobi" ]]; then
  echo "kindlegen did not produce a .mobi file." >&2
  exit 1
fi
cp -- "${temp_dir}/file.mobi" "${out_name}.mobi"

if [[ "$open" -eq "1" ]]; then
  # Stop a running Kindle and drop its previous copy of this book before
  # opening the new file (presumably so the fresh build is imported - confirm).
  killall Kindle
  rm -rf -- "$HOME/Library/Application Support/Kindle/My Kindle Content/${title}.mobi"
  open "${out_name}.mobi"
fi

if [[ "$download" -eq "1" ]]; then
  # The mp3 address is the page address with its first "html" replaced by "mp3".
  wget -c "${url/html/mp3}" -O "${out_name}.mp3"
fi
# Note from the GitHub page: this file contains bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To review
# it, open the file in an editor that reveals hidden Unicode characters.
# Runs the Perl expression given as the first argument over every line of
# standard input and prints each resulting line (-p).
#   -Mutf8  the expression itself is read as UTF-8 source
#   -CSAD   standard streams, @ARGV and default file layers are UTF-8
# NOTE(review): this line appears to be the body of the separate `utf8sed`
# helper that the conversion script calls - confirm its file name and that it
# is installed somewhere on PATH.
perl -Mutf8 -CSAD -pe "$1"
# GitHub page footer: sign up for free to join this conversation on GitHub,
# or sign in to comment if you already have an account.