Created
August 8, 2013 21:46
-
-
Save abdullahkhalids/6189074 to your computer and use it in GitHub Desktop.
This bash script takes the links from an RSS feed, converts each linked page to a .mobi file and sends it to an address (usually your Kindle email) for easy reading. It has some Kippt-specific tune-ups, so it might not work with other feeds without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Takes the links from an rss feed, converts each linked page to a .mobi
# file and sends it to an address (usually your kindle email) for easy
# reading.
#
# Usage: set the rss url below (or export rssUrl) and run the script.
#
# Dependencies:
#   html-xml-utils (provides hxextract)
#     https://launchpad.net/ubuntu/precise/+package/html-xml-utils
#   Calibre (provides ebook-convert and calibre-smtp)
#   wget, sed
#   possibly more
#
# This script has Kippt specific tuneups. Might not work for other feeds.
# It creates its state files in the directory it is run from. Leave them be.

### Define some constants
# Not marked readonly: the functions below reuse these names for their own
# variables.

# Url of the feed; taken from the environment when it is set there.
rssUrl="${rssUrl:-}"
# Local copy of the downloaded feed
rssFile="rssFile"
# Clean list of urls found in the feed, one per line
newLinksFile="newLinks"
# Urls that still have to be sent
processListFile="processList"
# Urls that have been sent
processedListFile="processedList"
# Raw <link> elements extracted from the feed
rawLinksFile="rawLinks"
# Folder a page is downloaded into before it is converted
downloadFolder="html"

### Define some functions
getRss () {
  # Gets an rss feed and writes it to a file.
  # Arguments: $1 - url of the feed
  #            $2 - path of the file the feed is written to
  # Returns:   1 if an argument is empty, otherwise the exit status of wget
  local rssUrl=$1
  local rssFile=$2

  if [[ -z "$rssUrl" || -z "$rssFile" ]]; then
    echo "getRss needs a url and an output file" >&2
    return 1
  fi

  # Quoted so that urls containing & or ? and paths containing spaces
  # reach wget as single arguments.
  wget --no-verbose --output-document="$rssFile" "$rssUrl"
}
extractLinks () {
  # Extracts the links from an rss file in a clean format (one url per
  # line) and writes them to a file.
  # Arguments: $1 - rss file to read
  #            $2 - file the clean list of urls is written to
  #            $3 - file the raw <link> elements are written to
  # Returns:   the exit status of the final sed
  local rssFile=$1
  local linksFile=$2
  local rawLinksFile=$3

  hxextract link "$rssFile" > "$rawLinksFile"

  # 1. remove the kippt.com link (first occurrence on a line)
  # 2. replace each closing tag with a newline (\n here needs GNU sed)
  # 3. remove the opening tags
  # 4. drop the blank lines left behind by step 2, so that no empty "url"
  #    ends up in the list
  sed -e 's#<link>http://kippt\.com</link>##' \
      -e 's#</link>#\n#g' \
      -e 's#<link>##g' "$rawLinksFile" \
    | sed -e '/^[[:space:]]*$/d' > "$linksFile"
}
isProcessed () {
  # Checks if the url is present in the file.
  # Arguments: $1 - url to look for
  #            $2 - file with one url per line
  # Globals:   alreadyProcessed (written) - "1" if the url is listed,
  #            "0" otherwise
  # Returns:   always 0; the answer is in alreadyProcessed
  local url=$1
  local linksFile=$2
  local aurl

  alreadyProcessed="0"

  # An empty url or a missing list can never be a match.
  if [[ -z "$url" || ! -r "$linksFile" ]]; then
    return 0
  fi

  # The second test keeps a last line without trailing newline.
  while read -r aurl || [[ -n "$aurl" ]]; do
    # Both sides quoted: literal comparison, no globbing on * or ?.
    if [[ "$url" == "$aurl" ]]; then
      alreadyProcessed="1"
      break
    fi
  done < "$linksFile"

  return 0
}
addProcesses () {
  # Takes the urls in the newLinks file and adds them to the processList
  # file, unless they are already in the processList file or in the
  # processedList file.
  # Arguments: $1 - file with the new urls, one per line
  #            $2 - processList file (appended to)
  #            $3 - processedList file (read only)
  # Globals:   alreadyProcessed (written by isProcessed)
  local newLinksFile=$1
  local processListFile=$2
  local processedListFile=$3
  local newurl

  while read -r newurl || [[ -n "$newurl" ]]; do
    # Blank lines are not urls; never queue them.
    [[ -n "$newurl" ]] || continue

    # Checking the processList first also catches a url that appears
    # twice in the new links, because the list grows as we go.
    isProcessed "$newurl" "$processListFile"
    [[ "$alreadyProcessed" == "0" ]] || continue

    isProcessed "$newurl" "$processedListFile"
    [[ "$alreadyProcessed" == "0" ]] || continue

    printf '%s\n' "$newurl" >> "$processListFile"
  done < "$newLinksFile"
}
processUrl () {
  # Takes a url and tries to send it to the kindle: downloads the page,
  # converts it to .mobi and mails the result.
  # Arguments: $1 - url of the page
  # Globals (read):
  #   downloadFolder      folder the page is downloaded into (default: html)
  #   kindleFromAddress   sender address, required
  #   kindleToAddress     recipient (kindle) address, required
  #   kindleSmtpUser      smtp user name (default: abd.kindle)
  #   kindleSmtpPassword  smtp password (default: the original built-in one)
  # Returns: 0 only if sent
  #          1 no html file was downloaded
  #          2 the url leads to multiple html files
  #          3 ebook-convert failed
  #          4 the mail could not be sent (or the addresses are not set)
  # Uses return, not exit, so that the caller can carry on with the next
  # url after a failure.
  local url=$1
  local folder=${downloadFolder:-html}
  local fromAddress=${kindleFromAddress:-}
  local toAddress=${kindleToAddress:-}
  local smtpUser=${kindleSmtpUser:-abd.kindle}
  # NOTE(review): the fallback keeps the password that was hard-coded here;
  # set kindleSmtpPassword instead and remove the fallback once callers do.
  # The password is still visible in the process list while mailing.
  local smtpPassword=${kindleSmtpPassword:-testable}
  local candidate htmlFilePath title mobiFile
  local -a htmlFiles=()

  if [[ -z "$fromAddress" || -z "$toAddress" ]]; then
    echo "Could not send by email $url: kindleFromAddress and kindleToAddress must be set" >&2
    return 4
  fi

  # Leftovers of an earlier, failed url would be mistaken for this page.
  rm -rf -- "$folder"

  #Get the page off the web
  echo "Getting url $url"
  # The exit status of wget is deliberately not checked: with
  # --page-requisites it can be non-zero although the page itself was
  # fetched. The html file check below decides instead.
  wget -e robots=off --no-verbose --page-requisites --convert-links \
    --directory-prefix="$folder" --adjust-extension --html-extension \
    --no-directories --ignore-tags=a,area,iframe,script "$url"

  #Get the html name
  for candidate in "$folder"/*.htm "$folder"/*.html; do
    # An unmatched glob stays as the literal pattern; -f filters it out.
    if [[ -f "$candidate" ]]; then
      htmlFiles+=("$candidate")
    fi
  done
  if (( ${#htmlFiles[@]} == 0 )); then
    echo "Failed to get link $url" >&2
    return 1
  fi
  if (( ${#htmlFiles[@]} > 1 )); then
    echo "Leads to multiple html files $url" >&2
    return 2
  fi
  htmlFilePath="./${htmlFiles[0]}"
  echo "Path to html file is $htmlFilePath"

  #Convert it to .mobi
  echo "Begin conversion to .mobi"
  # Strip the tags, then delete punctuation, quotes, slashes, spaces and
  # newlines so that the title is usable as a file name.
  title=$(hxextract title "$htmlFilePath" \
    | sed 's/<[^>]*>//g' \
    | tr -d '!@#\\$%^&*()".:?;/ \n'"'")
  if [[ -z "$title" ]]; then
    # No usable title: fall back to the name of the html file.
    title=${htmlFilePath##*/}
    title=${title%.*}
  fi
  mobiFile="$title.mobi"
  echo "Mobi file will be name $mobiFile"
  if ! ebook-convert "$htmlFilePath" "$mobiFile" --output-profile=kindle \
      --dont-compress --no-inline-toc --pretty-print --max-levels=0; then
    echo "ebook-convert failed on $url" >&2
    return 3
  fi

  #Send to kindle
  echo "Sending to your Kindle"
  # The trailing "." is the text of the mail.
  if ! calibre-smtp --relay smtp.gmail.com \
      --port 587 \
      --username "$smtpUser" \
      --password "$smtpPassword" \
      --attachment "$mobiFile" \
      --subject "" \
      "$fromAddress" \
      "$toAddress" \
      "."; then
    echo "Could not send by email $url" >&2
    return 4
  fi
  echo "Done"

  # The download is no longer needed; the .mobi file is kept.
  rm -rf -- "$folder"
  return 0
}
runProcesses () {
  # Takes a process list and processes all links in it. Every url that was
  # sent successfully is appended to the processed list and removed from
  # the process list; failed urls stay queued for the next run.
  # Arguments: $1 - processList file
  #            $2 - processedList file
  # Returns:   non-zero if the process list cannot be read or no temporary
  #            file can be created, otherwise 0
  local processListFile=$1
  local processedListFile=$2
  local url remaining status
  local -a pending=()

  # Work on a snapshot: the list file is rewritten inside the loop.
  mapfile -t pending < "$processListFile" || return
  remaining=$(mktemp) || return

  for url in "${pending[@]}"; do
    [[ -n "$url" ]] || continue

    # if successful, add to processed list
    if processUrl "$url"; then
      printf '%s\n' "$url" >> "$processedListFile"

      # Remove exactly this line. -F -x compares the whole line literally,
      # so regex characters in the url and urls sharing a prefix are safe.
      grep -Fxv -- "$url" "$processListFile" > "$remaining"
      status=$?
      # grep returns 1 when nothing is left, which is fine; 2 is an error
      # and then the list is left untouched.
      if (( status <= 1 )); then
        cat -- "$remaining" > "$processListFile"
      fi
    fi
  done

  rm -f -- "$remaining"
}
### Begin script
echo "Let's start..."

# Make sure files exist
echo "Creating files needed"
for stateFile in "$processListFile" "$processedListFile"; do
  if [[ ! -f "$stateFile" ]]; then
    # Appending nothing creates the file without touching existing content.
    : >> "$stateFile"
  fi
done

# Start from an empty download folder; -f because it usually does not exist.
rm -rf -- "$downloadFolder"

# Get rss file
echo "Getting rss"
# Without a url the download cannot succeed, so say so up front.
if [[ -z "$rssUrl" ]]; then
  echo "rssUrl is not set"
  echo "Failed to get rss file"
  exit 1
fi
# if fail to get file, exit
if ! getRss "$rssUrl" "$rssFile"; then
  echo "Failed to get rss file"
  exit 1
fi

# Process the rss to extract the links
echo "Extracting links"
extractLinks "$rssFile" "$newLinksFile" "$rawLinksFile"

# Add the links to the to-be-processed list
echo "Finding new links"
addProcesses "$newLinksFile" "$processListFile" "$processedListFile"

# Now start processing
echo "Begin run processes"
runProcesses "$processListFile" "$processedListFile"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment