Skip to content

Instantly share code, notes, and snippets.

@abdullahkhalids
Created August 8, 2013 21:46
Show Gist options
  • Save abdullahkhalids/6189074 to your computer and use it in GitHub Desktop.
Save abdullahkhalids/6189074 to your computer and use it in GitHub Desktop.
This bash script takes the links from an RSS feed, converts each linked page to a .mobi file, and emails it to an address (usually your Kindle email) for easy reading. It has some Kippt-specific tweaks, so it might not work with other feeds without changes.
#!/bin/bash
# This bash script takes the links from an rss feed, converts each linked
# page to a .mobi file and emails it to an address (usually your kindle email)
# for easy reading.
# Usage: set the rss url (rssUrl) below and run the script.
# Dependencies:
#   html-xml-utils (provides hxextract): https://launchpad.net/ubuntu/precise/+package/html-xml-utils
#   Calibre (provides ebook-convert and calibre-smtp)
#   wget
#   possibly more
# This script has Kippt-specific tweaks. It might not work for other feeds.
# It creates its working files in the directory it is run from. Leave them be.
### Define some constants
# Url of the rss feed to poll; set this by hand before running the script.
rssUrl=''
# Folder holding the downloaded page (listed by processUrl, removed at startup).
downloadFolder='html'
# Working files; all paths are relative to the current directory.
rssFile='rssFile'                   # the downloaded feed
rawLinksFile='rawLinks'             # raw hxextract output
newLinksFile='newLinks'             # cleaned-up links of the current feed
processListFile='processList'       # links still to be sent
processedListFile='processedList'   # links already sent
### Define some functions
getRss () {
  # Gets an rss feed with wget and writes it to a file.
  # Arguments: $1 - url of the feed
  #            $2 - path of the file the feed is written to
  # Returns:   the exit status of wget
  # Locals are used so the caller's variables are never overwritten.
  local feedUrl=$1
  local outFile=$2
  # Quoted so a url containing ?, * or spaces reaches wget as a single,
  # unexpanded argument.
  wget --no-verbose --output-document="$outFile" "$feedUrl"
}
extractLinks () {
  # Extracts the links from an rss file in a clean format and writes them
  # to a file.
  # Arguments: $1 - rss file to read
  #            $2 - file that receives the cleaned-up links
  #            $3 - file that receives the raw hxextract output
  # Returns:   non-zero if hxextract or sed fails
  local feedFile=$1
  local cleanLinksFile=$2
  local rawFile=$3
  # Stop early: without the raw links there is nothing to clean up.
  hxextract link "$feedFile" > "$rawFile" || return
  # 1. remove the first kippt.com link on each line (the dot is escaped so
  #    it only matches a literal dot)
  # 2. replace every closing tag with a newline (\n in the replacement is
  #    GNU sed syntax, as in the original)
  # 3. remove every opening tag
  sed -e 's|<link>http://kippt\.com</link>||' \
    -e 's|</link>|\n|g' \
    -e 's|<link>||g' \
    "$rawFile" > "$cleanLinksFile"
}
isProcessed () {
  # Checks if the url is present as a line in the file.
  # Arguments: $1 - url to look for
  #            $2 - file with one url per line
  # Globals:   alreadyProcessed (written) - "1" if the url was found,
  #            "0" otherwise. Callers read the result from this variable.
  local wantedUrl=$1
  local listFile=$2
  local candidate
  alreadyProcessed="0"
  # The "|| [[ -n ... ]]" part also handles a last line that has no
  # trailing newline.
  while read -r candidate || [[ -n "$candidate" ]]; do
    # Quoted on both sides: a plain string comparison that also works for
    # blank lines and for urls containing spaces.
    if [[ "$wantedUrl" == "$candidate" ]]; then
      alreadyProcessed="1"
      break
    fi
  done < "$listFile"
}
addProcesses () {
  # Takes the urls in the newLinks file and appends them to the processList
  # file, unless they are already in the processList or processedList file.
  # Arguments: $1 - file with the new links, one per line
  #            $2 - file with the links still to be processed (appended to)
  #            $3 - file with the links already processed
  # Globals:   alreadyProcessed (read) - result set by isProcessed
  local newLinksFile=$1
  local processListFile=$2
  local processedListFile=$3
  local newurl
  while read -r newurl || [[ -n "$newurl" ]]; do
    # A blank line is not a link; passing it on would shift the arguments
    # of isProcessed.
    [[ -n "$newurl" ]] || continue
    # Checking the processList first also filters duplicates within the
    # newLinks file, because accepted urls are appended to it right away.
    isProcessed "$newurl" "$processListFile"
    [[ "$alreadyProcessed" == "0" ]] || continue
    isProcessed "$newurl" "$processedListFile"
    [[ "$alreadyProcessed" == "0" ]] || continue
    printf '%s\n' "$newurl" >> "$processListFile"
  done < "$newLinksFile"
}
processUrl () {
  # Takes a url, downloads the page, converts it to .mobi and tries to send
  # it to the kindle by email.
  # Arguments: $1 - url of the page to send
  # Globals:   downloadFolder (read) - folder wget downloads into; "html"
  #            is used when it is unset or empty
  # Environment:
  #   KINDLE_FROM_ADDRESS  - sender address (required)
  #   KINDLE_TO_ADDRESS    - recipient address, usually the kindle email
  #                          (required)
  #   KINDLE_SMTP_USER     - smtp username (defaults to the value that used
  #                          to be hard-coded here)
  #   KINDLE_SMTP_PASSWORD - smtp password (same)
  # Returns:   0 only if sent
  #            2 if the download did not leave exactly one html file
  #            3 if ebook-convert failed
  #            4 if the email could not be sent
  # Failures return instead of exiting, so a caller that checks the status
  # can continue with the next url.
  local pageUrl=$1
  local folder=${downloadFolder:-html}
  # NOTE(review): credentials should not live in the source; the defaults
  # are kept only for backward compatibility. Also note that the password
  # is passed on the command line of calibre-smtp.
  local smtpUser=${KINDLE_SMTP_USER:-abd.kindle}
  local smtpPassword=${KINDLE_SMTP_PASSWORD:-testable}
  local fromAddress=${KINDLE_FROM_ADDRESS:-}
  local toAddress=${KINDLE_TO_ADDRESS:-}
  local -a htmlFiles=()
  local candidate htmlFilePath title mobiFile

  # Check the addresses first so no download is wasted on a mail that can
  # never be sent.
  if [[ -z "$fromAddress" || -z "$toAddress" ]]; then
    echo "Could not send by email $pageUrl" >&2
    echo "Set KINDLE_FROM_ADDRESS and KINDLE_TO_ADDRESS" >&2
    return 4
  fi

  # Start from an empty folder: leftovers of an earlier url would otherwise
  # be counted as html files of this one.
  rm -rf -- "$folder"

  #Get the page off the web
  echo "Getting url $pageUrl"
  # The status of wget is deliberately not checked, as in the original.
  # NOTE(review): presumably wget reports an error when any page requisite
  # is missing - confirm. A failed download is caught by the html file
  # count below.
  wget -e robots=off --no-verbose --page-requisites --convert-links \
    --directory-prefix="$folder" --adjust-extension --html-extension \
    --no-directories --ignore-tags=a,area,iframe,script "$pageUrl"

  #Get the html name
  for candidate in "$folder"/*.htm*; do
    # An unmatched glob stays literal, so make sure the file exists.
    if [[ -f "$candidate" ]]; then
      htmlFiles+=("$candidate")
    fi
  done
  # Zero files and several files are both reported here, as before.
  if (( ${#htmlFiles[@]} != 1 )); then
    echo "Leads to multiple html files $pageUrl" >&2
    rm -rf -- "$folder"
    return 2
  fi
  htmlFilePath="./$folder/${htmlFiles[0]##*/}"
  echo "Path to html file is $htmlFilePath"

  #Convert it to .mobi
  echo "Begin conversion to .mobi"
  # The file name is the page title without tags, punctuation, quotes,
  # newlines and spaces. Slashes are removed as well, because they would
  # turn the name into a path.
  title=$(hxextract title "$htmlFilePath" \
    | sed 's/<[^>]*>//g' \
    | sed 's/[!@#\$%^&*()".:?;]//g' \
    | sed "s/'//g" \
    | sed ':a;N;$!ba;s/\n/ /g' \
    | sed 's/ //g' \
    | tr -d '/')
  # A page without a usable title would otherwise be saved as ".mobi".
  if [[ -z "$title" ]]; then
    title="untitled"
  fi
  mobiFile="$title.mobi"
  echo "Mobi file will be name $mobiFile"
  if ! ebook-convert "$htmlFilePath" "$mobiFile" --output-profile=kindle \
    --dont-compress --no-inline-toc --pretty-print --max-levels=0; then
    echo "ebook-convert failed on $pageUrl" >&2
    rm -rf -- "$folder"
    return 3
  fi

  #Send to kindle
  echo "Sending to your Kindle"
  # Positional arguments of calibre-smtp: from, to, message text.
  if ! calibre-smtp --relay smtp.gmail.com \
    --port 587 \
    --username "$smtpUser" \
    --password "$smtpPassword" \
    --attachment "$mobiFile" \
    --subject "" \
    "$fromAddress" \
    "$toAddress" \
    "."; then
    echo "Could not send by email $pageUrl" >&2
    rm -rf -- "$folder"
    return 4
  fi
  echo "Done"
  rm -rf -- "$folder"
  # The .mobi file is kept, as in the original.
  # rm $mobiFile
  return 0
}
runProcesses () {
  # Takes a process list and processes all links in it. Every link that was
  # sent is appended to the processed list and removed from the process list.
  # Arguments: $1 - file with the links to process, one per line
  #            $2 - file that receives the successfully processed links
  # Returns:   non-zero if the process list cannot be read or a temporary
  #            file cannot be created
  local processListFile=$1
  local processedListFile=$2
  local -a pending=()
  local url tmpFile status
  # Read the whole list up front: the file is rewritten while the links are
  # being processed.
  mapfile -t pending < "$processListFile" || return
  for url in "${pending[@]}"; do
    [[ -n "$url" ]] || continue
    # stdin comes from /dev/null so nothing run by processUrl can wait for,
    # or swallow, input that is not meant for it.
    if processUrl "$url" < /dev/null; then
      # if successful, add to processed list
      printf '%s\n' "$url" >> "$processedListFile"
      # Remove exactly this url. -F -x compares whole lines literally, so
      # regex characters in the url are harmless and urls that merely
      # contain this one are kept.
      tmpFile=$(mktemp "${processListFile}.XXXXXX") || return
      grep -Fxv -- "$url" "$processListFile" > "$tmpFile"
      status=$?
      # grep exits with 1 when no line is left, which is fine here; only a
      # status above 1 is a real error.
      if (( status <= 1 )); then
        mv -- "$tmpFile" "$processListFile"
      else
        rm -f -- "$tmpFile"
      fi
    fi
  done
}
### Begin script
echo "Let's start..."

# The feed url has to be set by hand in the constants; with an empty url the
# arguments of getRss would be shifted by one.
if [[ -z "$rssUrl" ]]; then
  echo "rssUrl is not set" >&2
  exit 1
fi

# Make sure files exist
echo "Creating files needed"
for listFile in "$processListFile" "$processedListFile"; do
  # Appending nothing creates a missing file and leaves an existing one alone.
  if [[ ! -f "$listFile" ]]; then
    : >> "$listFile"
  fi
done

# Remove leftovers of an earlier run. -f keeps a missing folder from being
# reported as an error; :? aborts instead of running rm on an empty path.
rm -rf -- "${downloadFolder:?}"

# Get rss file
echo "Getting rss"
# if fail to get file, exit
if ! getRss "$rssUrl" "$rssFile"; then
  echo "Failed to get rss file"
  exit 1
fi

# Process the rss to extract the links
echo "Extracting links"
if ! extractLinks "$rssFile" "$newLinksFile" "$rawLinksFile"; then
  echo "Failed to extract links" >&2
  exit 1
fi

# Add links to the to-be-processed list
echo "Finding new links"
addProcesses "$newLinksFile" "$processListFile" "$processedListFile"

# Now start processing
echo "Begin run processes"
runProcesses "$processListFile" "$processedListFile"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment