abdullahkhalids · August 8, 2013 21:46
diff --git a/send2kindle b/send2kindle
 #!/bin/bash

 #this bash script takes links from an rss feed and sends
 #.mobi files to an address (usually your kindle email)
 #for easy reading.

 #Usage is to set the rss url below and run the script

 #Dependencies
 # https://launchpad.net/ubuntu/precise/+package/html-xml-utils
 # Calibre
 # possibly more

 # this script has Kippt specific tuneups. Might not work for other feeds
 # creates files in the same directory. Leave them be.


 ### Define some constants
 # Feed to poll. Fill this in before running the script.
 rssUrl=''

 # Working files and folder, created in the directory the script runs from.
 downloadFolder='html'              # folder the page download is looked up in
 rawLinksFile='rawLinks'            # raw hxextract output
 newLinksFile='newLinks'            # cleaned links, one per line
 processListFile='processList'      # urls still waiting to be sent
 processedListFile='processedList'  # urls that have been sent
 rssFile='rssFile'                  # downloaded copy of the feed


 ### Define some functions

 getRss () {
   # Downloads an rss feed with wget and writes it to a file.
   # Arguments: $1 - url of the feed
   #            $2 - path of the file the feed is written to
   # Returns:   wget's exit status, or 1 when the url or the path is empty.
   # The parameters are local so a call does not overwrite the script-level
   # rssUrl/rssFile settings.
   local rssUrl=$1
   local rssFile=$2

   if [[ -z "$rssUrl" || -z "$rssFile" ]]; then
     echo "getRss: need a feed url and an output file" >&2
     return 1
   fi

   # Quoted so a url containing '&', '?' or spaces reaches wget as one word.
   wget --no-verbose --output-document="$rssFile" "$rssUrl"
 }

 extractLinks () {
   # Pulls the <link> elements out of an rss file and writes one url per line.
   # Arguments: $1 - rss file to read
   #            $2 - file that receives the cleaned-up links
   #            $3 - file that receives the raw hxextract output
   # Returns:   exit status of the sed clean-up.
   local rssFile=$1
   local linksFile=$2
   local rawLinksFile=$3

   hxextract link "$rssFile" > "$rawLinksFile"

   # One sed process, three steps:
   #  1. drop the kippt.com link (first match on each line)
   #  2. turn every closing tag into a line break (\n in the replacement
   #     is a GNU sed feature)
   #  3. strip the opening tags
   sed -e 's/<link>http:\/\/kippt\.com<\/link>//' \
       -e 's/<\/link>/\n/g' \
       -e 's/<link>//g' \
       "$rawLinksFile" > "$linksFile"
 }

 isProcessed () {
   # Checks whether a url is listed in a file holding one url per line.
   # Arguments: $1 - url to look for
   #            $2 - file to search
   # Outputs:   sets the global alreadyProcessed to "1" when the url is
   #            found and to "0" otherwise; callers read that variable.
   # Returns:   always 0.
   local url=$1
   local linksFile=$2
   local aurl

   alreadyProcessed="0"

   # A list that does not exist cannot contain the url.
   [[ -r "$linksFile" ]] || return 0

   # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
   while read -r aurl || [[ -n "$aurl" ]]; do
     # Quoted comparison: blank lines and glob characters in a url are
     # compared as plain text instead of breaking the test.
     if [[ "$url" == "$aurl" ]]; then
       alreadyProcessed="1"
       break
     fi
   done < "$linksFile"
   return 0
 }

 addProcesses () {
   # Queues every url from the new-links file that is neither waiting in
   # the process list nor recorded in the processed list.
   # Arguments: $1 - file with the freshly extracted urls
   #            $2 - process list (urls still to be sent); appended to
   #            $3 - processed list (urls already sent)
   # Relies on isProcessed, which reports through the global
   # alreadyProcessed ("1" = found, "0" = not found).
   local newLinksFile=$1
   local processListFile=$2
   local processedListFile=$3
   local newurl

   while read -r newurl || [[ -n "$newurl" ]]; do
     # Skip blank lines: an empty url would otherwise be queued as an
     # empty entry.
     [[ -n "$newurl" ]] || continue

     isProcessed "$newurl" "$processListFile"
     [[ "$alreadyProcessed" == "0" ]] || continue

     isProcessed "$newurl" "$processedListFile"
     [[ "$alreadyProcessed" == "0" ]] || continue

     printf '%s\n' "$newurl" >> "$processListFile"
   done < "$newLinksFile"
 }

 processUrl () {
   # Downloads one url, converts the page to .mobi and mails it to the kindle.
   # Arguments: $1 - url of the page to send
   # Globals:   downloadFolder (read) - folder used for the download,
   #              "html" when unset
   #            KINDLE_SMTP_PASSWORD (read, optional) - smtp password
   # Returns:   0 only if the mail was sent
   #            2 if the download did not yield exactly one html file
   #            3 if ebook-convert failed
   #            4 if calibre-smtp failed
   # Failures use "return" instead of "exit" so the caller can inspect the
   # status and carry on with the remaining urls.
   local url=$1
   local folder=${downloadFolder:-html}
   local -a htmlFiles
   local htmlFileName htmlFilePath title mobiFile

   # Start from an empty folder so files left behind by an earlier url are
   # not mistaken for this page.
   rm -rf -- "$folder"

   #Get the page off the web
   echo "Getting url $url"
   # NOTE(review): wget's exit status is not checked (the check was already
   # disabled); a failed download is caught by the html file count below.
   wget -e robots=off --no-verbose --page-requisites --convert-links \
     --directory-prefix="$folder" --adjust-extension --html-extension \
     --no-directories --ignore-tags=a,area,iframe,script "$url"

   #Get the html name
   # A glob instead of parsing ls. When nothing matches, the array holds
   # the unexpanded pattern, which the -e test rejects.
   htmlFiles=("$folder"/*.htm*)
   if [[ ${#htmlFiles[@]} -ne 1 || ! -e "${htmlFiles[0]}" ]]; then
     echo "Leads to multiple html files $url"
     return 2
   fi
   htmlFileName=${htmlFiles[0]##*/}
   htmlFilePath="./$folder/$htmlFileName"
   echo "Path to html file is $htmlFilePath"

   #Convert it to .mobi
   echo "Begin conversion to .mobi"
   # Build the file name from the page title: strip the tags, drop
   # punctuation and single quotes, join multiple lines, remove spaces.
   title=$(hxextract title "$htmlFilePath" \
     | sed 's/<[^>]*>//g' \
     | sed 's/[!@#\$%^&*()".:?;]//g' \
     | sed "s/'//g" \
     | sed ':a;N;$!ba;s/\n/ /g' \
     | sed 's/ //g')
   mobiFile="$title.mobi"
   echo "Mobi file will be name $mobiFile"
   if ! ebook-convert "$htmlFilePath" "$mobiFile" --output-profile=kindle \
     --dont-compress --no-inline-toc --pretty-print --max-levels=0; then
     echo "ebook-convert failed on $url"
     return 3
   fi

   #Send to kindle
   echo "Sending to your Kindle"
   # NOTE(review): a password on the command line is visible in the process
   # list. Prefer setting KINDLE_SMTP_PASSWORD; the literal fallback only
   # keeps existing setups working.
   # NOTE(review): the two address arguments read "[email protected]", which
   # looks like e-mail obfuscation from the page this script was copied
   # from. Put the real sender and recipient addresses there.
   if ! calibre-smtp --relay smtp.gmail.com \
              --port 587  \
              --username abd.kindle \
              --password "${KINDLE_SMTP_PASSWORD:-testable}" \
              --attachment "$mobiFile" \
              --subject "" \
              [email protected] \
              [email protected] \
              "."; then
     echo "Could not send by email $url"
     return 4
   fi
   echo "Done"

   # The download is not needed once the mail is out. The explicit return
   # keeps a clean-up problem from being reported as a failed send.
   rm -rf -- "$folder"
   # rm $mobiFile
   return 0
 }

 runProcesses () {
   # Sends every url in the process list to the kindle through processUrl.
   # Arguments: $1 - process list (urls waiting to be sent)
   #            $2 - processed list (urls already sent); appended to
   # A url that was sent is appended to the processed list and removed from
   # the process list; a url that failed stays in the process list.
   local processListFile=$1
   local processedListFile=$2
   local url remaining

   # The list is read on descriptor 3 and processUrl gets /dev/null as its
   # stdin, so no command started by processUrl can consume the urls that
   # are still waiting.
   while read -r url <&3 || [[ -n "$url" ]]; do
     [[ -n "$url" ]] || continue

     if processUrl "$url" < /dev/null; then
       printf '%s\n' "$url" >> "$processedListFile"

       # Remove exactly this url. grep -F -x compares whole lines
       # literally, so regex characters or ';' in the url cannot match
       # other entries. The list is replaced through a temp file; the loop
       # keeps reading the file it opened.
       remaining=$(mktemp "${processListFile}.XXXXXX") || return
       # grep exits 1 when no line is left over, which is not an error.
       if grep -F -x -v -- "$url" "$processListFile" > "$remaining" \
         || [[ $? -eq 1 ]]; then
         mv -- "$remaining" "$processListFile"
       else
         rm -f -- "$remaining"
       fi
     fi
   done 3< "$processListFile"
 }

 ### Begin script
 echo "Let's start..."

 # Without a feed url there is nothing to do. An empty value would also
 # shift the arguments of an unquoted getRss call.
 if [[ -z "$rssUrl" ]]; then
   echo "No rss url set; edit rssUrl near the top of this script" >&2
   exit 1
 fi

 # Make sure files exist
 echo "Creating files needed"
 # Appending nothing creates a missing file and leaves an existing one as is.
 : >> "$processListFile"
 : >> "$processedListFile"

 # -f: a folder that is already gone is not an error.
 # :? aborts instead of handing rm an empty path.
 rm -rf -- "${downloadFolder:?}"

 # Get rss file; if that fails, exit
 echo "Getting rss"
 if ! getRss "$rssUrl" "$rssFile"; then
   echo "Failed to get rss file"
   exit 1
 fi

 # Process the rss to extract the links
 echo "Extracting links"
 extractLinks "$rssFile" "$newLinksFile" "$rawLinksFile"

 # Add the links to the list of urls to be processed
 echo "Finding new links"
 addProcesses "$newLinksFile" "$processListFile" "$processedListFile"

 # Now start processing
 echo "Begin run processes"
 runProcesses "$processListFile" "$processedListFile"
	#!/bin/bash

	#this bash script takes links from an rss feed and sends
	#.mobi files to an address (usually your kindle email)
	#for easy reading.

	#Usage is to set the rss url below and run the script

	#Dependencies
	# https://launchpad.net/ubuntu/precise/+package/html-xml-utils
	# Calibre
	# possibly more

	# this script has Kippt specific tuneups. Might not work for other feeds
	# creates files in the same directory. Leave them be.


	### Define some constants
	# Feed to poll. Fill this in before running the script.
	rssUrl=''

	# Working files and folder, created in the directory the script runs from.
	downloadFolder='html'              # folder the page download is looked up in
	rawLinksFile='rawLinks'            # raw hxextract output
	newLinksFile='newLinks'            # cleaned links, one per line
	processListFile='processList'      # urls still waiting to be sent
	processedListFile='processedList'  # urls that have been sent
	rssFile='rssFile'                  # downloaded copy of the feed


	### Define some functions

	getRss () {
	  # Downloads an rss feed with wget and writes it to a file.
	  # Arguments: $1 - url of the feed
	  #            $2 - path of the file the feed is written to
	  # Returns:   wget's exit status, or 1 when the url or the path is empty.
	  # The parameters are local so a call does not overwrite the script-level
	  # rssUrl/rssFile settings.
	  local rssUrl=$1
	  local rssFile=$2

	  if [[ -z "$rssUrl" || -z "$rssFile" ]]; then
	    echo "getRss: need a feed url and an output file" >&2
	    return 1
	  fi

	  # Quoted so a url containing '&', '?' or spaces reaches wget as one word.
	  wget --no-verbose --output-document="$rssFile" "$rssUrl"
	}

	extractLinks () {
	  # Pulls the <link> elements out of an rss file and writes one url per line.
	  # Arguments: $1 - rss file to read
	  #            $2 - file that receives the cleaned-up links
	  #            $3 - file that receives the raw hxextract output
	  # Returns:   exit status of the sed clean-up.
	  local rssFile=$1
	  local linksFile=$2
	  local rawLinksFile=$3

	  hxextract link "$rssFile" > "$rawLinksFile"

	  # One sed process, three steps (the escaped "\|" separators that stood
	  # here were passed to sed as file names instead of forming a pipeline):
	  #  1. drop the kippt.com link (first match on each line)
	  #  2. turn every closing tag into a line break (\n in the replacement
	  #     is a GNU sed feature)
	  #  3. strip the opening tags
	  sed -e 's/<link>http:\/\/kippt\.com<\/link>//' \
	      -e 's/<\/link>/\n/g' \
	      -e 's/<link>//g' \
	      "$rawLinksFile" > "$linksFile"
	}

	isProcessed () {
	  # Checks whether a url is listed in a file holding one url per line.
	  # Arguments: $1 - url to look for
	  #            $2 - file to search
	  # Outputs:   sets the global alreadyProcessed to "1" when the url is
	  #            found and to "0" otherwise; callers read that variable.
	  # Returns:   always 0.
	  local url=$1
	  local linksFile=$2
	  local aurl

	  alreadyProcessed="0"

	  # A list that does not exist cannot contain the url.
	  [[ -r "$linksFile" ]] || return 0

	  # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
	  while read -r aurl || [[ -n "$aurl" ]]; do
	    # Quoted comparison: blank lines and glob characters in a url are
	    # compared as plain text instead of breaking the test.
	    if [[ "$url" == "$aurl" ]]; then
	      alreadyProcessed="1"
	      break
	    fi
	  done < "$linksFile"
	  return 0
	}

	addProcesses () {
	  # Queues every url from the new-links file that is neither waiting in
	  # the process list nor recorded in the processed list.
	  # Arguments: $1 - file with the freshly extracted urls
	  #            $2 - process list (urls still to be sent); appended to
	  #            $3 - processed list (urls already sent)
	  # Relies on isProcessed, which reports through the global
	  # alreadyProcessed ("1" = found, "0" = not found).
	  local newLinksFile=$1
	  local processListFile=$2
	  local processedListFile=$3
	  local newurl

	  while read -r newurl || [[ -n "$newurl" ]]; do
	    # Skip blank lines: an empty url would otherwise be queued as an
	    # empty entry.
	    [[ -n "$newurl" ]] || continue

	    isProcessed "$newurl" "$processListFile"
	    [[ "$alreadyProcessed" == "0" ]] || continue

	    isProcessed "$newurl" "$processedListFile"
	    [[ "$alreadyProcessed" == "0" ]] || continue

	    printf '%s\n' "$newurl" >> "$processListFile"
	  done < "$newLinksFile"
	}

	processUrl () {
	  # Downloads one url, converts the page to .mobi and mails it to the kindle.
	  # Arguments: $1 - url of the page to send
	  # Globals:   downloadFolder (read) - folder used for the download,
	  #              "html" when unset
	  #            KINDLE_SMTP_PASSWORD (read, optional) - smtp password
	  # Returns:   0 only if the mail was sent
	  #            2 if the download did not yield exactly one html file
	  #            3 if ebook-convert failed
	  #            4 if calibre-smtp failed
	  # Failures use "return" instead of "exit" so the caller can inspect the
	  # status and carry on with the remaining urls.
	  local url=$1
	  local folder=${downloadFolder:-html}
	  local -a htmlFiles
	  local htmlFileName htmlFilePath title mobiFile

	  # Start from an empty folder so files left behind by an earlier url are
	  # not mistaken for this page.
	  rm -rf -- "$folder"

	  #Get the page off the web
	  echo "Getting url $url"
	  # NOTE(review): wget's exit status is not checked (the check was already
	  # disabled); a failed download is caught by the html file count below.
	  wget -e robots=off --no-verbose --page-requisites --convert-links \
	    --directory-prefix="$folder" --adjust-extension --html-extension \
	    --no-directories --ignore-tags=a,area,iframe,script "$url"

	  #Get the html name
	  # A glob instead of parsing ls. When nothing matches, the array holds
	  # the unexpanded pattern, which the -e test rejects.
	  htmlFiles=("$folder"/*.htm*)
	  if [[ ${#htmlFiles[@]} -ne 1 || ! -e "${htmlFiles[0]}" ]]; then
	    echo "Leads to multiple html files $url"
	    return 2
	  fi
	  htmlFileName=${htmlFiles[0]##*/}
	  htmlFilePath="./$folder/$htmlFileName"
	  echo "Path to html file is $htmlFilePath"

	  #Convert it to .mobi
	  echo "Begin conversion to .mobi"
	  # Build the file name from the page title: strip the tags, drop
	  # punctuation and single quotes, join multiple lines, remove spaces.
	  # Real pipes and the "*" in the tag and punctuation patterns are
	  # restored here; the escaped "\|" form never ran as a pipeline.
	  title=$(hxextract title "$htmlFilePath" \
	    | sed 's/<[^>]*>//g' \
	    | sed 's/[!@#\$%^&*()".:?;]//g' \
	    | sed "s/'//g" \
	    | sed ':a;N;$!ba;s/\n/ /g' \
	    | sed 's/ //g')
	  mobiFile="$title.mobi"
	  echo "Mobi file will be name $mobiFile"
	  if ! ebook-convert "$htmlFilePath" "$mobiFile" --output-profile=kindle \
	    --dont-compress --no-inline-toc --pretty-print --max-levels=0; then
	    echo "ebook-convert failed on $url"
	    return 3
	  fi

	  #Send to kindle
	  echo "Sending to your Kindle"
	  # NOTE(review): a password on the command line is visible in the process
	  # list. Prefer setting KINDLE_SMTP_PASSWORD; the literal fallback only
	  # keeps existing setups working.
	  # NOTE(review): the two address arguments read "[email protected]", which
	  # looks like e-mail obfuscation from the page this script was copied
	  # from. Put the real sender and recipient addresses there.
	  if ! calibre-smtp --relay smtp.gmail.com \
	    --port 587 \
	    --username abd.kindle \
	    --password "${KINDLE_SMTP_PASSWORD:-testable}" \
	    --attachment "$mobiFile" \
	    --subject "" \
	    [email protected] \
	    [email protected] \
	    "."; then
	    echo "Could not send by email $url"
	    return 4
	  fi
	  echo "Done"

	  # The download is not needed once the mail is out. The explicit return
	  # keeps a clean-up problem from being reported as a failed send.
	  rm -rf -- "$folder"
	  # rm $mobiFile
	  return 0
	}

	runProcesses () {
	  # Sends every url in the process list to the kindle through processUrl.
	  # Arguments: $1 - process list (urls waiting to be sent)
	  #            $2 - processed list (urls already sent); appended to
	  # A url that was sent is appended to the processed list and removed from
	  # the process list; a url that failed stays in the process list.
	  local processListFile=$1
	  local processedListFile=$2
	  local url remaining

	  # The list is read on descriptor 3 and processUrl gets /dev/null as its
	  # stdin, so no command started by processUrl can consume the urls that
	  # are still waiting.
	  while read -r url <&3 || [[ -n "$url" ]]; do
	    [[ -n "$url" ]] || continue

	    if processUrl "$url" < /dev/null; then
	      printf '%s\n' "$url" >> "$processedListFile"

	      # Remove exactly this url. grep -F -x compares whole lines
	      # literally, so regex characters or ';' in the url cannot match
	      # other entries. The list is replaced through a temp file; the loop
	      # keeps reading the file it opened.
	      remaining=$(mktemp "${processListFile}.XXXXXX") || return
	      # grep exits 1 when no line is left over, which is not an error.
	      if grep -F -x -v -- "$url" "$processListFile" > "$remaining" \
	        || [[ $? -eq 1 ]]; then
	        mv -- "$remaining" "$processListFile"
	      else
	        rm -f -- "$remaining"
	      fi
	    fi
	  done 3< "$processListFile"
	}

	### Begin script
	echo "Let's start..."

	# Without a feed url there is nothing to do. An empty value would also
	# shift the arguments of an unquoted getRss call.
	if [[ -z "$rssUrl" ]]; then
	  echo "No rss url set; edit rssUrl near the top of this script" >&2
	  exit 1
	fi

	# Make sure files exist
	echo "Creating files needed"
	# Appending nothing creates a missing file and leaves an existing one as is.
	: >> "$processListFile"
	: >> "$processedListFile"

	# -f: a folder that is already gone is not an error.
	# :? aborts instead of handing rm an empty path.
	rm -rf -- "${downloadFolder:?}"

	# Get rss file; if that fails, exit
	echo "Getting rss"
	if ! getRss "$rssUrl" "$rssFile"; then
	  echo "Failed to get rss file"
	  exit 1
	fi

	# Process the rss to extract the links
	echo "Extracting links"
	extractLinks "$rssFile" "$newLinksFile" "$rawLinksFile"

	# Add the links to the list of urls to be processed
	echo "Finding new links"
	addProcesses "$newLinksFile" "$processListFile" "$processedListFile"

	# Now start processing
	echo "Begin run processes"
	runProcesses "$processListFile" "$processedListFile"