#!/bin/bash

# dependencies: wget, libxml-xpath-perl, xmlstarlet, imagemagick, and dropbox_uploader.sh from https://github.com/andreafabrizi/Dropbox-Uploader
|
|
|
# check for dependencies
command -v wget >/dev/null 2>&1 || { echo "$(tput setaf 1)Please install $(tput bold)wget$(tput sgr0)$(tput setaf 1) and make sure that the $(tput bold)wget$(tput sgr0)$(tput setaf 1) command is working$(tput sgr0)" >&2; exit 1; }
command -v xpath >/dev/null 2>&1 || { echo "$(tput setaf 1)Please install $(tput bold)libxml-xpath-perl$(tput sgr0)$(tput setaf 1) and make sure that the $(tput bold)xpath$(tput sgr0)$(tput setaf 1) command is working$(tput sgr0)" >&2; exit 1; }
command -v xmlstarlet >/dev/null 2>&1 || { echo "$(tput setaf 1)Please install $(tput bold)xmlstarlet$(tput sgr0)$(tput setaf 1) and make sure that the $(tput bold)xmlstarlet$(tput sgr0)$(tput setaf 1) command is working$(tput sgr0)" >&2; exit 1; }
command -v mogrify >/dev/null 2>&1 || { echo "$(tput setaf 1)Please install $(tput bold)imagemagick$(tput sgr0)$(tput setaf 1) and make sure that the $(tput bold)mogrify$(tput sgr0)$(tput setaf 1) command is working$(tput sgr0)" >&2; exit 1; }
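# the upload step at the end calls ./dropbox_uploader.sh, so check for it up front as well
# (assumes the script sits in this working directory, which is what that call implies)
[ -x ./dropbox_uploader.sh ] || { echo "$(tput setaf 1)Please put an executable $(tput bold)dropbox_uploader.sh$(tput sgr0)$(tput setaf 1) in this directory (https://github.com/andreafabrizi/Dropbox-Uploader)$(tput sgr0)" >&2; exit 1; }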
|
|
|
# clean up from any old broken jobs
rm -rf tmp
|
|
|
# let's go
|
|
|
feedurl='https://what-if.xkcd.com/feed.atom'
|
|
|
mkdir tmp
cd tmp || exit 1
|
|
|
wget -q -O feed.tmp "$feedurl"
|
|
|
# edit these if you want to use a different feed
# for example, engadget's feed at http://www.engadget.com/rss-full.xml would be
# feedname=$(cat feed.tmp | xpath -q -e "//rss/channel/title/text()")
# entrytitle=$(cat feed.tmp | xpath -q -e "//rss/channel/item/title/text()")
# entryurl=$(cat feed.tmp | xpath -q -e "//rss/channel/item/link/text()")
# etc...

feedname=$(cat feed.tmp | xpath -q -e "//feed/title/text()")
entrytitle=$(cat feed.tmp | xpath -q -e "//feed/entry[1]/title/text()")
entryurl=$(cat feed.tmp | xpath -q -e "//feed/entry[1]/id/text()")
entrycontent=$(cat feed.tmp | xpath -q -e "//feed/entry[1]/content/text()")
entrydate=$(cat feed.tmp | xpath -q -e "//feed/entry[1]/published/text()")
|
# i used these to make sure i was extracting properly
#echo "$feedname"
#echo "$entrytitle"
#echo "$entryurl"
#echo "$entrycontent"
#echo "$entrydate"
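
# optional extra check (not part of the original flow): quit early if the xpath
# extraction came back empty, e.g. because the feed layout changed
#if [ -z "$entrytitle" ] || [ -z "$entrycontent" ]; then
#echo "$(tput setaf 1)Could not extract the latest entry from the feed. Quitting...$(tput sgr0)"
#cd ..
#rm -rf tmp
#exit 1
#fi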
|
|
|
# check to make sure the feed we downloaded is legit
if [ -s feed.tmp ]; then
echo "$(tput setaf 2)Fetched feed$(tput sgr0)"
else
echo "$(tput setaf 1)Problem fetching the feed. Quitting...$(tput sgr0)"
cd ..
rm -rf tmp
exit 1
fi
|
|
|
# time to build a new html doc
echo "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" > tmp.html
# grab the feed's content, feed it through the xmlstarlet fixer, replace all src="/blah/foo/pic.png" with src="pic.png", and change all png references to jpg :(
echo "$entrycontent" | xmlstarlet unesc | sed -e 's#src="[^"]*/#src="#g' | sed -e 's#\.png#.jpg#g' >> tmp.html
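# for example (made-up path, just to show what the two seds do):
#   <img src="/imgs/a/whatif/robot.png">  becomes  <img src="robot.jpg">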
|
echo "<p><i>Published in <a href=\"$entryurl\" target=\"_blank\">$feedname</a> at $entrydate</i></p>" >> tmp.html
echo "</body></html>" >> tmp.html
|
if [ -s tmp.html ]; then
echo "$(tput setaf 2)New temporary HTML file built$(tput sgr0)"
else
echo "$(tput setaf 1)Problem building temporary HTML file. Quitting...$(tput sgr0)"
cd ..
rm -rf tmp
exit 1
fi
|
|
|
# if there's no difference from last time to this time, clean up and exit
if cmp -s "tmp.html" "../lastfetch.html"; then
echo "$(tput setaf 3)The fetched/built html file is the same as last time. Quitting...$(tput sgr0)"
cd ..
rm -rf tmp
exit 1
fi
|
|
|
# if you've reached this point in the script, then the downloaded and built file is new
# copy this new one we've built to be the "last fetched" html...
cp tmp.html ../lastfetch.html
# ...and start the mobi building process
mv tmp.html "$feedname: $entrytitle.html"
|
|
|
# wget all of the images from the entry page
wget --quiet --page-requisites --span-hosts --convert-links --no-directories "$entryurl"
|
|
|
# kindles suck at turning transparent pngs into jpgs
mogrify -colorspace sRGB -background white -format jpg -alpha remove ./*.png
rm -rf ./*.png
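# optional, not in the original script: if an entry ever has no PNGs the glob above stays literal
# and mogrify prints a harmless complaint; a guard like
#   compgen -G "./*.png" > /dev/null && mogrify -colorspace sRGB -background white -format jpg -alpha remove ./*.png
# would avoid that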
|
|
|
# move back up, build the book with kindlegen, move the book out of tmp, and delete the tmp directory
# https://smile.amazon.com/gp/feature.html?docId=1000765211
|
|
|
cd ..
|
|
|
# download the kindlegen binary if you don't have it in the working directory
if [ ! -s kindlegen ]; then
echo "$(tput setaf 1)Kindlegen binary not found. Downloading now...$(tput sgr0)"
wget http://kindlegen.s3.amazonaws.com/kindlegen_linux_2.6_i386_v2_9.tar.gz -O kindlegen.tar.gz
tar -xzf kindlegen.tar.gz -C tmp
rm kindlegen.tar.gz
mv tmp/kindlegen .
fi
|
|
|
./kindlegen -c0 -o "$entrytitle".mobi tmp/"$feedname: $entrytitle.html"
mv tmp/"$entrytitle".mobi .
rm -rf tmp
|
|
|
if [ -s "$entrytitle".mobi ]; then
echo "$(tput setaf 2)Book built!$(tput sgr0)"
else
echo "$(tput setaf 1)Something went wrong!$(tput sgr0)"
exit 1
fi
|
|
|
# send a pushbullet notification that it worked, if you want
#pbtitle="New $feedname Article Generated"
#pbbody="Article Name: $entrytitle at $entryurl"
|
|
|
# you'll need a pushbullet api key from https://www.pushbullet.com/#settings/account
# curl --silent \
# --header 'Access-Token: YOUR_PUSHBULLET_API_KEY' \
# --header 'Content-Type: application/json' \
# --data-binary '{"title":"'"$pbtitle"'","body":"'"$pbbody"'","type":"note"}' \
# --request POST \
# https://api.pushbullet.com/v2/pushes > /dev/null
|
|
|
# upload the new file to the dropbox folder, and only claim success if the uploader agrees
if ./dropbox_uploader.sh upload "$entrytitle".mobi "/To Kindle"; then
echo "$(tput setaf 2)Book File Uploaded!$(tput sgr0)"
else
echo "$(tput setaf 1)Problem uploading the book to Dropbox!$(tput sgr0)"
exit 1
fi

echo "$(tput setaf 2)All done!$(tput sgr0)"