Created
December 2, 2017 18:09
-
-
Save fiskr/de3f9e6b902fb89021b747cd1433ce0b to your computer and use it in GitHub Desktop.
Download How We Roll episodes from RSS feed with better titles.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download every episode of the How We Roll podcast from its RSS feed, naming
# each file after the episode title rather than the name used in the URL.
#
# Usage:    run from the directory that should receive the .mp3 files.
# Requires: curl, grep, perl, wget, mktemp.
#
# Related one-line script: https://gist.github.com/fiskr/b0c01bbcbd2aefaa726d46bea30a3307

set -euo pipefail

readonly rssUrl="https://rss.simplecast.com/podcasts/3236/rss" # How We Roll podcast

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Scratch file holding one "title | url" pair per line. It is created with
# mktemp (no predictable name, nothing clobbered in the working directory) and
# removed on every exit path, including failures and Ctrl-C.
dataFile="$(mktemp)" || die "mktemp failed"
trap 'rm -f -- "$dataFile"' EXIT

# Download the RSS feed for episodes of the How We Roll Podcast
# then, retrieve from the XML just the episode titles and URLs
# then, exclude subtitles, since we don't need those
# then, parse the URL from the attribute of the node
# then, delete all node tags, e.g. <title>
# then, get rid of unnecessary, leading whitespace
# then, get the title and URL separated only by a "|" character
# then, break them up into individual lines for each URL, in the format of "title | url"
# finally, clean up any lines that carry extra leading titles by keeping only the last "X | Y" pair
#
# curl flags: -f fail on HTTP errors instead of feeding an error page to the
# parser, -sS quiet but still report errors, -L follow redirects.
# The pipeline must end at the redirection: a trailing line continuation here
# would glue the next statement onto the perl command line.
if ! curl -fsSL "$rssUrl" \
  | grep -E "title|mp3" \
  | grep -v subtitle \
  | perl -pe 's/.*?url="([^"]+)".*/$1/g' \
  | perl -pe 's/<.*?>//g' \
  | perl -pe 's/^[ \t]+//' \
  | perl -pe 's/\n/ | /g' \
  | perl -pe 's/mp3 \| /mp3 \n/g' \
  | perl -pe 's/.*? \| ([^\|]+? \| [^\|]+)$/$1/g' \
  > "$dataFile"; then
  die "Could not fetch or parse the feed: $rssUrl"
fi

failures=0

# Read the pairs line by line; `read -r` with an empty IFS keeps each line
# intact (no word splitting, no globbing, no backslash mangling).
while IFS= read -r line || [[ -n "$line" ]]; do
  echo "$line"

  # Leftover text that is not a "title | url" pair cannot be downloaded.
  if [[ "$line" != *" | "* ]]; then
    printf 'Skipping line without a "title | url" pair: %s\n' "$line" >&2
    continue
  fi

  # Everything before the first " |" is the title; everything after the
  # first " | " is the URL.
  title="${line%% |*}"
  url="${line#* | }"
  # Strip the trailing whitespace left behind by the line-splitting step.
  url="${url%"${url##*[![:space:]]}"}"

  # Turn the title into a file name: whitespace becomes "_", separators
  # around "-" are tightened, a trailing "_" is dropped, and "/" is replaced
  # because it cannot appear inside a single file name.
  fileName="$(printf '%s' "$title" \
    | perl -pe 's/\s/_/g; s/_-_/-/g; s/__/_/g; s/_-/-/g; s/_$//g; s{/}{-}g').mp3"

  echo "Filename: $fileName"
  echo "URL: $url"

  # Keep going after a failed download so one bad episode does not block the
  # rest, but remember the failure for the exit status.
  if ! wget -O "$fileName" "$url"; then
    printf 'Download failed: %s\n' "$url" >&2
    failures=$((failures + 1))
  fi
done < "$dataFile"

# The temporary file with titles and URLs is deleted by the EXIT trap.
if ((failures > 0)); then
  die "$failures download(s) failed"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment