fiskr · December 2, 2017 18:09
diff --git a/expanded_download-HowWeRoll.sh b/expanded_download-HowWeRoll.sh
 #!/bin/bash
 # Related one-line script: https://gist.github.com/fiskr/b0c01bbcbd2aefaa726d46bea30a3307
 dataFile="urls.dat" # scratch file: one "title | url" record per line
 rssUrl="https://rss.simplecast.com/podcasts/3236/rss" # How We Roll podcast

 # Download the RSS feed for episodes of the How We Roll Podcast and reduce it
 # to one "title | url" record per line, written to $dataFile.
 #
 # curl flags: -f makes curl return an error instead of passing the server's
 # error page on to be parsed as a feed, -sS hides the progress meter but still
 # reports errors, -L follows redirects. When the download fails, $dataFile is
 # simply left empty.
 curl -fsSL "$rssUrl" |
   # retrieve from the feed just the lines with episode titles and mp3 URLs
   grep -E "title|mp3" |
   # exclude subtitles, since we don't need those
   grep -v subtitle |
   # reduce every line carrying a url attribute to the bare URL
   perl -pe 's/.*?url="([^"]+)".*/$1/g' |
   # delete all node tags, e.g. <title>
   perl -pe 's/<.*?>//g' |
   # get rid of the six-space leading indentation
   perl -pe 's/^      //g' |
   # join all lines into one, with fields separated only by " | "
   perl -pe 's/\n/ | /g' |
   # start a new line after every mp3 URL
   perl -pe 's/mp3 \| /mp3 \n/g' |
   # keep only the last "X | Y" pair of each line, which drops leading
   # titles that had no URL of their own
   perl -pe 's/.*? \| ([^\|]+? \| [^\|]+)$/$1/g' > "$dataFile"

 # Print the local file name for one "title | url" record ($1).
 # The title is everything before the first " |". Whitespace becomes "_",
 # "/" becomes "-" (a slash would otherwise be read as a directory), and the
 # separators are then compacted: "_-_" -> "-", "__" -> "_", "_-" -> "-".
 episode_filename() {
   local -r blanks=$' \t\n\r\f\v'
   local name=${1%% |*}
   # The appended "_" stands in for the trailing newline that the previous
   # echo | perl chain turned into "_" and stripped again as its last step;
   # keeping it makes the generated names identical to the old ones.
   name=${name//[$blanks]/_}_
   name=${name//\//-}
   name=${name//_-_/-}
   name=${name//__/_}
   name=${name//_-/-}
   printf '%s.mp3\n' "${name%_}"
 }

 # Print the URL of one "title | url" record ($1): everything after the first
 # " | ", with trailing whitespace removed.
 episode_url() {
   local url=${1#*' | '}
   url=${url%"${url##*[![:space:]]}"}
   printf '%s\n' "$url"
 }

 # Download every episode listed in $dataFile.
 # The list is read line by line with `read -r`, so titles are never
 # word-split or glob-expanded and IFS is left untouched for the rest of the
 # script.
 if [[ -f "./$dataFile" ]]; then
   # The list is read on fd 3 so that no command inside the loop can consume
   # it through stdin.
   while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
     # Records without the separator carry no URL; skip them.
     [[ "$line" == *' | '* ]] || continue
     printf '%s\n' "$line"
     fileName=$(episode_filename "$line")
     printf 'Filename: %s\n' "$fileName"
     url=$(episode_url "$line")
     printf 'URL: %s\n' "$url"
     # "./" keeps a name starting with "-" from being read as an option.
     wget -O "./$fileName" "$url" ||
       printf 'warning: download failed: %s\n' "$url" >&2
   done 3< "./$dataFile"
 else
   printf 'error: episode list not found: %s\n' "./$dataFile" >&2
 fi

 # Delete the temporary file with titles and URLs.
 # Quoting and "--" make rm receive the name as one literal operand, whatever
 # characters it contains.
 rm -- "$dataFile"
	#!/bin/bash
	# Related one-line script: https://gist.github.com/fiskr/b0c01bbcbd2aefaa726d46bea30a3307
	dataFile="urls.dat" # scratch file: one "title | url" record per line
	rssUrl="https://rss.simplecast.com/podcasts/3236/rss" # How We Roll podcast

	# Download the RSS feed for episodes of the How We Roll Podcast and reduce it
	# to one "title | url" record per line, written to $dataFile.
	#
	# The pipes must be real, unescaped "|" characters: written as "\|" they are
	# handed to curl as literal arguments and no pipeline is built at all.
	#
	# curl flags: -f makes curl return an error instead of passing the server's
	# error page on to be parsed as a feed, -sS hides the progress meter but still
	# reports errors, -L follows redirects. When the download fails, $dataFile is
	# simply left empty.
	curl -fsSL "$rssUrl" |
	  # retrieve from the feed just the lines with episode titles and mp3 URLs
	  grep -E "title|mp3" |
	  # exclude subtitles, since we don't need those
	  grep -v subtitle |
	  # reduce every line carrying a url attribute to the bare URL
	  perl -pe 's/.*?url="([^"]+)".*/$1/g' |
	  # delete all node tags, e.g. <title>
	  perl -pe 's/<.*?>//g' |
	  # get rid of the six-space leading indentation
	  perl -pe 's/^      //g' |
	  # join all lines into one, with fields separated only by " | "
	  perl -pe 's/\n/ | /g' |
	  # start a new line after every mp3 URL
	  perl -pe 's/mp3 \| /mp3 \n/g' |
	  # keep only the last "X | Y" pair of each line, which drops leading
	  # titles that had no URL of their own
	  perl -pe 's/.*? \| ([^\|]+? \| [^\|]+)$/$1/g' > "$dataFile"

	# Print the local file name for one "title | url" record ($1).
	# The title is everything before the first " |". Whitespace becomes "_",
	# "/" becomes "-" (a slash would otherwise be read as a directory), and the
	# separators are then compacted: "_-_" -> "-", "__" -> "_", "_-" -> "-".
	episode_filename() {
	  local -r blanks=$' \t\n\r\f\v'
	  local name=${1%% |*}
	  # The appended "_" is removed again by the final expansion; it keeps a
	  # title's own trailing underscore from being compacted away entirely.
	  name=${name//[$blanks]/_}_
	  name=${name//\//-}
	  name=${name//_-_/-}
	  name=${name//__/_}
	  name=${name//_-/-}
	  printf '%s.mp3\n' "${name%_}"
	}

	# Print the URL of one "title | url" record ($1): everything after the first
	# " | ", with trailing whitespace removed.
	episode_url() {
	  local url=${1#*' | '}
	  url=${url%"${url##*[![:space:]]}"}
	  printf '%s\n' "$url"
	}

	# Download every episode listed in $dataFile.
	# The list is read line by line with `read -r`, so titles are never
	# word-split or glob-expanded and IFS is left untouched for the rest of the
	# script.
	if [[ -f "./$dataFile" ]]; then
	  # The list is read on fd 3 so that no command inside the loop can consume
	  # it through stdin.
	  while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
	    # Records without the separator carry no URL; skip them.
	    [[ "$line" == *' | '* ]] || continue
	    printf '%s\n' "$line"
	    fileName=$(episode_filename "$line")
	    printf 'Filename: %s\n' "$fileName"
	    url=$(episode_url "$line")
	    printf 'URL: %s\n' "$url"
	    # "./" keeps a name starting with "-" from being read as an option.
	    wget -O "./$fileName" "$url" ||
	      printf 'warning: download failed: %s\n' "$url" >&2
	  done 3< "./$dataFile"
	else
	  printf 'error: episode list not found: %s\n' "./$dataFile" >&2
	fi

	# Delete the temporary file with titles and URLs.
	# Quoting and "--" make rm receive the name as one literal operand, whatever
	# characters it contains.
	rm -- "$dataFile"