#!/bin/sh
# This script scrapes Crunchyroll to get an m3u8 and caption file.
# It then uses ffmpeg to download + mux it all into a single mp4.
# It uses awk for EVERYTHING. Sorry? I really wanted to use Python or something
# but that would make it into just a Python thing, or Node or whatever.
# This was painful though because I hate awk.
# See if argv has enough params
if [ $# -lt 2 ]; then
    echo "Usage: $0 [Crunchyroll episode URL, the one you use to view the episode in your browser] [output mp4 file]"
    exit 1
fi
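# Example invocation (the URL and script name here are made up, use whatever you saved this as):
#   ./crunchyroll-rip.sh "https://www.crunchyroll.com/some-show/episode-1-the-beginning-123456" episode1.mp4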
# Let's proceed using $1 as the URL
# If this works, $urls will be two https URLs glued together: the first is the m3u8 and the second is the English (US) caption file
urls=$(wget -qO- "$1" | awk '{split($0, a, ".media = "); split(a[2], b, "hls\",\"audio_lang\":\"jaJP\",\"hardsub_lang\":null,\"url\":\""); split(b[2], c, "\",\"res"); gsub(/\\/, "", c[1]); split($0, d, "\"language\":\"enUS\",\"url\":\""); split(d[2], e, "\",\"title\":\"En"); gsub(/\\/, "", e[1]); printf "%s", c[1]; printf "%s", e[1]}')
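# For reference: the awk above is carving two URLs out of the player config JSON embedded in the page.
# Judging from the split delimiters (an assumption about the page layout, which Crunchyroll may change),
# the bits it targets look roughly like:
#   ..."hls","audio_lang":"jaJP","hardsub_lang":null,"url":"https:\/\/<stream>.m3u8","res...
#   ..."language":"enUS","url":"https:\/\/<captions>","title":"En...
# The gsub(/\\/, "", ...) calls strip the JSON backslash escapes (e.g. \/ -> /) from the captured URLs.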
# If the above doesn't work, then it will probably be a line feed, but let's check the length anyway
if [ ${#urls} -lt 2 ]; then
    echo "Oops, the awk didn't work."
    echo "This could be because:"
    printf "\t* The video didn't have BOTH Japanese dub/no-sub AND English (US) subtitles\n"
    printf "\t* The awk just plain sucks and failed to parse the page\n"
    printf "\t* Crunchyroll changed something on their page\n"
    printf "\t* You didn't link the right page\n"
    printf "\t* You don't have awk or wget installed for some reason (wget is more lightweight than curl)\n"
    printf "\t* Or, the page just failed to load (try diagnosing this with wget)\n"
    exit 1
fi
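# (The "diagnose this with wget" suggestion above boils down to running something like
#   wget -qO- "<the episode URL>" | grep -c "\.media"
# by hand, to see whether the page loads at all and contains the player config.)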
# $urls is valid at this point, so let's separate the URLs...
# $url1 will be the m3u8, and $url2 will be the captions.
# We're using awk to separate these as well because I'm stupid.
url1=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[2]}')
url2=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[3]}')
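# To illustrate with made-up URLs: if $urls is
#   https://example.com/stream.m3u8https://example.com/captions.enUS
# then splitting on "https://" leaves a[2]="example.com/stream.m3u8" and
# a[3]="example.com/captions.enUS", and each line above glues the "https://" prefix back on.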
# We don't need $urls anymore, so unset it...
unset urls
# $2 is the output MP4.
# Let's print some bold text informing you that it's running ffmpeg now.
printf "\n\033[1mGot URLs, running the underlying ffmpeg now!\033[0m\n\n"
# okay now just run ffmpeg and die
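# What the flags below do: -c copy stream-copies the audio/video from the HLS playlist without
# re-encoding, and -c:s mov_text converts the caption track to the mov_text subtitle codec,
# which is the subtitle format MP4 containers understand.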
ffmpeg -i "$url1" -i "$url2" -c copy -c:s mov_text "$2"