#!/bin/sh
# This script scrapes Crunchyroll to get an m3u8 and caption file.
# It then uses ffmpeg to download + mux it all into a single mp4.
# It uses awk for EVERYTHING. Sorry? I really wanted to use Python or something
# but that would make it into just a Python thing, or Node or whatever.
# This was painful though because I hate awk.
# See if argv has enough params
if [ $# -lt 2 ]; then
    echo "Usage: $0 [Crunchyroll episode URL, the one you use to view the episode in your browser] [output mp4 file]"
    exit 1
fi
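# Example invocation (the URL and script name here are made up, use whatever you saved this as):
#   ./crunchyroll-rip.sh "https://www.crunchyroll.com/some-show/episode-1-the-beginning-123456" episode1.mp4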
# Let's proceed using $1 as the URL
# If this works, $urls will be two https URLs glued together: the first is the m3u8 and the second is the English (US) caption file
urls=$(wget -qO- "$1" | awk '{split($0, a, ".media = "); split(a[2], b, "hls\",\"audio_lang\":\"jaJP\",\"hardsub_lang\":null,\"url\":\""); split(b[2], c, "\",\"res"); gsub(/\\/, "", c[1]); split($0, d, "\"language\":\"enUS\",\"url\":\""); split(d[2], e, "\",\"title\":\"En"); gsub(/\\/, "", e[1]); printf "%s", c[1]; printf "%s", e[1]}')
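# For reference: the awk above is carving two URLs out of the player config JSON embedded in the page.
# Judging from the split delimiters (an assumption about the page layout, which Crunchyroll may change),
# the bits it targets look roughly like:
#   ..."hls","audio_lang":"jaJP","hardsub_lang":null,"url":"https:\/\/<stream>.m3u8","res...
#   ..."language":"enUS","url":"https:\/\/<captions>","title":"En...
# The gsub(/\\/, "", ...) calls strip the JSON backslash escapes (e.g. \/ -> /) from the captured URLs.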
# If the above doesn't work, then it will probably be a line feed, but let's check the length anyway
if [ ${#urls} -lt 2 ]; then
    echo "Oops, the awk didn't work."
    echo "This could be because:"
    printf "\t* The video didn't have BOTH Japanese dub/no-sub AND English (US) subtitles\n"
    printf "\t* The awk just plain sucks and failed to parse the page\n"
    printf "\t* Crunchyroll changed something on their page\n"
    printf "\t* You didn't link the right page\n"
    printf "\t* You don't have awk or wget installed for some reason (wget is more lightweight than curl)\n"
    printf "\t* Or, the page just failed to load (try diagnosing this with wget)\n"
    exit 1
fi
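# (The "diagnose this with wget" suggestion above boils down to running something like
#   wget -qO- "<the episode URL>" | grep -c "\.media"
# by hand, to see whether the page loads at all and contains the player config.)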
# $urls is valid at this point, so let's separate the URLs...
# $url1 will be the m3u8, and $url2 will be the captions.
# We're using awk to separate these as well because I'm stupid.
url1=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[2]}')
url2=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[3]}')
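# To illustrate with made-up URLs: if $urls is
#   https://example.com/stream.m3u8https://example.com/captions.enUS
# then splitting on "https://" leaves a[2]="example.com/stream.m3u8" and
# a[3]="example.com/captions.enUS", and each line above glues the "https://" prefix back on.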
# We don't need $urls anymore, so unset it...
unset urls
# $2 is the output MP4.
# Let's print some bold text informing you that it's running ffmpeg now.
printf "\n\033[1mGot URLs, running the underlying ffmpeg now!\033[0m\n\n"
# okay now just run ffmpeg and die
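# What the flags below do: -c copy stream-copies the audio/video from the HLS playlist without
# re-encoding, and -c:s mov_text converts the caption track to the mov_text subtitle codec,
# which is the subtitle format MP4 containers understand.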
ffmpeg -i "$url1" -i "$url2" -c copy -c:s mov_text "$2"