Created
October 13, 2013 17:37
-
-
Save marek-saji/6965017 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# (c)2012, Marek `saji` Augustynowicz
# Licensed under MIT License, http://marek-saji.mit-license.org
#
# Pass URL(s) as script's parameter(s). If no parameters passed,
# script seeks for `urls` file in current directory and read input
# from there. If no `urls` is to be found, read from stdin.
#
# Script tries to interpret lines as URLs and intelligently downloads them
# (using youtube-dl(1), where it makes sense). If input line does not look like
# a URL (does not contain protocol), line will be evaluated.
# Lines containing URLs are removed after successful download.

# how many times youtube-dl is retried before a URL is declared broken
YOUTUBEDL_RETRIES=3
# script name with any ".sh" suffix stripped; used in screen(1) window titles
SCRIPTNAME="$( basename "$0" .sh )"
##
# printf an error message to stderr (red "ERROR" prefix)
#
# Params:
# - same as printf(1): $1 is the format string, the rest are its arguments
errorf ()
{
    # local: do not leak FMT into the caller's scope
    local FMT="$1"
    shift
    # caller-supplied format is expanded into printf's format string —
    # that is the documented interface; callers must pass trusted formats
    printf "\e[31mERROR\e[m $FMT\n" "$@" 1>&2
}
##
# printf a success message to stdout (green "SUCCESS" prefix)
#
# Params:
# - same as printf(1): $1 is the format string, the rest are its arguments
successf ()
{
    # local: do not leak FMT into the caller's scope
    local FMT="$1"
    shift
    printf "\e[32mSUCCESS\e[m $FMT\n" "$@"
}
##
# printf an info message to stdout (yellow "INFO" prefix)
#
# Params:
# - same as printf(1): $1 is the format string, the rest are its arguments
infof ()
{
    # local: do not leak FMT into the caller's scope
    local FMT="$1"
    shift
    printf "\e[33mINFO\e[m $FMT\n" "$@"
}
##
# printf a header line (black-on-cyan, starred) to stdout
#
# Params:
# - same as printf(1): $1 is the format string, the rest are its arguments
headerf ()
{
    # local: do not leak FMT into the caller's scope
    local FMT="$1"
    shift
    printf "\e[30m\e[46m★ $FMT\e[m\n" "$@"
}
# call the script recursively for each argument when more than one is given;
# a failure of one argument does not stop the others — the script exits with
# the last non-zero status seen
if [ "$#" -gt "1" ]
then
    STATUS=0
    for FILE in "$@"
    do
        "$0" "$FILE"
        THIS_STATUS=$?
        if [ "$THIS_STATUS" -ne "0" ]
        then
            STATUS="$THIS_STATUS"
            #break
        fi
    done
    exit $STATUS
fi
##
# Set screen(1) window title to "$SCRIPTNAME <extra> (<cwd>)"
#
# Params
# - $1: Optional additional title
#
# Uses:
# - $TERM (only acts inside GNU screen), $SCRIPTNAME, current directory
settitle ()
{
    # local: the original leaked TITLE globally, clobbering callers' TITLE
    local TITLE
    # set screen(1) window name; "=" instead of the bashism "==" in [ ]
    if [ "$TERM" = "screen" ]
    then
        TITLE="($( dirs +0 ))"
        if [ -n "$1" ]
        then
            TITLE="$1 $TITLE"
        fi
        # \ek ... \e\\ is screen's window-naming escape; printf instead of
        # the non-portable `echo -ne`
        printf '\ek%s\e\\' "$SCRIPTNAME $TITLE"
    fi
}
##
# Main function
#
# Params:
# - $1: URL to download (one queue line; non-URL lines are eval'd instead)
#
# Uses:
# - $FILE path to file, where URL came from, optional
# - $BROKEN_URLS to store incorrect URLs
# - $CURL_HEAD_OPTIONS, $CURL_OPTIONS, $YOUTUBEDL_OPTIONS,
#   $WGET_RECURSIVE_OPTIONS — optional extra flags, presumably set via
#   .getshrc (TODO confirm; they are referenced but never set here)
get ()
{
    LINE="$1"
    headerf "%s" "$LINE"
    settitle
    # ignore empty lines and comments (lines starting with "#")
    if [ -z "$LINE" ] || [ "#" = "${LINE:0:1}" ]
    then
        return
    fi
    # evaluate lines that do not look like url (start with a protocol);
    # NOTE(review): this executes arbitrary shell code from the queue file
    # — by design per the file header, but the queue must be trusted input
    if ( echo "$LINE" | grep -vPq "^[a-z]+://" )
    then
        infof "Evaluated."
        eval "${LINE}"
        return
    fi
    # check whether there is enough free space (at least 1G); takes df's
    # 4th column (available space, 1K blocks) — assumes GNU coreutils df
    # output layout, TODO confirm on other platforms
    DF=$( /bin/df . | tail -n1 | awk -F\  '{ print $4; }' )
    DF_LIMIT=$(( 1 * 1024 * 1024 ))
    if [ $DF -lt $DF_LIMIT ]
    then
        errorf "Less than 1G free space."
        df -h .
        exit 3
    fi
    # handle google's malware-detecting url: fetch the interstitial page
    # and take the last URL-looking string on it as the real destination
    if ( echo "$LINE" | grep -Pq '^ *(https?://)?(www\.)?google.[a-z.]+/url\?' )
    then
        URL="$( curl --silent "$LINE" | grep -oP "[a-z]+://[^'\"]+" | tail -n1 )"
        infof "Resolved as \`%s'" "$URL"
    else
        URL="$LINE"
    fi
    # rtmp stream: curl cannot speak RTMP, delegate to rtmpdump(1)
    if ( echo "$LINE" | grep -Pq '^ *rtmp://' )
    then
        FILENAME="$( basename "$LINE" )"
        infof "It's a RTMP stream. Will download to “%s”." "$FILENAME"
        rtmpdump -r "$LINE" > "$FILENAME"
        return
    fi
    # probe the URL: headers only, following redirects
    HEADERS="$( curl --head --silent --location $CURL_HEAD_OPTIONS "$URL" )"
    # non-200s: report, remember the URL as broken and skip it
    if ! ( echo "$HEADERS" | grep -q '^HTTP.*200' )
    then
        if ( echo "$HEADERS" | grep -q '^HTTP' )
        then
            errorf "Got %s" "$( echo "$HEADERS" | grep '^HTTP' )"
        else
            errorf "Unable to connect?"
        fi
        BROKEN_URLS="$BROKEN_URLS\n$URL"
        return
    fi
    # MIME type from the last Content-Type header (redirect chains produce
    # several), with any "; charset=..." suffix and whitespace stripped
    MIME="$( echo "$HEADERS" | grep '^Content-Type' | tail -n1 | cut -d\  -f2 | cut -d\; -f1 | sed -re 's/^\s+|\s+$//g' )"
    if [ "$MIME" = "text/html" ]
    then
        # an HTML page: first ask youtube-dl whether it is a known video site
        TITLE="$( youtube-dl --get-title --max-download=1 "$URL" 2>/dev/null )"
        # NOTE(review): unquoted echo of $TITLE looks like leftover debug
        # output — confirm before removing
        echo $TITLE
        if [ -n "$TITLE" ]
        then
            infof "It's a “%s” video" "$TITLE"
            settitle "$TITLE"
            # retry the download up to $YOUTUBEDL_RETRIES times
            for STRIKE in $( seq $YOUTUBEDL_RETRIES )
            do
                if ( youtube-dl --continue --console-title --restrict-filenames --output "%(uploader)s - %(title)s %(id)s.%(ext)s" $YOUTUBEDL_OPTIONS "$URL" )
                then
                    completed "$LINE" "$FILE"
                    break
                elif [ $STRIKE -eq $YOUTUBEDL_RETRIES ]
                then
                    infof "Strike %d, you are out" $STRIKE
                    errorf "Unable to download the video"
                    BROKEN_URLS="$BROKEN_URLS\n$URL"
                else
                    infof "Strike %d, retrying…" $STRIKE
                fi
            done
        else
            # ordinary web page: mirror it with wget into a directory named
            # after the URL with every "/" replaced by a space
            DIR="$( echo "$URL" | sed -e 's/\// /g' )"
            infof "It's “%s”. Will download to “%s” directory." "$MIME" "$DIR"
            mkdir "$DIR"
            cd "$DIR"
            if ( wget --continue --convert-links --no-check-certificate --page-requisites $WGET_RECURSIVE_OPTIONS "$URL" )
            then
                completed "$LINE" "$FILE"
            fi
            # cd - prints the directory it returns to; harmless here
            cd -
        fi
    else
        # anything that is not text/html is treated as a plain file download
        if [ -n "$MIME" ]
        then
            infof "It's “%s”. Will download the file." "$MIME"
            settitle "$( basename "$LINE" )"
        else
            errorf "Got empty MIME type. ☹ Will try to download the file anyway."
        fi
        # -C -: resume a partial download; --remote-header-name honours the
        # server-supplied filename
        if ( curl --location --remote-name --remote-header-name -C - $CURL_OPTIONS "$URL" )
        then
            completed "$LINE" "$FILE"
        elif [ -z "$MIME" ]
        then
            BROKEN_URLS="$BROKEN_URLS\n$URL"
        fi
    fi
    printf "\n"
}
##
# Called, when url is fully retrieved: report success and remove the URL's
# line from the queue file, keeping a one-step backup in "$FILE~"
#
# Params:
# - $1: URL of a file
# - $2: path to a file URL is taken from, optional
completed ()
{
    successf "DONE"
    URL="$1"
    FILE="$2"
    if [ -n "$FILE" ]
    then
        TMP="$( mktemp )"
        # -F: match the URL as a literal string — URLs are full of regex
        # metacharacters (".", "?", "+"), so a regex match could remove the
        # wrong lines; "--" protects URLs that begin with a dash
        grep -vF -- "$URL" "$FILE" > "$TMP"
        OLD_WC=$( wc -l "$FILE" | cut -d\  -f1 )
        EXPECTED_WC=$(( OLD_WC - 1 ))
        NEW_WC=$( wc -l "$TMP" | cut -d\  -f1 )
        # sanity check: exactly one line should have been removed
        if [ "$EXPECTED_WC" != "$NEW_WC" ]
        then
            errorf "Something happened while removing url from queue file"
        fi
        # back up the queue file before replacing it
        cp "$FILE" "$FILE~"
        mv "$TMP" "$FILE"
    fi
}
# include configuration file from the current directory, if present
# (presumably sets CURL_OPTIONS / CURL_HEAD_OPTIONS / YOUTUBEDL_OPTIONS /
# WGET_RECURSIVE_OPTIONS used later — TODO confirm the expected contents)
if [ -f ".getshrc" ]
then
    . .getshrc
fi
# determine URLs source:
# - an argument that looks like a URL is downloaded directly,
# - a directory argument means "use <dir>/urls",
# - any other argument is taken as the queue file itself,
# - with no argument: ./urls if it exists, otherwise stdin
if [ -n "$1" ]
then
    if ( echo "$1" | grep -qP '^[a-z]+://' )
    then
        get "$1"
        exit $?
    else
        if [ -d "$1" ]
        then
            cd "$1"
            FILE="./urls"
        else
            cd "$( dirname "$1" )"
            FILE="$( basename "$1" )"
        fi
        # re-read the config relative to the queue file's directory
        if [ -f .getshrc ]
        then
            . .getshrc
        fi
    fi
elif [ -f "./urls" ]
then
    FILE="./urls"
else
    FILE=/dev/stdin
    infof "Reading from standard input.\n"
fi
# main loop
BROKEN_URLS=""
# NOTE(review): when FILE=/dev/stdin this wc consumes the input before the
# loop — pre-existing behavior, kept as-is
infof "Reading from \`%s', %d lines" "$FILE" "$( wc -l "$FILE" | cut -d\  -f1 )"
# read the queue in the current shell: the previous `cat "$FILE" | while`
# ran the loop in a subshell, so every BROKEN_URLS update made inside get()
# was lost and the final broken-URLs report below could never fire;
# IFS= read -r also keeps backslashes and whitespace in URLs intact
while IFS= read -r URL
do
    get "$URL" "$FILE"
    infof "%s files left in the queue" "$( wc -l "$FILE" )"
done < "$FILE"
if [ -n "$BROKEN_URLS" ]
then
    errorf "\nFOUND BROKEN URLs:$BROKEN_URLS\n"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment