Skip to content

Instantly share code, notes, and snippets.

@marek-saji
Created October 13, 2013 17:37
Show Gist options
  • Select an option

  • Save marek-saji/6965017 to your computer and use it in GitHub Desktop.

Select an option

Save marek-saji/6965017 to your computer and use it in GitHub Desktop.
#!/bin/bash
# (c)2012, Marek `saji` Augustynowicz
# Licensed under MIT License, http://marek-saji.mit-license.org
#
# Pass URL(s) as the script's parameter(s). If no parameters are passed,
# the script looks for a `urls` file in the current directory and reads input
# from there. If no `urls` file is found, it reads from stdin.
#
# Script tries to interpret lines as URLs and intelligently downloads them
# (using youtube-dl(1), where it makes sense). If an input line does not look
# like a URL (does not start with a protocol), the line will be evaluated.
# Lines containing URLs are removed after a successful download.
# How many times youtube-dl(1) is attempted for one video URL.
YOUTUBEDL_RETRIES=3
# Name of this script without its directory and without a `.sh` suffix.
SCRIPTNAME=$( basename "$0" ".sh" )
##
# printf an error message to standard error, prefixed with a red "ERROR"
#
# Params:
# - same as printf(1): $1 is the format, the remaining ones are its arguments
errorf ()
{
    # the format is kept local, so a caller's FMT variable is never clobbered
    local fmt="${1-}"
    printf "\e[31mERROR\e[m ${fmt}\n" "${@:2}" 1>&2
}
##
# printf a success message to standard output, prefixed with a green "SUCCESS"
#
# Params:
# - same as printf(1): $1 is the format, the remaining ones are its arguments
successf ()
{
    # the format is kept local, so a caller's FMT variable is never clobbered
    local fmt="${1-}"
    printf "\e[32mSUCCESS\e[m ${fmt}\n" "${@:2}"
}
##
# printf an info message to standard output, prefixed with a yellow "INFO"
#
# Params:
# - same as printf(1): $1 is the format, the remaining ones are its arguments
infof ()
{
    # the format is kept local, so a caller's FMT variable is never clobbered
    local fmt="${1-}"
    printf "\e[33mINFO\e[m ${fmt}\n" "${@:2}"
}
##
# printf a header: black text on a cyan background, prefixed with a star
#
# Params:
# - same as printf(1): $1 is the format, the remaining ones are its arguments
headerf ()
{
    # the format is kept local, so a caller's FMT variable is never clobbered
    local fmt="${1-}"
    printf "\e[30m\e[46m★ ${fmt}\e[m\n" "${@:2}"
}
# Call the script recursively, once for each argument, when more than one
# argument is given. Every argument is processed even if an earlier one
# failed; the exit status is the last non-zero status seen (0 if none).
if [ "$#" -gt 1 ]
then
    STATUS=0
    for ARGUMENT in "$@"
    do
        # Re-run through the interpreter instead of executing "$0" directly:
        # when started as `bash get.sh a b`, "$0" has no slash and would be
        # looked up in $PATH (and has to be executable), which usually fails.
        "${BASH:-bash}" "$0" "$ARGUMENT"
        THIS_STATUS=$?
        if [ "$THIS_STATUS" -ne 0 ]
        then
            STATUS="$THIS_STATUS"
        fi
    done
    exit "$STATUS"
fi
##
# Set screen(1) window title to "<script name> [<additional title>] (<cwd>)"
#
# Params
# - $1: Optional additional title
#
# Uses:
# - $TERM: nothing is printed unless it starts with `screen`
#   (covers variants such as `screen-256color`)
# - $SCRIPTNAME: used as the title prefix
# - current directory, as printed by `dirs +0`
settitle ()
{
    # local, so that a caller's TITLE variable is not overwritten
    local title

    case "$TERM" in
        screen*) ;;
        *) return 0 ;;
    esac

    title="($( dirs +0 ))"
    if [ -n "$1" ]
    then
        title="$1 $title"
    fi
    # ESC k <title> ESC \ ; values are passed as printf arguments so that
    # backslashes inside a title are printed verbatim
    printf '\ek%s %s\e\\' "$SCRIPTNAME" "$title"
}
##
# Main function: process one line of the queue
#
# Empty lines and lines starting with `#` are skipped. Lines that do not
# start with a protocol (`scheme://`) are passed to eval. Everything else is
# treated as a URL and downloaded with rtmpdump(1), youtube-dl(1), wget(1) or
# curl(1), depending on the protocol and on the Content-Type reported by
# a HEAD request.
#
# Params:
# - $1: URL to download
#
# Uses:
# - $FILE path to file, where URL came from, optional
# - $BROKEN_URLS to store incorrect URLs; every entry is appended as
#   a literal `\n` followed by the URL
# - $YOUTUBEDL_RETRIES number of youtube-dl attempts (3 when unset)
# - $CURL_HEAD_OPTIONS, $CURL_OPTIONS, $YOUTUBEDL_OPTIONS and
#   $WGET_RECURSIVE_OPTIONS: extra options, word-split on purpose
#
# Exits the whole script with status 3 when less than 1G of space is free.
get ()
{
    local line="$1"
    local url free_kb filename rc headers status_line status_code
    local mime title dir strike
    local retries="${YOUTUBEDL_RETRIES:-3}"
    local min_free_kb=$(( 1 * 1024 * 1024 ))

    headerf "%s" "$line"
    settitle

    # ignore empty lines and comments
    if [ -z "$line" ] || [ "#" = "${line:0:1}" ]
    then
        return 0
    fi

    # evaluate lines that do not look like url (start with a protocol)
    if ! LC_ALL=C grep -qE '^[a-z]+://' <<< "$line"
    then
        infof "Evaluated."
        # NOTE(review): this executes queue content verbatim, by design;
        # only feed the script queue files from a trusted source.
        eval "${line}"
        return
    fi

    # check whether there is enough free space (at least 1G);
    # -P keeps every file system on one line, -k forces 1024-byte units
    free_kb="$( df -Pk . | awk 'NR == 2 { print $4 }' )"
    if [ -n "$free_kb" ] && [ "$free_kb" -lt "$min_free_kb" ] 2>/dev/null
    then
        errorf "Less than 1G free space."
        df -h .
        exit 3
    fi

    # handle google's malware-detecting url
    if LC_ALL=C grep -qE '^ *(https?://)?(www\.)?google\.[a-z.]+/url\?' <<< "$line"
    then
        url="$( curl --silent "$line" | LC_ALL=C grep -oE "[a-z]+://[^'\"]+" | tail -n1 )"
        infof "Resolved as \`%s'" "$url"
        if [ -z "$url" ]
        then
            errorf "Unable to resolve the redirect target"
            BROKEN_URLS="$BROKEN_URLS\n$line"
            return
        fi
    else
        url="$line"
    fi

    # rtmp stream
    case "$line" in
        rtmp://*)
            filename="$( basename "$line" )"
            infof "It's a RTMP stream. Will download to “%s”." "$filename"
            rtmpdump -r "$line" > "$filename"
            rc=$?
            # like every other download, a finished stream leaves the queue
            if [ "$rc" -eq 0 ]
            then
                completed "$line" "$FILE"
            else
                errorf "Unable to download the stream"
                BROKEN_URLS="$BROKEN_URLS\n$line"
            fi
            return "$rc"
            ;;
    esac

    # carriage returns are dropped once, so no later match has to care
    headers="$( curl --head --silent --location $CURL_HEAD_OPTIONS "$url" | tr -d '\r' )"

    # non-200s; with --location (or a proxy) there is one status line per
    # response, only the last one describes the document itself
    status_line="$( grep '^HTTP/' <<< "$headers" | tail -n1 )"
    status_code="$( awk '{ print $2 }' <<< "$status_line" )"
    if [ "$status_code" != "200" ]
    then
        if [ -n "$status_line" ]
        then
            errorf "Got %s" "$status_line"
        else
            errorf "Unable to connect?"
        fi
        BROKEN_URLS="$BROKEN_URLS\n$url"
        return
    fi

    # header names are case-insensitive (HTTP/2 sends them in lower case);
    # take the last Content-Type and drop parameters such as `; charset=`
    mime="$( printf '%s\n' "$headers" | awk '
        tolower($0) ~ /^content-type:/ { value = $0 }
        END {
            sub(/^[^:]*:/, "", value)
            sub(/;.*$/, "", value)
            gsub(/[ \t\r]/, "", value)
            print tolower(value)
        }' )"

    if [ "$mime" = "text/html" ]
    then
        title="$( youtube-dl --get-title --max-download=1 "$url" 2>/dev/null )"
        printf '%s\n' "$title"
        if [ -n "$title" ]
        then
            infof "It's a “%s” video" "$title"
            settitle "$title"
            strike=1
            while [ "$strike" -le "$retries" ]
            do
                if youtube-dl --continue --console-title --restrict-filenames --output "%(uploader)s - %(title)s %(id)s.%(ext)s" $YOUTUBEDL_OPTIONS "$url"
                then
                    completed "$line" "$FILE"
                    break
                elif [ "$strike" -eq "$retries" ]
                then
                    infof "Strike %d, you are out" "$strike"
                    errorf "Unable to download the video"
                    BROKEN_URLS="$BROKEN_URLS\n$url"
                else
                    infof "Strike %d, retrying…" "$strike"
                fi
                strike=$(( strike + 1 ))
            done
        else
            # directory name is the URL with slashes replaced by spaces
            dir="${url//\// }"
            infof "It's “%s”. Will download to “%s” directory." "$mime" "$dir"
            if ! mkdir -p -- "$dir"
            then
                errorf "Unable to create “%s” directory" "$dir"
                BROKEN_URLS="$BROKEN_URLS\n$url"
            # wget runs in a subshell: the working directory of the script
            # stays put, so a relative $FILE still resolves in completed
            elif ( cd -- "$dir" && wget --continue --convert-links --no-check-certificate --page-requisites $WGET_RECURSIVE_OPTIONS "$url" )
            then
                completed "$line" "$FILE"
            fi
        fi
    else
        if [ -n "$mime" ]
        then
            infof "It's “%s”. Will download the file." "$mime"
            settitle "$( basename "$line" )"
        else
            errorf "Got empty MIME type. ☹ Will try to download the file anyway."
        fi
        if curl --location --remote-name --remote-header-name -C - $CURL_OPTIONS "$url"
        then
            completed "$line" "$FILE"
        elif [ -z "$mime" ]
        then
            BROKEN_URLS="$BROKEN_URLS\n$url"
        fi
    fi
    printf "\n"
}
##
# Called, when url is fully retrieved
#
# Prints a success message and removes the URL from the queue file: the first
# line that equals the URL (ignoring surrounding whitespace) is dropped, the
# previous content is kept as `<file>~`.
#
# Params:
# - $1: URL of a file
# - $2: path to a file URL is taken from, optional
completed ()
{
    # locals, so the caller's URL and FILE variables stay untouched
    local url="$1"
    local queue="$2"
    local tmp

    successf "DONE"

    # no queue file in use, or input comes from a device such as /dev/stdin
    case "$queue" in
        ''|/dev/*) return 0 ;;
    esac
    if [ ! -f "$queue" ]
    then
        errorf "Queue file \`%s' not found" "$queue"
        return 1
    fi

    # temporary file next to the queue, so the final mv is a plain rename
    tmp="$( mktemp "${queue}.XXXXXX" )" || return 1

    # The URL is compared literally (no regex, no substring match) and is
    # handed over via the environment, so awk does not process backslashes.
    # awk exits with 3 when no line matched.
    TARGET_URL="$url" awk '
        !removed {
            candidate = $0
            sub(/^[ \t\r]+/, "", candidate)
            sub(/[ \t\r]+$/, "", candidate)
            if (candidate "" == ENVIRON["TARGET_URL"] "") {
                removed = 1
                next
            }
        }
        { print }
        END { exit (removed ? 0 : 3) }
    ' < "$queue" > "$tmp"
    case "$?" in
        0) ;;
        3) errorf "Something happended while removing url from queue file" ;;
        *)
            errorf "Something happended while removing url from queue file"
            rm -f "$tmp"
            return 1
            ;;
    esac

    cp "$queue" "$queue~"
    # replace (not truncate) the queue: a reader that has the old file open
    # keeps seeing the old content
    mv "$tmp" "$queue"
}
# include configuration file from the current directory
# (the explicit ./ matters: for a name without a slash `.` searches $PATH
# first and could pick up an unrelated .getshrc)
if [ -f "./.getshrc" ]
then
    . ./.getshrc
fi
# determine URLs source:
# - a URL argument is downloaded right away and the script exits
# - a directory argument means "<directory>/urls"
# - any other argument is the path of a queue file
# - without an argument: ./urls if it exists, standard input otherwise
# For directory and file arguments the script changes into the directory of
# the queue file and sets $FILE relative to it.
if [ -n "$1" ]
then
    if printf '%s\n' "$1" | LC_ALL=C grep -qE '^[a-z]+://'
    then
        get "$1"
        exit $?
    fi

    START_DIR="$PWD"
    if [ -d "$1" ]
    then
        TARGET_DIR="$1"
        FILE="./urls"
    else
        TARGET_DIR="$( dirname "$1" )"
        FILE="$( basename "$1" )"
    fi

    # stop instead of processing a queue in the wrong directory
    if ! cd -- "$TARGET_DIR"
    then
        errorf "Unable to enter directory \`%s'" "$TARGET_DIR"
        exit 1
    fi
    if [ ! -r "$FILE" ]
    then
        errorf "Unable to read queue file \`%s'" "$FILE"
        exit 1
    fi

    # per-directory configuration; skipped when the directory did not change,
    # so the same file is not sourced a second time
    if [ "$PWD" != "$START_DIR" ] && [ -f "./.getshrc" ]
    then
        . ./.getshrc
    fi
elif [ -f "./urls" ]
then
    FILE="./urls"
else
    FILE=/dev/stdin
    infof "Reading from standard input.\n"
fi
# main loop: feed every line of $FILE to get, then report broken URLs
BROKEN_URLS=""
if [ "$FILE" = "/dev/stdin" ]
then
    # counting lines would consume the very input that is about to be read
    infof "Reading from \`%s'" "$FILE"
else
    infof "Reading from \`%s', %d lines" "$FILE" "$(( $( wc -l < "$FILE" ) ))"
fi
# The queue is read through file descriptor 3 in the current shell:
# - no pipeline subshell, so $BROKEN_URLS collected by get survives the loop
# - the descriptor keeps pointing at the content that was there when the
#   loop started, even if the queue file is replaced while it runs
# `|| [ -n ... ]` also handles a last line without a trailing newline.
while read -r QUEUE_LINE <&3 || [ -n "$QUEUE_LINE" ]
do
    get "$QUEUE_LINE" "$FILE"
    if [ "$FILE" != "/dev/stdin" ]
    then
        infof "%s files left in the queue" "$(( $( wc -l < "$FILE" ) ))"
    fi
done 3< "$FILE"
if [ -n "$BROKEN_URLS" ]
then
    # entries are separated by a literal `\n`; %b expands it while keeping
    # `%` characters of the URLs out of the printf format
    errorf "\nFOUND BROKEN URLs:%b\n" "$BROKEN_URLS"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment