Skip to content

Instantly share code, notes, and snippets.

@akanehara
Last active November 25, 2016 09:59
Show Gist options
  • Save akanehara/f1c5251e645252931c62570ea5ca90a7 to your computer and use it in GitHub Desktop.
Save akanehara/f1c5251e645252931c62570ea5ca90a7 to your computer and use it in GitHub Desktop.
GNU Parallel と cURLで axel と似たようなことできないかやってみた
#!/bin/bash
# Name of this script without its directory part. Parameter expansion is used
# instead of an unquoted `basename $0`, which breaks when the path of the
# script contains whitespace.
SCRIPT_NAME="${0##*/}"

# Print a one-line description and the command-line synopsis to stdout.
# Globals:   SCRIPT_NAME (read)
# Arguments: none
usage() {
  echo "Parallel HTTP downloader"
  echo "Usage: ${SCRIPT_NAME} url [division]"
}
# The URL argument is mandatory; without it print the usage text to stderr
# and stop with status 1.
if [ $# -lt 1 ]; then
  usage >&2
  exit 1
fi
URL=$1
# Number of parallel downloads, also used as the requested number of
# divisions. Defaults to 4 when the second argument is not given.
PROCESS=${2-4}
# Accept only a positive integer: the value is later used as a divisor when
# the byte ranges are computed and as the job count given to parallel.
case "${PROCESS}" in
  '' | *[!0-9]*)
    usage >&2
    exit 1
    ;;
esac
if [ "${PROCESS}" -lt 1 ]; then
  usage >&2
  exit 1
fi
#==========================================
# Decide the name of the file to save to.
# The path component of the URL is extracted with URI::Split and its last
# segment becomes the file name. When there is no usable last segment (the
# path is empty or is just "/"), the whole URL, percent-escaped with
# URI::Escape, is used instead.
# printf with a quoted argument is used so that "?" or "*" in the URL are not
# expanded as glob patterns by the shell.
PATH_OF_URL=$(printf '%s\n' "${URL}" \
  | perl -MURI::Split=uri_split,uri_join -nle '($s, $a, $p, $q, $f)=uri_split($_);print "$p"')
FILENAME_OF_URL=
if [ -n "${PATH_OF_URL}" ]; then
  # basename is only called with a non-empty operand; an empty path would
  # otherwise make it fail with a missing-operand error.
  FILENAME_OF_URL="$(basename -- "${PATH_OF_URL}")"
fi
if [ -z "${FILENAME_OF_URL}" ] || [ '/' = "${FILENAME_OF_URL}" ]; then
  # The URL has to be piped into perl; passing "perl ..." as further echo
  # arguments only prints the command text instead of running it.
  FILENAME=$(printf '%s\n' "${URL}" | perl -MURI::Escape -ple '$_=uri_escape $_;')
else
  FILENAME=${FILENAME_OF_URL}
fi
echo "Save as ${FILENAME}."
#==========================================
# Total content length.
# Only the first byte is requested (Range: bytes=0-0); the response headers
# are dumped to stdout and the total size is taken from the part after the
# slash in "Content-Range: bytes 0-0/<total>".
# The header name is matched case-insensitively because HTTP field names are
# case-insensitive and are sent in lower case over HTTP/2.
LENGTH=$(curl -s -X GET "${URL}" -H 'Range: bytes=0-0' -o /dev/null -D - \
  | grep -i '^content-range' \
  | sed -n -E -e 's/^.*bytes.*\/([0-9]+).*$/\1/p')
# Exit if the server does not accept the Range request header
# (no total length could be read from the response).
if [ -z "${LENGTH}" ]; then
  echo "Can not accept header 'Range'." >&2
  exit 2
fi
echo "${LENGTH} bytes total."
#==========================================
# Print the list of byte ranges that split the content into chunks.
# Arguments: $1 - total content length in bytes
#            $2 - requested number of divisions
# Outputs:   one line per chunk on stdout, "<index><TAB><first>-<last>",
#            with the index starting at 1 and inclusive byte offsets.
#            Nothing is printed when the length is less than 1.
# The chunk size is int(length / divisions), so a remainder produces one
# additional, shorter chunk at the end.
chunks() {
  awk -v LENGTH="$1" \
    -v DIVISION="$2" \
    'BEGIN {
      LENGTH = int(LENGTH);
      DIVISION = int(DIVISION);
      # Nothing to split for an empty (or invalid) length.
      if (LENGTH < 1) exit;
      # Guard against division by zero.
      if (DIVISION < 1) DIVISION = 1;
      CHUNK_SIZE = int(LENGTH / DIVISION);
      # With fewer bytes than divisions the quotient is 0; a zero chunk size
      # would never advance the cursor and the loop below would not end.
      if (CHUNK_SIZE < 1) CHUNK_SIZE = 1;
      END_BYTE = LENGTH - 1;
      cur = 0;
      i = 1;
      do {
        a = cur;
        b = cur + (CHUNK_SIZE - 1);
        if (END_BYTE < b) b = END_BYTE;
        printf("%d\t%d-%d\n", i, a, b);
        system(""); # Flushes the output, like fflush() in gawk.
        cur = b + 1;
        i++;
      } while (cur <= END_BYTE)
    }'
}
#==========================================
# Temporary directory that holds the downloaded chunks.
# mktemp creates a uniquely named directory under /var/tmp instead of relying
# on a name that can be predicted from the script name and the process id.
TEMP_DIR=$(mktemp -d "/var/tmp/${SCRIPT_NAME}.XXXXXX") || {
  echo "Can not create temporary directory." >&2
  exit 1
}
# Remove the directory on every exit path. The variable is expanded when the
# trap runs; ":?" aborts the removal if it is unexpectedly empty.
trap 'rm -rf -- "${TEMP_DIR:?}"' EXIT
#==========================================
# Parallel download, concatenation and post-processing.
# Every line printed by chunks ("<index><TAB><range>") becomes one curl job
# that requests that byte range and stores it as
# ${TEMP_DIR}/${FILENAME}.<index>. The job log is written to stdout.
# curl -f makes an HTTP error response fail the job instead of saving the
# error page as chunk data; any failed job aborts the script before the
# destination file is touched.
if ! chunks "${LENGTH}" "${PROCESS}" \
  | parallel -j"${PROCESS}" --joblog - --colsep '\t' -q \
    curl -f -s -X GET "${URL}" -HRange:bytes={2} -o "${TEMP_DIR}/${FILENAME}.{1}"; then
  echo "Download failed." >&2
  exit 3
fi
# Ask before replacing an existing file; anything but y/Y aborts.
if [ -e "${FILENAME}" ]; then
  echo "File ${FILENAME} exists. overrite it? [y/n]" >&2
  read -r ans
  case "${ans}" in
    y | Y) ;;
    *)
      echo "no effect." >&2
      exit 1
      ;;
  esac
fi
# Join the chunks in numeric index order (1, 2, 3, ...). A sorted directory
# listing would place ".10" before ".2" and scramble the result as soon as
# there are ten or more chunks.
i=1
while [ -e "${TEMP_DIR}/${FILENAME}.${i}" ]; do
  cat -- "${TEMP_DIR}/${FILENAME}.${i}"
  i=$((i + 1))
done > "${FILENAME}"
echo "Done."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment