Last active
November 25, 2016 09:59
-
-
Save akanehara/f1c5251e645252931c62570ea5ca90a7 to your computer and use it in GitHub Desktop.
GNU Parallel と cURLで axel と似たようなことできないかやってみた
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Parallel HTTP downloader: fetches one URL as several byte ranges with
# GNU Parallel and cURL, then joins the parts into a single file.

# Name of this script without its directory part. It is shown in the usage
# text and used in the name of the temporary directory.
SCRIPT_NAME="${0##*/}"
# Print a one-line description and the command-line synopsis to stdout.
# Globals: SCRIPT_NAME (read) - program name shown in the synopsis.
usage() {
  printf '%s\n' \
    "Parallel HTTP downloader" \
    "Usage: ${SCRIPT_NAME} url [division]"
}
# At least the URL is required; otherwise show the usage text and exit 1.
[ $# -ge 1 ] || { usage; exit 1; }

# URL to download.
URL=$1
# Number of divisions / parallel jobs; 4 when the argument is omitted or empty.
PROCESS=${2:-4}
# The value is later used as a divisor and as the job count, so reject
# anything that is not a positive integer.
case "${PROCESS}" in
  '' | *[!0-9]*) usage; exit 1 ;;
esac
[ "${PROCESS}" -ge 1 ] || { usage; exit 1; }
#==========================================
# Decide the name of the output file.
# Path component of the URL, extracted with Perl's URI::Split.
PATH_OF_URL=$(printf '%s\n' "${URL}" \
  | perl -MURI::Split=uri_split -nle '($s, $a, $p, $q, $f) = uri_split($_); print $p')

# Last segment of the path. Stays empty when the URL has no path at all
# (e.g. "http://host"), where basename would otherwise be called without an
# operand.
FILENAME_OF_URL=
if [ -n "${PATH_OF_URL}" ]; then
  FILENAME_OF_URL=$(basename -- "${PATH_OF_URL}")
fi

if [ -z "${FILENAME_OF_URL}" ] || [ "${FILENAME_OF_URL}" = '/' ]; then
  # The path names no file: fall back to the percent-escaped URL.
  FILENAME=$(printf '%s\n' "${URL}" | perl -MURI::Escape -ple '$_ = uri_escape $_;')
else
  FILENAME=${FILENAME_OF_URL}
fi
echo "Save as ${FILENAME}."
#==========================================
# Total content length.
# Request only the first byte and take the total size from the
# "Content-Range: bytes 0-0/<total>" response header. Header names are
# case-insensitive (HTTP/2 responses carry them in lowercase), so the header
# line is selected with a case-insensitive match.
LENGTH=$(curl -s -X GET "${URL}" -HRange:bytes=0-0 -o /dev/null -D - \
  | grep -i '^Content-Range' \
  | sed -n -E -e 's/^.*bytes.*\/([0-9]+).*$/\1/p')

# Stop when the server does not accept the Range request header.
[ -n "${LENGTH}" ] || { echo "Can not accept header 'Range'." >&2; exit 2; }
echo "${LENGTH} bytes total."
#==========================================
# Build the list of byte ranges from the total content length and the number
# of divisions.
# Arguments: $1 - total length in bytes (positive integer)
#            $2 - number of divisions (positive integer)
# Outputs:   one line per chunk on stdout, "<index>\t<first>-<last>"; the
#            index starts at 1 and the byte offsets are zero-based, inclusive.
#            When the length is not a multiple of the division count, one
#            extra, shorter chunk carries the remainder.
# Returns:   non-zero, with a message on stderr, when either argument is not
#            a positive integer.
chunks() {
  awk -v LENGTH="$1" \
      -v DIVISION="$2" \
    'BEGIN {
      LENGTH = int(LENGTH);
      DIVISION = int(DIVISION);
      if (LENGTH < 1 || DIVISION < 1) {
        print "chunks: length and division must be positive integers" > "/dev/stderr";
        exit 1;
      }
      CHUNK_SIZE = int(LENGTH / DIVISION);
      # More divisions than bytes would give a zero-sized chunk and the loop
      # below would never advance; fall back to one byte per chunk.
      if (CHUNK_SIZE < 1) CHUNK_SIZE = 1;
      END_BYTE = LENGTH - 1;
      cur = 0;
      i = 1;
      do {
        a = cur;
        b = cur + (CHUNK_SIZE - 1);
        if (END_BYTE < b) b = END_BYTE;
        printf("%d\t%d-%d\n", i, a, b);
        system(""); # Flushes the output, like fflush() in gawk.
        cur = b + 1;
        i++;
      } while (cur <= END_BYTE)
    }'
}
#==========================================
# Temporary directory that holds the downloaded parts.
# mktemp chooses an unpredictable name and fails instead of reusing an
# existing directory.
TEMP_DIR=$(mktemp -d "/var/tmp/${SCRIPT_NAME}.XXXXXX") \
  || { echo "Can not create temporary directory." >&2; exit 3; }
# Remove the directory and everything in it whenever the script exits. The
# single quotes defer the expansion until the trap runs, and ":?" refuses to
# run rm with an empty path.
trap 'rm -rf -- "${TEMP_DIR:?}"' EXIT
#==========================================
# Parallel download, merge and post-processing.
# Every line from chunks is "<index>\t<range>". parallel splits it at the tab,
# so {1} is the part index (used as the file suffix) and {2} the byte range
# passed to curl. "--joblog -" prints the job log on stdout. curl -f turns an
# HTTP error into a failed job instead of saving the error page as a part.
chunks "${LENGTH}" "${PROCESS}" \
  | parallel -j"${PROCESS}" --joblog - --colsep '\t' -q \
      curl -s -f -X GET "${URL}" -HRange:bytes={2} -o "${TEMP_DIR}/${FILENAME}.{1}" \
  || { echo "Download failed." >&2; exit 3; }

# Ask before replacing an existing file.
if [ -e "${FILENAME}" ]; then
  echo "File ${FILENAME} exists. overrite it? [y/n]" >&2
  read -r ans
  case "${ans}" in
    y | Y) ;;
    *) echo "no effect." >&2; exit 1 ;;
  esac
fi

# Join the parts in numeric order of their index. A name-sorted listing puts
# ".10" before ".2" and scrambles the result once there are ten or more parts.
PARTS=()
i=1
while [ -f "${TEMP_DIR}/${FILENAME}.${i}" ]; do
  PARTS+=("${TEMP_DIR}/${FILENAME}.${i}")
  i=$((i + 1))
done
[ "${#PARTS[@]}" -gt 0 ] || { echo "Download failed." >&2; exit 3; }
cat -- "${PARTS[@]}" > "${FILENAME}"
echo "Done."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment