Created
November 21, 2021 07:36
-
-
Save wsntxxn/698b8444e84fb702c9cad39d171cefdd to your computer and use it in GitHub Desktop.
Download the AudioSet video dataset with yt-dlp and ffmpeg.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Before running, configure the downloader in ~/.config/yt-dlp/config
# (this script invokes yt-dlp, so a youtube-dl config is not the one that matters).
# In particular, add aria2c as the external downloader for parallel downloads,
# e.g.:
# --external-downloader aria2c
# --external-downloader-args '-c -j 3 -x 3 -s 3 -k 1M'
# ---------------------------------------------------------------------------
# Argument handling and environment setup.
#   $1  CSV file listing the segments (required)
#   $2  directory the clips are written to (default: current directory)
#   $3  proxy URL exported as http_proxy/https_proxy
#       (default: socks5://127.0.0.1:1080)
# ---------------------------------------------------------------------------
# Numeric comparison: inside [[ ]] the `<` operator compares strings.
if (( $# < 1 )); then
  printf '%s\n' "Input: [.csv file] [download path] [proxy]" >&2
  printf '%s\n' "e.g: flists/balanced_train_segments.csv video/balanced_train socks5://127.0.0.1:1080" >&2
  # A usage error must not look like success to the caller.
  exit 1
fi

inp=$1
download_path=${2:-"."}
proxy=${3:-"socks5://127.0.0.1:1080"}

# Fail early instead of silently processing zero segments later on.
if [[ ! -r "$inp" ]]; then
  printf 'Cannot read segment list: %s\n' "$inp" >&2
  exit 1
fi

export http_proxy=$proxy
export https_proxy=$proxy

echo "download video into ${download_path}"
# mkdir -p is a no-op when the directory already exists, so no -d test is needed.
mkdir -p -- "${download_path}" || exit 1
#######################################
# Download one YouTube video with yt-dlp unless it is already present.
# Globals:   download_path (read) - directory the video is saved into
# Arguments: $1 - YouTube video id
# Outputs:   progress line on stdout; writes <download_path>/<id>.<ext>
# Returns:   0 if <download_path>/<id>.mp4 already exists, otherwise the
#            exit status of yt-dlp
#######################################
fetch_clip() {
  local video_id=$1
  local target="${download_path}/${video_id}.mp4"

  echo "Fetching ${video_id} ..."
  if [[ -f "$target" ]]; then
    return 0
  fi

  # The URL is quoted: unquoted, the `?` is a glob character and the value is
  # subject to word splitting.
  # Format preference: mp4 video-only stream up to 480p, then a combined mp4
  # up to 480p, then whatever yt-dlp considers best.
  # NOTE(review): the last fallback may not produce an .mp4, in which case the
  # existence check above will not recognise it - confirm this is acceptable.
  yt-dlp "https://youtube.com/watch?v=${video_id}" \
    -f 'bestvideo[ext=mp4][height<=480]/best[ext=mp4][height<=480]/best' \
    --no-warnings \
    --output "${download_path}/${video_id}.%(ext)s"
}
#######################################
# Cut the [start, end] segment out of a previously downloaded video.
# Globals:   download_path (read) - directory holding input and output
# Arguments: $1 - YouTube video id (input is <download_path>/<id>.mp4)
#            $2 - segment start, passed to ffmpeg -ss
#            $3 - segment end, passed to ffmpeg -to
# Outputs:   writes <download_path>/<id>_<start>_<end>.mp4 (overwritten if
#            present because of -y)
# Returns:   exit status of ffmpeg
#######################################
trim_clip() {
  # Everything is local so the caller's yid/start/end/outname are untouched.
  local video_id=$1
  local seg_start=$2
  local seg_end=$3
  local source_file="${download_path}/${video_id}.mp4"
  local clip_file="${download_path}/${video_id}_${seg_start}_${seg_end}.mp4"

  # -nostdin keeps ffmpeg from consuming the caller's standard input (it
  # otherwise reads stdin for interactive commands).
  ffmpeg -nostdin -y -loglevel quiet -i "$source_file" \
    -ss "$seg_start" -to "$seg_end" "$clip_file"
}
# Export the helpers to child bash processes; the loop below calls them
# directly in this shell.
export -f fetch_clip
export -f trim_clip

# Process every segment once, in random order.
# The CSV is read in a single pass: comment lines (# or ;) and empty lines are
# dropped, only the first row per video id is kept, and the first three
# ", "-separated fields (id, start, end) are emitted tab-separated. This
# replaces one full-file grep per id, which was quadratic in the list size and
# could merge several rows when an id matched more than one line.
# The list is fed through fd 3 so commands inside the loop cannot swallow it
# by reading stdin.
while IFS=$'\t' read -r -u 3 yid start end; do
  # Skip malformed rows that do not carry all three fields.
  [[ -n "$yid" && -n "$start" && -n "$end" ]] || continue

  clip_file="${download_path}/${yid}_${start}_${end}.mp4"
  full_file="${download_path}/${yid}.mp4"

  # Check if file exists
  if [[ -e "$clip_file" ]]; then
    echo "Found file ${yid}"
  else
    fetch_clip "$yid"
    # Pause between downloads.
    sleep 10
  fi

  # Split up into parts
  if [[ -f "$full_file" ]]; then
    if ! trim_clip "$yid" "$start" "$end"; then
      echo "Failed to trim ${yid}" >&2
      # Drop any partial output so a later run does not report it as found.
      rm -f -- "$clip_file"
    fi
    rm -- "$full_file"
  fi
done 3< <(
  grep -- '^[^#;]' "$inp" \
    | awk -F', ' '!seen[$1]++ { print $1 "\t" $2 "\t" $3 }' \
    | shuf
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment