Skip to content

Instantly share code, notes, and snippets.

@wsntxxn
Created November 21, 2021 07:36
Show Gist options
  • Save wsntxxn/698b8444e84fb702c9cad39d171cefdd to your computer and use it in GitHub Desktop.
Save wsntxxn/698b8444e84fb702c9cad39d171cefdd to your computer and use it in GitHub Desktop.
Download audioset video dataset with yt-dlp and ffmpeg.
#!/usr/bin/env bash
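# Before running, set up your yt-dlp configuration file
# (typically ~/.config/yt-dlp/config; older setups used ~/.config/youtube-dl/config).
# In particular, enable aria2c as the external downloader for parallel downloads,
# e.g.:
#   --external-downloader aria2c
#   --external-downloader-args '-c -j 3 -x 3 -s 3 -k 1M'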
# --- Argument handling and environment setup --------------------------------
# Positional arguments:
#   $1  CSV file listing the clips to download (required)
#   $2  directory to download into (default: ".")
#   $3  proxy URL, exported as http_proxy/https_proxy
#       (default: socks5://127.0.0.1:1080)
if (( $# < 1 )); then
  # Usage goes to stderr with a non-zero status so callers can detect misuse.
  echo "Input: [.csv file] [download path] [proxy]" >&2
  echo "e.g: flists/balanced_train_segments.csv video/balanced_train socks5://127.0.0.1:1080" >&2
  exit 1
fi
inp=$1
download_path=${2:-"."}
proxy=${3:-"socks5://127.0.0.1:1080"}
if [[ ! -r "${inp}" ]]; then
  echo "Cannot read input file: ${inp}" >&2
  exit 1
fi
export http_proxy=$proxy
export https_proxy=$proxy
echo "download video into ${download_path}"
# mkdir -p is a no-op when the directory already exists, so no -d guard is needed.
mkdir -p -- "${download_path}" || exit 1
#######################################
# Download the full video for one YouTube ID with yt-dlp.
# Globals:   download_path (read)
# Arguments: $1 - YouTube video ID
# Outputs:   a progress line on stdout; yt-dlp writes
#            ${download_path}/<id>.<ext>
# Returns:   0 when ${download_path}/<id>.mp4 already exists (nothing is
#            downloaded), otherwise yt-dlp's exit status
#######################################
fetch_clip() {
  local video_id=$1
  echo "Fetching ${video_id} ..."
  if [[ -f "${download_path}/${video_id}.mp4" ]]; then
    return 0
  fi
  # The URL is quoted: unquoted, its '?' is a glob character and the word
  # could be rewritten or dropped by pathname expansion.
  yt-dlp "https://youtube.com/watch?v=${video_id}" \
    -f 'bestvideo[ext=mp4][height<=480]/best[ext=mp4][height<=480]/best' \
    --no-warnings --output "${download_path}/${video_id}.%(ext)s"
}
#######################################
# Cut one time segment out of a previously downloaded video with ffmpeg.
# Globals:   download_path (read)
# Arguments: $1 - YouTube video ID; input is ${download_path}/<id>.mp4
#            $2 - segment start, passed to ffmpeg -ss
#            $3 - segment end, passed to ffmpeg -to
# Outputs:   writes ${download_path}/<id>_<start>_<end>.mp4, overwriting any
#            existing file (-y)
# Returns:   ffmpeg's exit status
#######################################
trim_clip() {
  # Locals only: the caller's loop uses variables named yid/start/end, which
  # must not be overwritten from inside this function.
  local video_id=$1 seg_start=$2 seg_end=$3
  local src="${download_path}/${video_id}.mp4"
  local dst="${download_path}/${video_id}_${seg_start}_${seg_end}.mp4"
  # -nostdin keeps ffmpeg from consuming the caller's standard input.
  ffmpeg -nostdin -y -loglevel quiet -i "$src" \
    -ss "$seg_start" -to "$seg_end" "$dst"
}
# export -f makes the helper functions available to child bash processes.
export -f fetch_clip
export -f trim_clip

# Process each distinct video ID once, in random order.
# A single awk pass replaces the per-ID grep over the whole CSV: it skips empty
# lines and lines starting with '#' or ';', keeps the first row seen for each
# ID, and prints "id start end". Fields are split on ',' followed by optional
# spaces, and the ID is compared exactly instead of as a regex substring.
# The list is read on fd 3 so yt-dlp and ffmpeg keep the script's own stdin.
while read -r -u 3 yid start end; do
  [[ -n "${yid}" ]] || continue
  clip_file="${download_path}/${yid}_${start}_${end}.mp4"
  full_file="${download_path}/${yid}.mp4"
  # Check if the trimmed clip already exists
  if [[ -e "${clip_file}" ]]; then
    echo "Found file ${yid}"
  else
    fetch_clip "${yid}"
    # Pause between downloads.
    sleep 10
  fi
  # Cut out the requested segment, then drop the full-length download
  if [[ -f "${full_file}" ]]; then
    if ! trim_clip "${yid}" "${start}" "${end}"; then
      echo "Failed to trim ${yid}" >&2
    fi
    rm -- "${full_file}"
  fi
done 3< <(
  awk -F', *' '/^[^#;]/ && !seen[$1]++ { print $1, $2, $3 }' "${inp}" | shuf
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment