Skip to content

Instantly share code, notes, and snippets.

@glubsy
Forked from cheadrian/Youtube_Livestream_Parts_download.py
Last active February 27, 2021 18:36
Show Gist options
  • Save glubsy/6e9b3061e074f528ea7153647f9fe615 to your computer and use it in GitHub Desktop.
Save glubsy/6e9b3061e074f528ea7153647f9fe615 to your computer and use it in GitHub Desktop.
Download a YouTube live-streamed video from the start. These scripts are no longer updated; check out https://github.com/glubsy/livestream_saver instead!
#!/bin/zsh
# Join parts generated by the script: https://gist.github.com/cheadrian/b661fb68a6a87ea64069e641cef68c3e#gistcomment-3495351
# Scratch directory; it is created here but not referenced again in this script.
mkdir -p vid_fix

# Locations of the downloaded pieces, relative to where the script is started.
# The trailing slash is kept on purpose: later paths are built by plain
# concatenation ("${vid_dir}vid_list.txt").
base_dir="$(pwd)"
vid_dir="${base_dir}/stream_download_test/vid/"
aud_dir="${base_dir}/stream_download_test/aud/"

# How many list entries to drop from the start and from the end.
skip_start=58
skip_end=0
# List audio and video source files and format them for ffmpeg's concat
# demuxer (one "file '<name>'" line per piece).

# build_concat_list DIR LIST_NAME LABEL [SKIP_START] [SKIP_END]
#   DIR         directory holding the pieces, written with a trailing slash
#   LIST_NAME   name of the list file to create inside DIR
#   LABEL       word used in the progress messages ("video" / "audio")
#   SKIP_START  number of entries to drop from the top of the list (default 0)
#   SKIP_END    number of entries to drop from the bottom (default 0)
# An already existing list is left untouched, so it can be edited by hand
# between runs.
build_concat_list() {
  local dir="$1" list_file="$1$2" label="$3"
  local n_head="${4:-0}" n_tail="${5:-0}"
  local total first_dropped

  if [[ -f "${list_file}" ]]; then
    return 0
  fi

  # The list is created inside the directory being listed, so it shows up in
  # the ls output; filter out every name ending in .txt (anchored, so names
  # that merely contain "txt" are kept).
  ls -v -- "${dir}" | grep -v '\.txt$' | sed "s/.*/file '&'/" > "${list_file}"

  if [[ "${n_head}" -gt 0 ]]; then
    # Remove the first N lines
    echo "Discarding ${n_head} ${label} files from the first line..."
    sed -i "1,${n_head}d" "${list_file}"
  fi

  if [[ "${n_tail}" -gt 0 ]]; then
    # Remove the last N lines: the first line to delete is total - N + 1.
    echo "Discarding ${n_tail} ${label} files at the end..."
    total=$(wc -l < "${list_file}")
    first_dropped=$(( total - n_tail + 1 ))
    # Asked to drop more than is left: drop everything instead of handing sed
    # a zero or negative address.
    if [[ "${first_dropped}" -lt 1 ]]; then
      first_dropped=1
    fi
    sed -i "${first_dropped},\$d" "${list_file}"
  fi
}

build_concat_list "${vid_dir}" "vid_list.txt" "video" "${skip_start}" "${skip_end}"
build_concat_list "${aud_dir}" "aud_list.txt" "audio" "${skip_start}" "${skip_end}"
# Convert video pieces to better format (in case we need)
# for f in ${vid_dir}*.mp4; do ffmpeg -i "${f}" -an -c:v libx264 -crf 18 ${f#${vid_dir}}; done;
# Concatenate the video pieces into one MPEG-TS file (stream copy, audio dropped).
video_output="output.ts"
if [[ ! -e "${vid_dir}${video_output}" ]]; then
  # ffmpeg is given the relative name vid_list.txt, so it has to run inside
  # vid_dir. The subshell keeps the directory change local and skips ffmpeg
  # entirely when the directory cannot be entered, instead of running it in
  # whatever directory we happen to be in.
  (
    cd "${vid_dir}" || exit 1
    # ffmpeg -f concat -i vid_list.txt -c:v libx264 -crf 16 -an -f mpegts output.mpg;
    ffmpeg -hide_banner -auto_convert 1 -f concat -i vid_list.txt -c copy -bsf h264_mp4toannexb -an -f mpegts "${video_output}"
  )
else
  echo "${video_output} already exists. Skipping creation."
fi
# Concatenate and encode into mp3 to remove timestamps embedded in the
# original files; stream-copying into m4a did not seem to work.
audio_output="output.mp3"
if [[ ! -e "${aud_dir}${audio_output}" ]]; then
  # ffmpeg is given the relative name aud_list.txt, so it has to run inside
  # aud_dir. The subshell keeps the directory change local and skips ffmpeg
  # when the directory cannot be entered.
  (
    cd "${aud_dir}" || exit 1
    # ffmpeg -hide_banner -f concat -i aud_list.txt -c copy ${audio_output};
    # Write to ${audio_output} so the file produced is the one the existence
    # check above looks for.
    ffmpeg -hide_banner -f concat -i aud_list.txt -acodec libmp3lame -b:a 128k "${audio_output}"
  )
else
  echo "${audio_output} already exists. Skipping creation."
fi
# ffmpeg -i ${vid_dir}output.mp4 -i ${aud_dir}output.m4a -filter_complex \
# "[0:v][1:a] concat=n=2:v=1:a=1 [outv] [outa]" -map "[outv]" -map "[outa]" out.mp4
# Mux for final file. Specifying 60 fps seems to be required here to ignore original timestamps in video stream
final_output="stream_output_final.mp4"
echo "Muxing final file ${final_output}"
# Both inputs are named through video_output / audio_output, so renaming
# either intermediate file in the concatenation steps is picked up here too.
ffmpeg -hide_banner -i "${vid_dir}${video_output}" -r:0 60 -vn -i "${aud_dir}${audio_output}" -c:v libx264 -crf 18 -c:a copy "${final_output}"
# =============================
# Archived notes:
# =============================
## Easier way could be with a bash for loop
# for f in ./*.wav; do echo "file '$f'" >> mylist.txt; done
## or
# printf "file '%s'\n" ./*.wav > mylist.txt
## Store files in an array
# files=(*.c)
# for f in "${files[@]}"; do cmd "$f"; done
## Or just use glob
# for f in *.c; do cmd "$f"; done
#!/usr/bin/env python3
# With the help of this script you can download parts from the Youtube Video
# that is live streamed, from the start of the stream
# https://gist.github.com/glubsy/6e9b3061e074f528ea7153647f9fe615
import urllib.request
import urllib.error
from os import makedirs, sep, listdir
# Note: you need to be logged in to get the URL, we do not use cookies directly here.
# E.G: "https://r4---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603041842& ..... 2.20201016.02.00&sq=..."
# The sound link should contain: &mime=audio in it.
# Here's an example from NASA LIVE:
# VIDEO: https://r5---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603165657&ei=eQmOX8TeFtS07gO1xLWwDA&ip=x.x.x.x&id=DDU-rZs-Ic4.1&itag=137&aitags=133%2C134%2C135%2C136%2C137%2C160&source=yt_live_broadcast&requiressl=yes&mh=PU&mm=44%2C29&mn=sn-gqn-p5ns%2Csn-c0q7lnsl&ms=lva%2Crdu&mv=m&mvi=5&pl=20&initcwndbps=1350000&vprv=1&live=1&hang=1&noclen=1&mime=video%2Fmp4&gir=yes&mt=1603143920&fvip=5&keepalive=yes&fexp=23915654&c=WEB&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cvprv%2Clive%2Chang%2Cnoclen%2Cmime%2Cgir&sig=AOq0QJ8wRQIgQMnxy1Yk3HLTpqbOGmjZYH1CXCTNx6u6PgngAVGi4EQCIQDWyaye-u_KGyVQ0HRUsyKVaAzyXbmzDqOGVGpIyP7VtA%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRAIgR5QVZh23NcLE2nRpo5IT-axGEfUCJrXKMmJHjXQdkCYCIFLsIFacvPpy98zaNSB0RfXswacyc-Ru3sYeEjTFym43&alr=yes&cpn=LlPCcTsE_3Xao9Xh&cver=2.20201016.02.00&sq=2504043&rn=13&rbuf=21958
# AUDIO: https://r5---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603165657&ei=eQmOX8TeFtS07gO1xLWwDA&ip=x.x.x.x&id=DDU-rZs-Ic4.1&itag=140&source=yt_live_broadcast&requiressl=yes&mh=PU&mm=44%2C29&mn=sn-gqn-p5ns%2Csn-c0q7lnsl&ms=lva%2Crdu&mv=m&mvi=5&pl=20&initcwndbps=1350000&vprv=1&live=1&hang=1&noclen=1&mime=audio%2Fmp4&gir=yes&mt=1603143920&fvip=5&keepalive=yes&fexp=23915654&c=WEB&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Clive%2Chang%2Cnoclen%2Cmime%2Cgir&sig=AOq0QJ8wRAIgWFTZLV1G33cKJoitlK7dUgNg1KuXyvC6F9F7Lc6x3gcCIHaGjehjvVAjUd6cqMnTLtBq9pPRfQWXM3bwI1qQYqpx&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRAIgR5QVZh23NcLE2nRpo5IT-axGEfUCJrXKMmJHjXQdkCYCIFLsIFacvPpy98zaNSB0RfXswacyc-Ru3sYeEjTFym43&alr=yes&cpn=LlPCcTsE_3Xao9Xh&cver=2.20201016.02.00&sq=2504045&rn=20&rbuf=17971
# Use MPV or VLC to play the parts. ffmpeg to re-encode / re-mux and then concatenate.
# Paste the complete video URL between the quotes; everything from "&sq="
# onwards is trimmed off further down.
video_link = ""
# video_link = "VIDEO LINK THE END -> &sq=" # Look for the substring mime=video to make sure
# Paste the complete audio URL between the quotes; it gets the same treatment.
sound_link = ""
# sound_link = "AUDIO LINK THE END -> &sq= " # Look for the substring mime=audio to make sure
# Keep only what precedes the first "&sq=" and re-append a bare "&sq=", so a
# sequence number can be concatenated directly onto each link.
video_link = video_link.partition("&sq=")[0] + "&sq="
sound_link = sound_link.partition("&sq=")[0] + "&sq="
# On Youtube, each segment can be equivalent to 1 to several seconds of video (depending on latency settings)
# The itag determines the quality. 140 for audio seems best, 135 for video means 480p.
# See https://github.com/pytube/pytube/blob/master/pytube/itags.py for reference.
# The broadcastID follows the videoID. Format is currently: "&id=videoID.broadcastID&itags="
_id_fields = video_link.split("&id=")
if len(_id_fields) < 2:
    # Happens when video_link was left empty or is not a videoplayback URL;
    # stop with a readable message instead of an IndexError traceback.
    raise SystemExit(
        'No "&id=" parameter found in video_link: paste the full '
        'videoplayback URL into video_link before running.'
    )
# Cut at the next "&" first, so an id without ".broadcastID" cannot drag the
# rest of the query string into the directory name.
YT_HASH = _id_fields[1].split('&')[0].split('.')[0]
print(f'Capturing video with Hash ID: {YT_HASH}')
# Output layout: stream_capture_<id>/vid and stream_capture_<id>/aud
rootpath = f'stream_capture_{YT_HASH}'
vidpath = f'{rootpath}{sep}vid'
audpath = f'{rootpath}{sep}aud'
# The sequence number to begin downloading from.
seg = 1
# Create both output directories. exist_ok lets a resumed run reuse them and
# guarantees audpath exists even when only vidpath was left by an earlier run.
# The default mode is used (the former 0o766 left the directories without the
# execute bit for group/other, i.e. not traversable by them).
makedirs(vidpath, exist_ok=True)
makedirs(audpath, exist_ok=True)
# If we resume, get the latest chunk file we already have. Names whose part
# before the first dot is not a number are ignored rather than aborting the
# script.
_existing = []
for _name in listdir(vidpath):
    try:
        _existing.append(int(_name.split('.', 1)[0]))
    except ValueError:
        continue
seg = max(_existing, default=1)
if seg > 1:
    # Step back one file just in case the latest chunk got only partially
    # downloaded (we want to overwrite it, for good measure)
    seg -= 1
print(f'Starting from segment: {seg}')
# Width of the zero-padded sequence number used in chunk file names.
padding = 10
try:
    # Keeps fetching consecutive sequence numbers until a request or a file
    # write raises one of the exceptions handled below.
    while True:
        chunk_stem = f'{seg:0{padding}}'
        # Video first, then audio, for the same sequence number.
        for base_url, out_dir, ext in (
            (video_link, vidpath, 'mp4'),
            (sound_link, audpath, 'm4a'),
        ):
            urllib.request.urlretrieve(
                f'{base_url}{seg}',
                f'{out_dir}{sep}{chunk_stem}.{ext}',
            )
        print(f"Downloaded part {seg}")
        seg += 1
except urllib.error.URLError as err:
    print(f'network error {err}')
except OSError as err:
    print(f'file error: {err}')
#!/bin/bash
# To be used after using the manual_download.py script
# and right before mrwnwttk's merge.py script
# from https://github.com/mrwnwttk/youtube_stream_capture
# CHANGE THIS!
# Point to the script located in the submodule, relative to this script's location,
# otherwise point to where you have installed "youtube_stream_capture"
# Every level of the substitution is quoted so that a location containing
# spaces still resolves to a single path.
MERGE_SCRIPT="$(dirname "$(realpath "$0")")/youtube_stream_capture/merge.py"
# Make sure exactly one capture directory is there, and that it contains the
# aud and vid sub-directories. Every failure exits with a non-zero status and
# reports on stderr.
mapfile -t cap_dirs < <(find . -maxdepth 1 -type d -iname 'stream_capture*' | sort)
if (( ${#cap_dirs[@]} == 0 )); then
  echo "Error getting youtube hash from \"stream_capture_HASH_ID\" directory. Make sure it is present." >&2
  exit 1
elif (( ${#cap_dirs[@]} > 1 )); then
  # The rest of the script works on a single capture; refuse to guess.
  echo "Found more than one capture directory, keep only one in $(pwd):" >&2
  printf '  %s\n' "${cap_dirs[@]}" >&2
  exit 1
fi
CAP_DIR="${cap_dirs[0]}"
if [[ ! -d "${CAP_DIR}/aud" || ! -d "${CAP_DIR}/vid" ]]; then
  echo "aud or vid directory not found in ${CAP_DIR}" >&2
  exit 1
fi
# Derive the Youtube hash ID by stripping the "./stream_capture_" prefix from
# the capture directory name. When the prefix is absent the value comes back
# unchanged as "./stream_capture", and a placeholder ID is used instead.
YT_HASH="${CAP_DIR##./stream_capture_}"
if [[ "${YT_HASH}" != "./stream_capture" ]]; then
  echo "Detected youtube hash ID is $YT_HASH"
else
  YT_HASH="AAAAAAAAAAA"
  echo "Could not detect youtube hash ID in capture dirname, using default ${YT_HASH}"
fi
# Create directory expected by merge.py
target_dirname="segments_${YT_HASH}"
mkdir -p "${target_dirname}"
# Create symlinks to our previously downloaded chunks.
# The source is taken from CAP_DIR (the directory that was actually found)
# rather than rebuilt from YT_HASH: with the fallback hash the rebuilt name
# would point at a directory that does not exist. The path is made absolute so
# the links still resolve from inside target_dirname.
capture_abs="$(pwd)/${CAP_DIR#./}"
cp -s "${capture_abs}/aud/"* "${target_dirname}"
cp -s "${capture_abs}/vid/"* "${target_dirname}"
# Add the hash ID after the digits of each file, as expected by merge.py
AUDIO_REGEX='s/(\d*)\.m4a/$1_'"${YT_HASH}"'_audio\.ts/'
VIDEO_REGEX='s/(\d*)\.mp4/$1_'"${YT_HASH}"'_video\.ts/'
# ${target_dirname:?} aborts the script if the variable is empty, so the glob
# can never expand to "/*".
perl-rename "${AUDIO_REGEX}" "${target_dirname:?}"/*
perl-rename "${VIDEO_REGEX}" "${target_dirname:?}"/*
# Optionally, remove any leading padding zeros we added.
# \d+ (not \d*) keeps at least one digit, so an all-zero sequence number
# becomes "0" instead of losing its number altogether.
perl-rename 's/(.*\/)0*(\d+_.*)/$1$2/' "${target_dirname:?}"/*
# Call the merge script with bogus youtube URL since it expects one anyway.
# The temporary symlink directory is removed only when the merge succeeded.
if python "${MERGE_SCRIPT}" "https://www.youtube.com/watch?v=${YT_HASH}"; then
  echo "Removing temporary directory with symlinks \"${target_dirname}\"..."
  # Quoted, terminated with --, and guarded with :? so rm -r can only ever
  # receive the one intended, non-empty path.
  rm -r -- "${target_dirname:?}"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment