Forked from cheadrian/Youtube_Livestream_Parts_download.py
Last active
February 27, 2021 18:36
-
-
Save glubsy/6e9b3061e074f528ea7153647f9fe615 to your computer and use it in GitHub Desktop.
Download a YouTube live-streamed video from the start. These scripts are no longer updated; check out https://github.com/glubsy/livestream_saver instead!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh | |
# Join parts generated by the script: https://gist.github.com/cheadrian/b661fb68a6a87ea64069e641cef68c3e#gistcomment-3495351 | |
# Working directories, all resolved from the directory the script is started in.
base_dir="${PWD}"
vid_dir="${base_dir}/stream_download_test/vid/"
aud_dir="${base_dir}/stream_download_test/aud/"
mkdir -p vid_fix

# Number of chunk files to drop from the start and from the end of each list.
skip_start=58
skip_end=0
# List audio and video source files and format them for ffmpeg's concat demuxer.
#
# make_concat_list DIR LIST_NAME LABEL
#   DIR        directory holding the chunks (with a trailing slash)
#   LIST_NAME  name of the list file to create inside DIR
#   LABEL      word used in the progress messages ("video" / "audio")
#
# Reads the global skip_start / skip_end counters. A list file that already
# exists is left untouched.
make_concat_list() {
    local src_dir="$1" list_file="$1$2" label="$3"

    [[ -f "${list_file}" ]] && return 0

    # Ignore the list file being created, otherwise it will be included!
    ls -v "${src_dir}" | grep -v '\.txt$' > "${list_file}"
    sed -i "s/.*/file '&'/" "${list_file}"

    if (( skip_start > 0 )); then
        # Remove the first N lines
        echo "Discarding ${skip_start} ${label} files from the first line..."
        sed -i "1,${skip_start}d" "${list_file}"
    fi

    if (( skip_end > 0 )); then
        # Remove the last N lines. The count must come from the list being
        # trimmed, and the range starts at total - N + 1 so that exactly N
        # lines (not N + 1) are deleted.
        echo "Discarding ${skip_end} ${label} files at the end..."
        local total=$(wc -l < "${list_file}")
        local first_dropped=$(( total - skip_end + 1 ))
        (( first_dropped < 1 )) && first_dropped=1
        sed -i "${first_dropped},\$d" "${list_file}"
    fi
}

make_concat_list "${vid_dir}" vid_list.txt video
make_concat_list "${aud_dir}" aud_list.txt audio
# Convert video pieces to better format (in case we need)
# for f in ${vid_dir}*.mp4; do ffmpeg -i "${f}" -an -c:v libx264 -crf 18 ${f#${vid_dir}}; done;

# Concatenate into one video file
video_output="output.ts"
if [[ ! -e "${vid_dir}${video_output}" ]]; then
    # Subshell: the directory change stays local, and ffmpeg is not started
    # from the wrong directory if the cd fails.
    (
        cd "${vid_dir}" || exit 1
        # ffmpeg -f concat -i vid_list.txt -c:v libx264 -crf 16 -an -f mpegts output.mpg;
        ffmpeg -hide_banner -auto_convert 1 -f concat -i vid_list.txt -c copy -bsf h264_mp4toannexb -an -f mpegts "${video_output}"
    )
else
    echo "${video_output} already exists. Skipping creation."
fi

# Concatenate and encode into mp3 to remove timestamps embedded in the original files, doesn't seem to work with m4a format
audio_output="output.mp3"
if [[ ! -e "${aud_dir}${audio_output}" ]]; then
    (
        cd "${aud_dir}" || exit 1
        # ffmpeg -hide_banner -f concat -i aud_list.txt -c copy ${audio_output};
        # Write to ${audio_output} so the existence check above and the mux
        # step below always refer to the file actually produced here.
        ffmpeg -hide_banner -f concat -i aud_list.txt -acodec libmp3lame -b:a 128k "${audio_output}"
    )
else
    echo "${audio_output} already exists. Skipping creation."
fi

# ffmpeg -i ${vid_dir}output.mp4 -i ${aud_dir}output.m4a -filter_complex \
# "[0:v][1:a] concat=n=2:v=1:a=1 [outv] [outa]" -map "[outv]" -map "[outa]" out.mp4

# Mux for final file. Specifying 60 fps seems to be required here to ignore original timestamps in video stream
final_output="stream_output_final.mp4"
echo "Muxing final file ${final_output}"
ffmpeg -hide_banner -i "${vid_dir}${video_output}" -r:0 60 -vn -i "${aud_dir}${audio_output}" -c:v libx264 -crf 18 -c:a copy "${final_output}"
# ============================= | |
# Archived notes: | |
# ============================= | |
## Easier way could be with a bash for loop | |
# for f in ./*.wav; do echo "file '$f'" >> mylist.txt; done | |
## or | |
# printf "file '%s'\n" ./*.wav > mylist.txt | |
## Store files in an array | |
# files=(*.c) | |
# for f in "${files[@]}"; do cmd "$f"; done
## Or just use glob
# for f in *.c; do cmd "$f"; done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# With the help of this script you can download parts from the Youtube Video | |
# that is live streamed, from the start of the stream | |
# https://gist.github.com/glubsy/6e9b3061e074f528ea7153647f9fe615 | |
import urllib.request | |
import urllib.error | |
from os import makedirs, sep, listdir | |
# Note: you need to be logged in to get the URL, we do not use cookies directly here. | |
# E.G: "https://r4---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603041842& ..... 2.20201016.02.00&sq=..." | |
# The sound link should contain: &mime=audio in it. | |
# Here's an example from NASA LIVE: | |
# VIDEO: https://r5---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603165657&ei=eQmOX8TeFtS07gO1xLWwDA&ip=x.x.x.x&id=DDU-rZs-Ic4.1&itag=137&aitags=133%2C134%2C135%2C136%2C137%2C160&source=yt_live_broadcast&requiressl=yes&mh=PU&mm=44%2C29&mn=sn-gqn-p5ns%2Csn-c0q7lnsl&ms=lva%2Crdu&mv=m&mvi=5&pl=20&initcwndbps=1350000&vprv=1&live=1&hang=1&noclen=1&mime=video%2Fmp4&gir=yes&mt=1603143920&fvip=5&keepalive=yes&fexp=23915654&c=WEB&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cvprv%2Clive%2Chang%2Cnoclen%2Cmime%2Cgir&sig=AOq0QJ8wRQIgQMnxy1Yk3HLTpqbOGmjZYH1CXCTNx6u6PgngAVGi4EQCIQDWyaye-u_KGyVQ0HRUsyKVaAzyXbmzDqOGVGpIyP7VtA%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRAIgR5QVZh23NcLE2nRpo5IT-axGEfUCJrXKMmJHjXQdkCYCIFLsIFacvPpy98zaNSB0RfXswacyc-Ru3sYeEjTFym43&alr=yes&cpn=LlPCcTsE_3Xao9Xh&cver=2.20201016.02.00&sq=2504043&rn=13&rbuf=21958 | |
# AUDIO: https://r5---sn-gqn-p5ns.googlevideo.com/videoplayback?expire=1603165657&ei=eQmOX8TeFtS07gO1xLWwDA&ip=x.x.x.x&id=DDU-rZs-Ic4.1&itag=140&source=yt_live_broadcast&requiressl=yes&mh=PU&mm=44%2C29&mn=sn-gqn-p5ns%2Csn-c0q7lnsl&ms=lva%2Crdu&mv=m&mvi=5&pl=20&initcwndbps=1350000&vprv=1&live=1&hang=1&noclen=1&mime=audio%2Fmp4&gir=yes&mt=1603143920&fvip=5&keepalive=yes&fexp=23915654&c=WEB&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Clive%2Chang%2Cnoclen%2Cmime%2Cgir&sig=AOq0QJ8wRAIgWFTZLV1G33cKJoitlK7dUgNg1KuXyvC6F9F7Lc6x3gcCIHaGjehjvVAjUd6cqMnTLtBq9pPRfQWXM3bwI1qQYqpx&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRAIgR5QVZh23NcLE2nRpo5IT-axGEfUCJrXKMmJHjXQdkCYCIFLsIFacvPpy98zaNSB0RfXswacyc-Ru3sYeEjTFym43&alr=yes&cpn=LlPCcTsE_3Xao9Xh&cver=2.20201016.02.00&sq=2504045&rn=20&rbuf=17971 | |
# Use MPV or VLC to play the parts. ffmpeg to re-encode / re-mux and then concatenate. | |
# You can copy the entire link here, it will be split automatically below | |
video_link = ""
# video_link = "VIDEO LINK THE END -> &sq="  # Look for the substring mime=video to make sure
# You can copy the entire link here, it will be split automatically below
sound_link = ""
# sound_link = "AUDIO LINK THE END -> &sq= "  # Look for the substring mime=audio to make sure

# On Youtube, each segment can be equivalent to 1 to several seconds of video (depending on latency settings)
# The itag determines the quality. 140 for audio seems best, 135 for video means 480p.
# See https://github.com/pytube/pytube/blob/master/pytube/itags.py for reference.
# The broadcastID follows the videoID. Format is currently: "&id=videoID.broadcastID&itags="

# Number of digits the segment number is zero-padded to in chunk file names.
padding = 10


def base_link(link):
    """Return *link* cut at its first ``&sq=`` with a bare ``&sq=`` appended.

    The result is ready to have a segment number concatenated to it. A link
    without ``&sq=`` is kept whole and simply gets ``&sq=`` appended.
    """
    return f'{link.split("&sq=")[0]}&sq='


def extract_video_id(link):
    """Return the videoID part of the ``&id=videoID.broadcastID`` parameter.

    Raises:
        ValueError: if *link* has no ``&id=`` parameter or its value is empty.
    """
    _, marker, rest = link.partition("&id=")
    # Stop at the next parameter first so that a value without ".broadcastID"
    # does not swallow the rest of the URL.
    video_id = rest.split("&", 1)[0].split(".", 1)[0]
    if not marker or not video_id:
        raise ValueError('no "&id=videoID.broadcastID" parameter found in the video link')
    return video_id


def resume_segment(directory):
    """Return the segment number to start from, given the chunks in *directory*.

    File names whose part before the first dot is not a number are ignored.
    With no chunk present the result is 1; otherwise it is the highest number
    found minus one (never below 1), so that a possibly partial last chunk
    gets downloaded again.
    """
    numbers = []
    for name in listdir(directory):
        stem = name.split(".", 1)[0]
        if stem.isdecimal():
            numbers.append(int(stem))
    latest = max(numbers, default=1)
    return latest - 1 if latest > 1 else latest


def download_segments(video_base, sound_base, vidpath, audpath, seg):
    """Download video and audio chunks from *seg* upwards until a request fails.

    Each chunk is saved as ``<zero-padded seg>.mp4`` in *vidpath* and
    ``<zero-padded seg>.m4a`` in *audpath*. Network and file errors end the
    loop and are reported on stdout.
    """
    try:
        while True:
            chunk_name = f'{seg:0{padding}}'
            urllib.request.urlretrieve(f'{video_base}{seg}', f'{vidpath}{sep}{chunk_name}.mp4')
            urllib.request.urlretrieve(f'{sound_base}{seg}', f'{audpath}{sep}{chunk_name}.m4a')
            print(f"Downloaded part {seg}")
            seg += 1
    except urllib.error.URLError as e:
        print(f'network error {e}')
    except OSError as e:
        print(f'file error: {e}')


def main():
    """Create the capture directories and download every segment still missing."""
    try:
        yt_hash = extract_video_id(video_link)
    except ValueError as e:
        raise SystemExit(f'{e}; paste the full video URL into video_link') from None
    if sound_link == "&sq=":
        raise SystemExit('sound_link is empty; paste the full audio URL into sound_link')

    print(f'Capturing video with Hash ID: {yt_hash}')
    rootpath = f'stream_capture_{yt_hash}'
    vidpath = f'{rootpath}{sep}vid'
    audpath = f'{rootpath}{sep}aud'

    # exist_ok: when resuming, both directories are still guaranteed to exist
    # even if only one of them was created by the previous run.
    makedirs(vidpath, 0o766, exist_ok=True)
    makedirs(audpath, 0o766, exist_ok=True)

    # The sequence number to begin (or resume) the download from.
    seg = resume_segment(vidpath)
    print(f'Starting from segment: {seg}')
    download_segments(video_link, sound_link, vidpath, audpath, seg)


# Keep only the part of each link up to the sequence number parameter.
video_link = base_link(video_link)
sound_link = base_link(sound_link)

if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# To be used after using the manual_download.py script | |
# and right before mrwnwttk's merge.py script | |
# from https://github.com/mrwnwttk/youtube_stream_capture | |
# CHANGE THIS!
# Point to the script located in the submodule, relative to this script's location,
# otherwise point to where you have installed "youtube_stream_capture"
MERGE_SCRIPT="$(dirname "$(realpath "$0")")/youtube_stream_capture/merge.py"

# Make sure exactly one capture directory is present in the current directory.
# Every failure below exits with a non-zero status and reports on stderr.
CAP_DIR=$(find . -maxdepth 1 -type d -iname 'stream_capture*')
if [[ -z "${CAP_DIR}" ]]; then
    echo "Error getting youtube hash from \"stream_capture_HASH_ID\" directory. Make sure it is present." >&2
    exit 1
elif [[ "${CAP_DIR}" == *$'\n'* ]]; then
    # find printed several paths: the rest of the script needs a single one.
    echo "More than one capture directory found in the current directory, keep only one:" >&2
    echo "${CAP_DIR}" >&2
    exit 1
elif [[ ! -d "${CAP_DIR}/aud" || ! -d "${CAP_DIR}/vid" ]]; then
    echo "aud or vid directory not found in ${CAP_DIR}" >&2
    exit 1
fi
# Derive the Youtube hash ID by stripping the "./stream_capture_" prefix from
# the capture directory name. A directory named exactly "./stream_capture" has
# no such prefix and comes out unchanged, in which case a placeholder is used.
YT_HASH="${CAP_DIR##./stream_capture_}"
if [[ "${YT_HASH}" != "./stream_capture" ]]; then
    echo "Detected youtube hash ID is $YT_HASH"
else
    YT_HASH="AAAAAAAAAAA"
    echo "Could not detect youtube hash ID in capture dirname, using default ${YT_HASH}"
fi
# Create directory expected by merge.py
target_dirname="segments_${YT_HASH}"
mkdir -p "${target_dirname}"

# Create symlinks to our previously downloaded chunks.
# The source is the capture directory that was actually found, not a path
# rebuilt from YT_HASH: the two differ when the placeholder hash is in use.
# cp -s is given absolute source paths because the links are created in
# another directory.
cap_dir_abs="$(realpath "${CAP_DIR}")"
cp -s "${cap_dir_abs}"/aud/* "${target_dirname}"
cp -s "${cap_dir_abs}"/vid/* "${target_dirname}"

# Add the hash ID after the digits of each file, as expected by merge.py
AUDIO_REGEX='s/(\d+)\.m4a$/$1_'"${YT_HASH}"'_audio\.ts/'
VIDEO_REGEX='s/(\d+)\.mp4$/$1_'"${YT_HASH}"'_video\.ts/'
perl-rename "${AUDIO_REGEX}" "${target_dirname}"/*
perl-rename "${VIDEO_REGEX}" "${target_dirname}"/*

# Optionally, remove any leading padding zeros we added.
# \d+ keeps at least one digit, so an all-zero number becomes "0" rather than
# disappearing.
perl-rename 's/(.*\/)0*(\d+_.*)/$1$2/' "${target_dirname}"/*
# Call the merge script with bogus youtube URL since it expects one anyway.
# The symlink directory is removed only after a successful merge; on failure
# it is kept and the merge script's exit status is passed on.
if python "${MERGE_SCRIPT}" "https://www.youtube.com/watch?v=${YT_HASH}"; then
    echo "Removing temporary directory with symlinks \"${target_dirname}\"..."
    rm -r -- "${target_dirname}"
else
    merge_status=$?
    echo "Merge script failed with status ${merge_status}, keeping \"${target_dirname}\"." >&2
    exit "${merge_status}"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment