Last active
August 30, 2023 19:17
-
-
Save NewAlexandria/c2d0ee0160631e4d0e88b0ce4b97c917 to your computer and use it in GitHub Desktop.
POC code for extracting segment data from a HowTube.com HAR session.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
# frozen_string_literal: true

# POC: rebuild a media file from the `segment<N>.ts` requests recorded in a HAR capture.
#
# Usage: script.rb [capture.har]
#   When no argument ending in `.har` is given, the most recently modified
#   `*.har` file in the current directory is used.
#
# Every segment name found in the HAR is expected to have been requested more
# than once; the entry with the largest Content-Length is treated as the video
# rendition and the one with the smallest as the audio rendition. Each
# rendition is downloaded with `yt-dlp`, concatenated, remuxed with `ffmpeg`,
# and the two results are finally muxed into `<har name>.g.mp4`.
#
# External commands are always invoked with an argument vector (never through
# a shell string), because URLs and file names come from the HAR capture.
require 'json'
require 'fileutils'

# Matches the segment file name inside a request URL.
SEGMENT_URL_RX = /segment[0-9]+\.ts/
# Captures the numeric index of a downloaded segment file name.
SEGMENT_FILE_RX = /\Asegment([0-9]+)/
# Set to true to delete the per-rendition segment directories when done.
CLEAN_SOURCES = false
# Set to true to delete the intermediate joined files when done.
CLEAN_JOINS = false

# Returns the HAR path to process: the first argument when it ends in `.har`,
# otherwise the newest `*.har` in the current directory (nil when none exists).
def har_path_from(argv)
  candidate = argv.first
  return candidate if candidate&.end_with?('.har')

  Dir.glob('*.har').max_by { |f| File.mtime(f) }
end

# Runs an external command without a shell; reports failures on stderr but
# keeps going so one bad segment does not abort the whole run.
def run_cmd(*cmd)
  ok = system(*cmd)
  warn "command failed: #{cmd.join(' ')}" unless ok
  ok
end

# Extracts `{ url:, name:, length: }` for every HAR entry whose request URL
# contains a segment file name; all other entries are skipped.
def segment_requests(har)
  Array(har.dig('log', 'entries')).each_with_object([]) do |entry, found|
    url = entry.dig('request', 'url')
    name = url.to_s[SEGMENT_URL_RX]
    next unless name

    # Compare case-insensitively: header-name casing varies between captures.
    header = Array(entry.dig('response', 'headers'))
             .find { |h| h['name'].to_s.casecmp?('Content-Length') }
    found << { url: url, name: name, length: header ? header['value'].to_i : 0 }
  end
end

# Lists regular files in the current directory named `segment<N>...`,
# ordered by their numeric index N.
def local_segment_files
  Dir.children('.')
     .select { |f| File.file?(f) && f.match?(SEGMENT_FILE_RX) }
     .sort_by { |f| f[SEGMENT_FILE_RX, 1].to_i }
end

# Concatenates the given files, in order, into `dest` (binary-safe).
def concat_files(sources, dest)
  File.open(dest, 'wb') do |out|
    sources.each { |src| IO.copy_stream(src, out) }
  end
end

# Downloads `urls`, joins the resulting segment files into
# `<target_file>.all_<label>.ts`, remuxes that to `.mp4`, and moves the
# segment files into a freshly emptied `<target_file>--<label>/` directory.
# Returns the path of the remuxed mp4.
def fetch_and_join(urls, label, target_file)
  puts "download #{label}"
  urls.each { |url| run_cmd('yt-dlp', url) }

  puts "join #{label}"
  joined_ts = "#{target_file}.all_#{label}.ts"
  joined_mp4 = "#{target_file}.all_#{label}.mp4"
  segment_files = local_segment_files
  concat_files(segment_files, joined_ts)
  run_cmd('ffmpeg', '-i', joined_ts, '-acodec', 'copy', '-vcodec', 'copy', joined_mp4)

  # Move the segments out of the way so the next rendition starts clean.
  stash_dir = "#{target_file}--#{label}"
  FileUtils.rm_rf(stash_dir)
  FileUtils.mkdir_p(stash_dir)
  FileUtils.mv(segment_files, stash_dir)
  joined_mp4
end

# acts on the latest HAR file, if none provided
filename = har_path_from(ARGV)
abort 'No .har file given and none found in the current directory' unless filename

target_file = File.basename(filename)
puts target_file

har = JSON.parse(File.read(filename))
puts har.size

segs = segment_requests(har).group_by { |req| req[:name] }
puts segs.size
abort "No segment requests found in #{filename}" if segs.empty?

# Largest response per segment name -> video; smallest -> audio.
video_urls = segs.values.map { |group| group.max_by { |req| req[:length] }[:url] }
audio_urls = segs.values.map { |group| group.min_by { |req| req[:length] }[:url] }

# video
video_mp4 = fetch_and_join(video_urls, 'video', target_file)

# audio
audio_mp4 = fetch_and_join(audio_urls, 'audio', target_file)
audio_mp3 = "#{target_file}.all_audio.mp3"
# The audio remux is only renamed to .mp3 (no re-encode), as before.
File.rename(audio_mp4, audio_mp3) if File.exist?(audio_mp4)

# join
puts 'join all'
run_cmd('ffmpeg', '-i', video_mp4, '-i', audio_mp3, '-c', 'copy', "#{target_file}.g.mp4")

# clean
puts 'clean'
if CLEAN_JOINS
  FileUtils.rm_rf([
    video_mp4,
    "#{target_file}.all_video.ts",
    audio_mp3,
    "#{target_file}.all_audio.ts"
  ])
end
if CLEAN_SOURCES
  FileUtils.rm_rf(["#{target_file}--audio", "#{target_file}--video"])
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment