Created
December 30, 2021 06:49
-
-
Save Zerui18/411ae29b74570c5553606f62762fa647 to your computer and use it in GitHub Desktop.
Multi-threaded, multi-part downloader for videos on Google Drive, with cookie support via Firefox (cookies can also be hardcoded, or the script modified to work with other browsers).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
import requests | |
from multiprocessing.pool import ThreadPool | |
import argparse | |
import browser_cookie3 as bc | |
from tqdm import tqdm | |
SPLITS = 8      # default number of parallel parts (kept for compatibility; CLI uses --n-threads)
MB = 1000000    # decimal megabyte, matches the size printout in download()

# Browser-like request headers so Google serves the raw video stream
# rather than an interstitial page.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0',
    'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Referer': 'https://youtube.googleapis.com/',
    'Sec-Fetch-Dest': 'video',
    'Sec-Fetch-Mode': 'no-cors',
    'Sec-Fetch-Site': 'cross-site',
    'TE': 'trailers'
}

# Attach the user's Google cookies from the local Firefox profile so the
# request is authenticated. NOTE(review): requires an existing Firefox
# login to Google on this machine.
cookies_jar = bc.firefox(domain_name='.google.com')
# RFC 6265 requires cookie-pairs to be separated by '; ' (semicolon + space),
# not a bare ';'.
cookies = '; '.join(f'{cookie.name}={cookie.value}' for cookie in cookies_jar)
HEADERS['Cookie'] = cookies
def download_part(url, start, end, timeout=60):
    """Download the inclusive byte range [start, end] of `url` and return it.

    Args:
        url: The direct video URL.
        start: First byte offset of the part (inclusive).
        end: Last byte offset of the part (inclusive).
        timeout: Per-request timeout in seconds (new, backward-compatible
            default) so a stalled connection cannot hang a worker forever.

    Returns:
        The raw bytes of the requested range.

    Raises:
        requests.HTTPError: On a non-2xx response — previously an HTML
            error page (e.g. quota exceeded) would be silently returned
            and written into the output file.
    """
    headers = HEADERS.copy()
    headers['Range'] = f'bytes={start}-{end}'
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.content
def compute_chunks(length, chunk_size):
    """Return a list of inclusive (start, end) byte ranges covering `length` bytes.

    The last range is truncated to end at length - 1.
    """
    starts = list(range(0, length, chunk_size))
    ends = [s - 1 for s in starts[1:]] + [length - 1]
    return list(zip(starts, ends))


def download(url, filename, chunk_size, n_threads):
    """Download `url` to `filename` in parallel byte-range chunks.

    Args:
        url: The direct video URL.
        filename: Output file path, written sequentially in chunk order.
        chunk_size: Size of each byte-range request, in bytes.
        n_threads: Number of worker threads fetching chunks concurrently.

    Raises:
        RuntimeError: If the server response carries no positive
            Content-Length (previously this crashed with a TypeError when
            the header was missing, or relied on `assert`, which is
            stripped under `python -O`).
    """
    # Probe the URL once (stream=True so the body is never downloaded here)
    # to learn the total size from the Content-Length header.
    print('Initial request')
    with requests.get(url, headers=HEADERS, stream=True) as response:
        status = response.status_code
        # Default of 0 avoids int(None) when the header is absent.
        length = int(response.headers.get('content-length', 0))
    if length <= 0:
        raise RuntimeError(f'Empty response with code {status}!')
    print(f'File size is {length / MB:.2f}mb.')

    # Plan the byte ranges to fetch.
    ranges = compute_chunks(length, chunk_size)
    args = [(url, start, end) for start, end in ranges]
    print(f'Downloading as {len(args)} chunks.')

    # imap preserves order, so chunks are written sequentially and the
    # file is assembled correctly even though downloads run in parallel.
    with ThreadPool(n_threads) as pool, open(filename, 'wb') as f:
        for chunk in tqdm(pool.imap(lambda a: download_part(*a), args),
                          total=len(args), desc='Download'):
            f.write(chunk)
    print('All done!')
if __name__ == '__main__':
    # CLI entry point: parse the arguments and kick off the parted download.
    cli = argparse.ArgumentParser()
    cli.add_argument('url', help='The video url.')
    cli.add_argument('out', help='The output filename.')
    cli.add_argument('--chunk-size', type=int, default=2 * MB,
                     help='The size of each chunk.')
    cli.add_argument('--n-threads', type=int, default=8,
                     help='Number of threads to use.')
    opts = cli.parse_args()
    download(opts.url, opts.out, opts.chunk_size, opts.n_threads)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment