Skip to content

Instantly share code, notes, and snippets.

@gaoconghui
Last active February 13, 2022 12:15
Show Gist options
  • Select an option

  • Save gaoconghui/f754d793270914da1b2cb25bde883f43 to your computer and use it in GitHub Desktop.

Select an option

Save gaoconghui/f754d793270914da1b2cb25bde883f43 to your computer and use it in GitHub Desktop.
大文件多线程下载
# -*- coding: utf-8 -*-
import os
import threading
import requests
import tqdm
class Downloader(object):
    """Download one file as several byte ranges in parallel threads.

    Each range is streamed to ``<name>.<index>.downloading`` and renamed to
    ``<name>.<index>`` once it is complete; ``run`` then concatenates the
    pieces, in order, into ``<name>``. Progress is shown with a tqdm bar.
    """

    # Bytes read per iteration when streaming from the network or merging.
    CHUNK_SIZE = 64 * 1024

    def __init__(self, url, name="notitle", worker_num=8, resume=True):
        """
        Query the file size and set up the progress bar.

        :param url: address of the file to download
        :param name: name of the output file
        :param worker_num: number of download threads
        :param resume: whether to reuse pieces left over from a previous run
        """
        self.url = url
        self.num = worker_num
        self.filename = name
        # requests.head() does not follow redirects unless asked to; without
        # this the Content-Length could belong to the redirect response.
        r = requests.head(self.url, allow_redirects=True)
        r.raise_for_status()
        self.total = int(r.headers['Content-Length'])
        self.bar = tqdm.tqdm(total=self.total)
        self.resume = resume

    def get_range(self):
        """
        Yield one ``(start, end)`` pair of inclusive byte offsets per piece.

        The last piece is yielded with ``end == ''`` (open-ended range) so it
        also picks up the remainder of the integer division.
        """
        # Never create more pieces than there are bytes (that would produce
        # empty, invalid ranges) and always create at least one piece.
        pieces = max(1, min(self.num, self.total))
        # Floor division keeps this exact for sizes beyond float precision.
        offset = self.total // pieces
        for i in range(pieces):
            if i == pieces - 1:
                yield (i * offset, '')
            else:
                yield (i * offset, (i + 1) * offset - 1)

    def download(self, start, end, filename):
        """
        Download one byte range into ``filename``, reusing earlier work if allowed.

        :param start: first byte offset of the piece
        :param end: last byte offset of the piece (inclusive), or '' for "to the end"
        :param filename: name of the finished piece file
        :return: None
        """
        # Inclusive offset of the last byte of this piece.
        last = self.total - 1 if end == '' else end

        if self.resume and os.path.exists(filename):
            # The finished piece is already on disk: only account for it.
            self.update(last - start + 1)
            return

        downloading_filename = filename + ".downloading"
        if self.resume and os.path.exists(downloading_filename):
            complete_size = os.path.getsize(downloading_filename)
            self.update(complete_size)
            start += complete_size
            if start > last:
                # Everything was fetched earlier and only the rename is
                # missing; asking for an empty range would be an error.
                os.rename(downloading_filename, filename)
                return

        # Append only when resuming; otherwise stale partial data must go.
        mode = "ab" if self.resume else "wb"
        # 'identity' asks for an uncompressed body so that byte offsets and
        # the progress count refer to the actual file content.
        headers = {'Range': 'bytes=%s-%s' % (start, end),
                   'Accept-Encoding': 'identity'}
        r = requests.get(self.url, headers=headers, stream=True)
        try:
            r.raise_for_status()
            whole_file = start == 0 and last >= self.total - 1
            if r.status_code != 206 and not whole_file:
                # A non-partial reply carries the full body, which would
                # corrupt this piece.
                raise IOError(
                    "server ignored the Range request for %s (HTTP %s)"
                    % (filename, r.status_code))
            with open(downloading_filename, mode) as f:
                for chunk in r.iter_content(chunk_size=self.CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
                        self.update(len(chunk))
        finally:
            r.close()
        os.rename(downloading_filename, filename)

    def update(self, size):
        """Advance the progress bar by ``size`` bytes."""
        self.bar.update(size)

    def _merge_parts(self, sub_filenames):
        """Concatenate the piece files, in order, into ``self.filename`` and delete them."""
        # Check first so that a failed piece does not cost the finished ones:
        # merging truncates the output and removes pieces as it goes.
        missing = [p for p in sub_filenames if not os.path.exists(p)]
        if missing:
            raise IOError(
                "download incomplete, missing pieces: %s" % ", ".join(missing))
        with open(self.filename, "wb") as f:
            for sub_filename in sub_filenames:
                # Binary mode: the pieces are raw bytes, not text.
                with open(sub_filename, "rb") as sub_f:
                    while True:
                        block = sub_f.read(self.CHUNK_SIZE)
                        if not block:
                            break
                        f.write(block)
                os.remove(sub_filename)

    def run(self):
        """Start one thread per piece, wait for all of them, then merge the pieces."""
        thread_list = []
        sub_filenames = []
        try:
            for index, ran in enumerate(self.get_range()):
                # One piece file and one worker thread per byte range.
                filename = self.filename + "." + str(index)
                sub_filenames.append(filename)
                start, end = ran
                thread = threading.Thread(
                    target=self.download, args=(start, end, filename))
                thread.start()
                thread_list.append(thread)
            for thread in thread_list:
                thread.join()
            self._merge_parts(sub_filenames)
        finally:
            self.bar.close()
if __name__ == "__main__":
    # Demo run: fetch a sample video with two worker threads into m.mp4.
    downloader = Downloader(
        url="http://61.240.28.1/v1.go2yd.com/user_upload/151446336492504301b5c6e9fc4fffc0679adc158ba9d.mp4_bd.mp4",
        name="m.mp4",
        worker_num=2,
    )
    downloader.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment