Last active
February 13, 2022 12:15
-
-
Save gaoconghui/f754d793270914da1b2cb25bde883f43 to your computer and use it in GitHub Desktop.
大文件多线程下载
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| import os | |
| import threading | |
| import requests | |
| import tqdm | |
class Downloader(object):
    """Download one file over HTTP in several byte ranges, one thread per range.

    Each worker streams its range into ``<name>.<index>.downloading`` and
    renames it to ``<name>.<index>`` once complete; ``run`` then concatenates
    the parts into ``<name>`` and deletes them. Progress is reported through
    a ``tqdm`` bar.
    """

    # Size in bytes of each chunk streamed to disk or copied while merging.
    chunk_size = 64 * 1024

    def __init__(self, url, name="notitle", worker_num=8, resume=True):
        """
        Query the file size and set up the progress bar.

        :param url: address of the file to download
        :param name: path of the output file
        :param worker_num: number of download threads
        :param resume: whether to reuse parts left by a previous run
        :raises KeyError: if the server does not report ``Content-Length``
        """
        self.url = url
        self.num = worker_num
        self.filename = name
        # requests.head() does not follow redirects unless asked to; without
        # this the Content-Length of a redirect response would be used.
        r = requests.head(self.url, allow_redirects=True)
        self.total = int(r.headers['Content-Length'])
        self.bar = tqdm.tqdm(total=self.total)
        self.resume = resume

    def get_range(self):
        """
        Yield one ``(start, end)`` byte range per worker.

        ``end`` is inclusive; the last range uses ``''`` as its end, meaning
        "until the end of the file". The worker count is clamped to
        ``1..total`` so that no empty or negative range is produced.
        """
        workers = max(1, min(self.num, self.total))
        # Integer division keeps the offsets exact for very large files.
        offset = self.total // workers
        for i in range(workers):
            if i == workers - 1:
                yield (i * offset, '')
            else:
                yield (i * offset, (i + 1) * offset - 1)

    def download(self, start, end, filename):
        """
        Download bytes ``start``-``end`` of the file into ``filename``.

        With ``resume`` enabled, a finished part is reused as is and a
        ``.downloading`` partial file is continued from where it stopped.

        :param start: first byte offset to fetch
        :param end: last byte offset to fetch (inclusive), or ``''`` for EOF
        :param filename: path of the part file to produce
        :raises IOError: if the server does not honour the range request
        :return: None
        """
        if self.resume and os.path.exists(filename):
            # Part already complete: only account for it in the progress bar.
            self.update(os.path.getsize(filename))
            return

        downloading_filename = filename + ".downloading"
        mode = "wb"  # start from scratch unless a partial file is resumed
        if self.resume and os.path.exists(downloading_filename):
            complete_size = os.path.getsize(downloading_filename)
            self.update(complete_size)
            start += complete_size
            mode = "ab"

        last = self.total - 1 if end == '' else end
        if start > last:
            # Nothing left to fetch (partial file already covers the range,
            # or the remote file is empty); just make sure the file exists.
            open(downloading_filename, "ab").close()
        else:
            headers = {'Range': 'bytes=%s-%s' % (start, end), 'Accept-Encoding': '*'}
            r = requests.get(self.url, headers=headers, stream=True)
            try:
                # A plain 200 carries the whole file, which is only
                # acceptable when the whole file is what was asked for.
                whole_file_ok = r.status_code == 200 and start == 0 and end == ''
                if r.status_code != 206 and not whole_file_ok:
                    raise IOError(
                        "range request %s-%s not honoured (HTTP %s)"
                        % (start, end, r.status_code))
                with open(downloading_filename, mode) as f:
                    for chunk in r.iter_content(chunk_size=self.chunk_size):
                        if chunk:
                            f.write(chunk)
                            self.update(len(chunk))
            finally:
                r.close()

        if os.path.exists(filename):
            # Only reachable with resume disabled; os.rename cannot replace
            # an existing file on Windows.
            os.remove(filename)
        os.rename(downloading_filename, filename)

    def update(self, size):
        """Advance the progress bar by ``size`` bytes."""
        self.bar.update(size)

    def run(self):
        """
        Download every range in its own thread, then merge the parts.

        The first exception raised by a worker is re-raised here before the
        output file is touched, so a failed download never leaves a
        truncated file under the final name.
        """
        thread_list = []
        sub_filenames = []
        errors = []

        def worker(start, end, filename):
            try:
                self.download(start, end, filename)
            except Exception as exc:
                errors.append(exc)

        try:
            for index, (start, end) in enumerate(self.get_range()):
                # One part file per worker, named after the range index.
                filename = self.filename + "." + str(index)
                sub_filenames.append(filename)
                thread = threading.Thread(target=worker, args=(start, end, filename))
                thread.start()
                thread_list.append(thread)
            for thread in thread_list:
                thread.join()
            if errors:
                raise errors[0]

            with open(self.filename, "wb") as f:
                for sub_filename in sub_filenames:
                    # Parts are binary data: read as bytes, in bounded chunks.
                    with open(sub_filename, "rb") as sub_f:
                        while True:
                            block = sub_f.read(self.chunk_size)
                            if not block:
                                break
                            f.write(block)
                    os.remove(sub_filename)
        finally:
            self.bar.close()
if __name__ == "__main__":
    # Demo entry point: fetch a sample video using two worker threads.
    downloader = Downloader(
        url="http://61.240.28.1/v1.go2yd.com/user_upload/151446336492504301b5c6e9fc4fffc0679adc158ba9d.mp4_bd.mp4",
        name="m.mp4",
        worker_num=2,
    )
    downloader.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment