Created
February 19, 2017 21:34
-
-
Save IceflowRE/cb61150a75de5fa0e54882fbdddc0e53 to your computer and use it in GitHub Desktop.
Download a list of links from a file (link_list.txt)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
from concurrent import futures | |
import urllib3, certifi | |
from pathlib import Path | |
# Host name of the server; also the prefix cut off each entry of
# link_list.txt to obtain the request path.
address = 'github.com'

# Count of finished download jobs, incremented by print_progress.
done_links = 0

# Request paths collected by create_download_list.
file_list = []

# Directory the downloaded files are written into.
download_path = Path('./files/')

# HTTPS connection pool for `address`; certificates are verified against
# the CA bundle shipped with certifi.
downloader = urllib3.HTTPSConnectionPool(
    address,
    maxsize=8,
    cert_reqs='CERT_REQUIRED',
    ca_certs=certifi.where(),
)
def create_download_list():
    """
    Read ``link_list.txt`` and append one request path per link to ``file_list``.

    Every non-empty line is treated as one link. Surrounding whitespace
    (including the line break) is stripped, an optional ``scheme://`` prefix
    and the leading ``address`` host are removed, and what remains is stored
    as the path to request from the connection pool. Blank lines are skipped.
    """
    global file_list
    with open('link_list.txt') as f:
        for raw_line in f:
            # strip() instead of cutting the last character: the final line
            # of the file does not necessarily end with a newline.
            link = raw_line.strip()
            if not link:
                continue
            # Drop a leading scheme such as "https://"; a "://" that only
            # shows up later in the path or query is left untouched.
            scheme, separator, remainder = link.partition('://')
            if separator and '/' not in scheme:
                link = remainder
            # Only cut the host off when it is really there.
            if link.startswith(address):
                link = link[len(address):]
            file_list.append(link)
def print_progress(full_percentage, job):
    """
    Done-callback: count one more finished job and redraw the progress line.

    :param full_percentage: The number of jobs that corresponds to 100%.
    :param job: The finished job; part of the callback signature, not used here.
    """
    global done_links
    done_links += 1

    if full_percentage == 0:
        # Nothing to divide by, so no percentage can be shown.
        print('\r' + 'Error for full_percentage....', end='')
        return

    percent = round(100 / full_percentage * done_links, 1)
    status = 'Progress: %d/%d | %d %%' % (done_links, full_percentage, percent)
    # The carriage return moves the cursor back so the line is overwritten.
    print('\r' + status, end='')
def download_file(url, target_path: Path):
    """
    Download one file through the module-level ``downloader`` pool.

    If ``target_path`` already exists, ``_d`` is appended to it until an
    unused path is found. The response body is written to disk in chunks
    instead of being held in memory as a whole, and a partially written file
    is removed again when the transfer fails.

    :param url: The request path handed to the connection pool.
    :param target_path: The path the file should be written to.
    :return: ``"TRUE"`` on success, otherwise a message that starts with
        ``"DOWNLOAD ERROR "`` and contains the error and the url.
    """
    download_success = "TRUE"
    try:
        while target_path.exists():
            target_path = Path(str(target_path) + "_d")
        with downloader.request('GET', url, preload_content=False,
                                retries=urllib3.util.retry.Retry(3)) as reader:
            if reader.status != 200:
                raise urllib3.exceptions.HTTPError(str(reader.status))
            try:
                with target_path.open(mode='wb') as out_file:
                    # preload_content=False leaves the body unread, so it
                    # can be copied piece by piece.
                    for chunk in reader.stream(64 * 1024):
                        out_file.write(chunk)
            except Exception:
                # Do not leave a truncated file behind: a later existence
                # check would mistake it for a finished download.
                try:
                    if target_path.is_file():
                        target_path.unlink()
                except OSError:
                    pass  # the original error is the one worth reporting
                raise
    except Exception as exception:
        download_success = "DOWNLOAD ERROR " + str(exception) + ": " + url
    return download_success
def download_files():
    """
    Download every entry of ``file_list`` into ``download_path``.

    One ``download_file`` job per link is submitted to a process pool of eight
    workers; ``print_progress`` is attached as done-callback to redraw the
    progress line. After all jobs have finished, every failure is printed,
    whether ``download_file`` reported it through its return value or the job
    itself raised.
    """
    print('== DOWNLOAD FILES ==')
    jobs = []
    with futures.ProcessPoolExecutor(max_workers=8) as executor:
        for link in file_list:
            # The file name is the last "/"-separated segment of the link.
            file_name = link.split("/")[-1]
            job = executor.submit(download_file, link, download_path.joinpath(file_name))
            job.add_done_callback(functools.partial(print_progress, len(file_list)))
            jobs.append((link, job))
    # Leaving the with-block waits for all jobs; end the progress line.
    print()

    for link, job in jobs:
        error = job.exception()
        if error is not None:
            # The job failed outside of download_file's own error handling.
            print("DOWNLOAD ERROR " + str(error) + ": " + link)
            continue
        outcome = job.result()
        if outcome != "TRUE":
            print(outcome)
def check_files():
    """
    Print a notice for every link whose file is missing in ``download_path``.

    The expected file name is the last "/"-separated segment of the link.
    """
    for link in file_list:
        file_name = link.rsplit("/", 1)[-1]
        expected = download_path / file_name
        if expected.is_file():
            continue
        print("Link " + link + " was not downloaded.")
def _main():
    """
    Run the script: create the target directory if needed, read the link
    list, print it, download everything and report missing files.
    """
    if not download_path.exists():
        download_path.mkdir()
    create_download_list()
    for entry in file_list:
        print(entry)
    download_files()
    check_files()


if __name__ == "__main__":
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment