Given a .txt file that contains URLs to download, this script will download them for you.
Requirements:
- tqdm
| import csv | |
| import time | |
| from argparse import ArgumentParser | |
| from concurrent import futures | |
| from pathlib import Path | |
| from urllib import request | |
| from tqdm import tqdm | |
def download(url: str, save_dir: Path, sleep_time: int = 1) -> str:
    """Download one URL into *save_dir*, then sleep to throttle requests.

    Args:
        url: The URL to fetch.
        save_dir: Existing directory the file is written into.
        sleep_time: Seconds to sleep after the download completes.

    Returns:
        The string ``'OK'`` on success; any download error propagates to
        the caller (the thread-pool driver records it as a failure).
    """
    from urllib.parse import urlparse

    # Derive the filename from the URL *path* only, so query strings or
    # fragments ("file.zip?token=abc") do not end up in the saved name.
    filename = Path(urlparse(url).path).name
    request.urlretrieve(url, save_dir / filename)
    time.sleep(sleep_time)
    return 'OK'
def main():
    """Read URL rows from a delimited text file and download them concurrently.

    Each line of ``--data`` is parsed as a CSV row; column ``--data_index``
    holds the URL. Indices of rows that failed to download are appended to
    the ``--fail`` file, one per line.
    """
    parser = ArgumentParser()
    parser.add_argument("--data", type=str, default="data.txt",
                        help="input file, one CSV row per URL")
    parser.add_argument("--data_index", type=int, default=0,
                        help="column index of the URL within each row")
    parser.add_argument("--save", type=str, default="Download",
                        help="directory downloads are written to")
    parser.add_argument("--fail", type=str, default="fail.txt",
                        help="file that failed row indices are appended to")
    parser.add_argument("--threads", type=int, default=20,
                        help="number of concurrent download threads")
    parser.add_argument("--sleep", type=int, default=1,
                        help="per-download sleep, to throttle requests")
    args = parser.parse_args()

    save_path = Path(args.save)
    save_path.mkdir(exist_ok=True, parents=True)

    fails = []
    # `with` closes the input file deterministically (the original leaked the
    # handle); newline="" is what the csv module documentation requires.
    with Path(args.data).open(newline="") as data_file:
        data = csv.reader(data_file, delimiter=',')
        with futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
            # Map each submitted future back to its row index for reporting.
            jobs = {
                executor.submit(download, row[args.data_index], save_path, args.sleep): ith
                for ith, row in enumerate(data)
            }
            for future in tqdm(futures.as_completed(jobs), total=len(jobs)):
                try:
                    status = future.result()
                except Exception as e:
                    # tqdm.write keeps the progress bar intact while logging.
                    tqdm.write("{} : {}".format(jobs[future], e))
                    fails.append(jobs[future])
                else:
                    tqdm.write("{} : {}".format(jobs[future], status))

    # Only touch the failure log when there is something to record, and
    # close it via `with` (the original left the handle open).
    if fails:
        with Path(args.fail).open("a") as fail_file:
            fail_file.writelines("{}\n".format(x) for x in fails)
| if "__main__" == __name__: | |
| main() |