Created
March 24, 2023 08:09
-
-
Save elliotwutingfeng/216095c154f77f6054e90dea29fe967f to your computer and use it in GitHub Desktop.
Simple script for multithreaded downloading from archive.org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
BSD Zero Clause License | |
Copyright (c) 2023 Wu Tingfeng | |
Permission to use, copy, modify, and/or distribute this software for any | |
purpose with or without fee is hereby granted. | |
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | |
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | |
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
PERFORMANCE OF THIS SOFTWARE. | |
""" | |
# pip install internetarchive | |
from internetarchive import download | |
import concurrent.futures | |
# archive.org raw link format | |
# https://archive.org/details/{identifier} | |
# Replace the below raw_links with the | |
# archive.org collections that you want to download | |
raw_links = """
https://archive.org/details/nasa
https://archive.org/details/goodytwoshoes00newyiala
"""

# Text that separates the host part of a details link from the identifier.
# Matching on this (rather than on the full "https://archive.org/details/"
# prefix) also accepts "http://" and "www." spellings of the same link.
DETAILS_MARKER = "archive.org/details/"


def extract_identifier(link):
    """Return the archive.org identifier contained in *link*.

    *link* is expected to look like ``https://archive.org/details/{identifier}``,
    optionally followed by a trailing slash, further path segments, a query
    string or a fragment; all of those are discarded.  A value that does not
    contain the details marker at all is returned unchanged, so a bare
    identifier can be listed in ``raw_links`` as well.
    """
    _, marker, remainder = link.partition(DETAILS_MARKER)
    if not marker:
        # Not a details URL: pass it through as-is (treated as an identifier).
        return link
    # Keep only the first path segment after the marker.
    for delimiter in "/?#":
        remainder = remainder.partition(delimiter)[0]
    return remainder


# One entry per non-blank line of raw_links, surrounding whitespace removed.
links = [
    stripped
    for stripped in (line.strip() for line in raw_links.splitlines())
    if stripped
]
# Links that yield an empty identifier (e.g. the bare details URL) are dropped,
# since there is nothing to download for them.
identifiers = [
    identifier for identifier in map(extract_identifier, links) if identifier
]
# Run the downloads on a pool of at most 4 worker threads and report each
# identifier's outcome as soon as its download call finishes.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # Remember which identifier each submitted future belongs to, so the
    # completion loop below can name it in its report.
    future_to_identifier = {}
    for identifier in identifiers:
        pending = executor.submit(
            download, identifier, verbose=True, checksum=True
        )
        future_to_identifier[pending] = identifier

    # as_completed yields futures in finishing order, not submission order.
    for future in concurrent.futures.as_completed(future_to_identifier):
        identifier = future_to_identifier[future]
        try:
            # result() re-raises whatever the download call raised in its
            # worker thread; the return value itself is not inspected.
            future.result()
        except Exception as exc:
            print("%r generated an exception: %s" % (identifier, exc))
            continue
        print("%r is successful" % identifier)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment