Skip to content

Instantly share code, notes, and snippets.

@alyssadev
Created July 8, 2021 22:55
Show Gist options
  • Save alyssadev/6c0f1af7b8f0afb600bec60313213919 to your computer and use it in GitHub Desktop.
Save alyssadev/6c0f1af7b8f0afb600bec60313213919 to your computer and use it in GitHub Desktop.
A tool to get the size of an archive.org bucket
#!/usr/bin/env python3
import xmltodict
import requests
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    The value is repeatedly divided by 1024 until it fits the current unit
    and is then rendered with one decimal place, e.g. ``1536`` -> ``"1.5KiB"``.

    Args:
        num: The quantity to format (int or float; may be negative).
        suffix: Text appended after the unit prefix (default ``'B'``).

    Returns:
        The formatted string, e.g. ``"1.0MiB"``. Values too large for ``Zi``
        are expressed in ``Yi``.
    """
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        # Compare the value as it will be *printed* (one decimal place);
        # otherwise 1048575 bytes would be rendered as "1024.0KiB" instead
        # of being promoted to "1.0MiB".
        if abs(round(num, 1)) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    # Anything that overflowed every unit above is reported in yobi-units.
    return "%.1f%s%s" % (num, 'Yi', suffix)
def get_size_of_bucket(bucket, timeout=30):
    """Return the summed size of all files listed for an archive.org bucket.

    Downloads ``https://archive.org/download/{bucket}/{bucket}_files.xml``,
    parses it with ``xmltodict`` and adds up the ``size`` field of every
    ``file`` entry that has one. Entries without a ``size`` are skipped.

    Args:
        bucket: Identifier of the bucket; it is interpolated into the URL.
        timeout: Seconds to wait for the HTTP request before giving up
            (passed to ``requests.get``).

    Returns:
        The total as an ``int`` (presumably bytes, since the CLI formats it
        with a ``B`` suffix). ``0`` if the listing contains no files.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        KeyError: If the document has no ``files`` root element.
    """
    response = requests.get(
        f"https://archive.org/download/{bucket}/{bucket}_files.xml",
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of feeding an error page to the
    # XML parser.
    response.raise_for_status()
    data = xmltodict.parse(response.text)

    # An empty <files/> element is parsed as None.
    entries = (data["files"] or {}).get("file", [])
    # xmltodict collapses a single <file> child into a dict rather than a
    # one-element list; normalise so the loop below always sees entries.
    if isinstance(entries, dict):
        entries = [entries]
    return sum(int(f["size"]) for f in entries if "size" in f)
def main():
    """Command-line entry point: print the total size of a bucket.

    Takes one positional argument, ``bucket``, and an optional ``-u`` /
    ``--human`` flag. With the flag the size is printed via ``sizeof_fmt``;
    without it the raw integer is printed.

    Returns:
        ``0``, intended to be used as the process exit status.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument("bucket")
    cli.add_argument(
        "-u",
        "--human",
        action="store_true",
        help="(hUman): Returns output as human-readable (e.g 1.1MiB)",
    )
    options = cli.parse_args()

    total = get_size_of_bucket(options.bucket)
    # Human-readable form only on request; otherwise the plain number.
    print(sizeof_fmt(total) if options.human else total)
    return 0
if __name__ == "__main__":
    # The bare `exit` helper is installed by the `site` module for
    # interactive sessions and is missing under `python -S` or in frozen
    # builds; raising SystemExit works everywhere and needs no import.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment