Skip to content

Instantly share code, notes, and snippets.

@blha303
Last active August 29, 2015 14:14
Show Gist options
  • Save blha303/28be06975e1acd0ddc8e to your computer and use it in GitHub Desktop.
Save blha303/28be06975e1acd0ddc8e to your computer and use it in GitHub Desktop.
import sys
import requests
from hurry.filesize import size, si
from bs4 import BeautifulSoup as Soup
# Running byte count for the whole crawl: add_to_total() increments it
# and main() prints it when the crawl ends.
total = 0
def add_to_total(uri, BASE=""):
    """Add the size of one remote resource to the module-level ``total``.

    Sends an HTTP HEAD request to ``BASE + uri``. When the response has a
    ``content-length`` header, its integer value is added to ``total`` and
    the human-readable amount is printed; otherwise the resource is
    reported and skipped.

    Args:
        uri: Path of the resource, appended verbatim to ``BASE``.
        BASE: URL prefix prepended to ``uri``.
    """
    global total
    # requests.head() does not follow redirects unless asked to. Without
    # this, a redirected file would contribute the content-length of the
    # redirect response rather than that of the file it points at.
    headers = requests.head(BASE + uri, allow_redirects=True).headers
    if "content-length" not in headers:
        print("{}: No content-length, skipping".format(uri))
        return
    adding = int(headers["content-length"])
    # Single-argument parenthesised print gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("{}: Adding {}".format(uri, size(adding, system=si)))
    total += adding
def do_iterate(dir="", BASE="", COL=""):
    """Recursively walk an HTML directory listing, totalling file sizes.

    Fetches ``BASE + dir``, takes the page's ``<a>`` elements starting at
    index ``COL``, recurses into every link whose ``href`` ends in ``/``
    and passes every other link to ``add_to_total``.

    Args:
        dir: Path of the listing relative to ``BASE``; each link target is
            appended to it verbatim.
        BASE: URL prefix prepended to every request.
        COL: Start index into each page's list of anchors; anchors before
            it are ignored. The empty-string default is treated as 0.
    """
    print("Entering {}{}".format(BASE, dir))
    # The declared default for COL is "", which is not a valid slice index
    # and used to raise TypeError; treat any falsy value as "start at 0".
    start = COL or 0
    # Name the parser explicitly so the parse result does not depend on
    # which optional parser libraries happen to be installed.
    page = Soup(requests.get(BASE + dir).text, "html.parser")
    # Slice before filtering so COL keeps indexing every anchor on the page.
    for a in page.find_all('a')[start:]:
        href = a.get("href")
        if not href:
            # An anchor with a missing or empty href has nothing to follow;
            # indexing it used to raise KeyError / IndexError.
            continue
        if href.endswith("/"):
            do_iterate(dir + href, BASE, COL)
        else:
            add_to_total(dir + href, BASE)
def main():
    """Crawl a directory listing and print the total size that was tallied.

    The first command-line argument, when present, is the base URL; the
    second, when present, is converted to ``int`` and passed to
    ``do_iterate`` as ``COL``. Built-in defaults are used for whichever
    is absent.

    Returns:
        0, which the script's entry point passes to ``sys.exit``.
    """
    args = sys.argv[1:]
    base_url = args[0] if args else "http://file.cite.wa.edu.au/"
    start = int(args[1]) if len(args) > 1 else 3
    try:
        do_iterate("", base_url, start)
    finally:
        # Runs even when the crawl raises, so whatever has been tallied so
        # far is still reported.
        summary = "Total size of {}: {} ({})".format(
            base_url, total, size(total, system=si)
        )
        print(summary)
    return 0
if __name__ == "__main__":
    # Exit the process with the status code that main() returns.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment