Skip to content

Instantly share code, notes, and snippets.

@halit
Last active January 3, 2016 19:49
Show Gist options
  • Save halit/8510791 to your computer and use it in GitHub Desktop.
Save halit/8510791 to your computer and use it in GitHub Desktop.
import requests
import re
from multiprocessing import Pool
# Index page that is scanned for links to PDF files.
url = "http://www.cs.umd.edu/~gasarch/bookrev/bookrev.html"
# Matches one double-quoted HREF attribute whose value ends in ".pdf".
# [^"]* stops at the closing quote, so two links on the same line are never
# merged into a single match, and the escaped dot only matches a literal ".".
r_url = re.compile(r'HREF="[^"]*\.pdf"')
# Every match starts with r_replace_t; swapping it for r_replace_w prefixes the
# link with the directory of the index page.
# NOTE(review): this assumes the page's PDF links are relative -- confirm.
r_replace_t = "HREF=\""
r_replace_w = "http://www.cs.umd.edu/~gasarch/bookrev/"
# The timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() stops here instead of scanning an HTTP error page.
index_response = requests.get(url, timeout=30)
index_response.raise_for_status()
# Use the decoded text, not the raw bytes: r_url is a str pattern, and applying
# a str pattern to bytes raises TypeError on Python 3.
site_content = index_response.text
# Absolute URL of every PDF link found, with the surrounding quotes removed.
links = [
    s.replace(r_replace_t, r_replace_w).replace("\"", "")
    for s in r_url.findall(site_content)
]
def saver(link):
    """Download ``link`` and save it in the current working directory.

    The output file name is the last ``/``-separated component of ``link``.

    Args:
        link: URL of the file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(link, timeout=60)
    # Check the status before opening the output file so that an HTTP error
    # page is never written to disk and no empty file is left behind.
    response.raise_for_status()
    with open(link.split("/")[-1], "wb") as ff:
        ff.write(response.content)
# Start the worker pool only when this file is run as a script. Under the
# "spawn" start method each worker re-imports the main module, and an
# unguarded Pool() at import time would make every worker try to start its
# own pool.
if __name__ == "__main__":
    # Download the links with 8 worker processes.
    pool = Pool(8)
    try:
        pool.map(saver, links)
    finally:
        # Always shut the workers down, even if a download raised.
        pool.close()
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment