soeirosantos · October 9, 2012 17:10
diff --git a/downpdf.py b/downpdf.py
 """
 Simple script for downloading PDFs from a specific page.
 Depends on BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """

 import urllib2
 import urllib
 from bs4 import BeautifulSoup

 # Base URL under which the PDFs are located.
 path = 'http://some/interesting/place/with/pdfs'

 # Specific page to scan for links, if there is one.
 page = 'somepage.html'

 # Full address of the page that gets fetched.
 page_path = "/".join([path, page])

 # Fetch the page, parse it, and collect every <a> element.
 all_links = BeautifulSoup(urllib2.urlopen(page_path).read()).find_all("a")

 def getFileName(href):
     """Return the file name that a link target points at.

     Args:
         href: Link target, absolute or relative.

     Returns:
         The text after the last "/" of ``href``, ignoring any query
         string ("?...") or fragment ("#...").
     """
     # Drop the fragment first, then the query: both may contain "/" and
     # would otherwise be mistaken for part of the path.
     resource = href.split("#", 1)[0].split("?", 1)[0]
     return resource.split("/")[-1]

 def completeUrl(href, path):
     """Return an absolute URL for a link target.

     Args:
         href: Link target, absolute or relative.
         path: Base URL that relative targets are appended to.

     Returns:
         ``href`` unchanged when it starts with an http:// or https://
         scheme; otherwise ``path + "/" + href``.
     """
     # Check the prefix instead of searching the whole string: a relative
     # link may carry a URL inside its query, and absolute links may use
     # https. URL schemes are case-insensitive, hence the lower().
     if href.lower().startswith(("http://", "https://")):
         return href
     return path + "/" + href

 for link in all_links:
    href = link.get("href")
    if ".pdf" in href:
        print "urllib.urlretrieve(completeUrl(href, path), getFileName(href))"
	"""
	Simple script for downloading PDFs from a specific page.
	Depends on BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
	"""

	import urllib2
	import urllib
	from bs4 import BeautifulSoup

	# Base URL under which the PDFs are located.
	path = 'http://some/interesting/place/with/pdfs'

	# Specific page to scan for links, if there is one.
	page = 'somepage.html'

	# Full address of the page that gets fetched.
	page_path = "/".join([path, page])

	# Fetch the page, parse it, and collect every <a> element.
	all_links = BeautifulSoup(urllib2.urlopen(page_path).read()).find_all("a")

	def getFileName(href):
	    """Return the file name that a link target points at.

	    Args:
	        href: Link target, absolute or relative.

	    Returns:
	        The text after the last "/" of ``href``, ignoring any query
	        string ("?...") or fragment ("#...").
	    """
	    # Drop the fragment first, then the query: both may contain "/" and
	    # would otherwise be mistaken for part of the path.
	    resource = href.split("#", 1)[0].split("?", 1)[0]
	    return resource.split("/")[-1]

	def completeUrl(href, path):
	    """Return an absolute URL for a link target.

	    Args:
	        href: Link target, absolute or relative.
	        path: Base URL that relative targets are appended to.

	    Returns:
	        ``href`` unchanged when it starts with an http:// or https://
	        scheme; otherwise ``path + "/" + href``.
	    """
	    # Check the prefix instead of searching the whole string: a relative
	    # link may carry a URL inside its query, and absolute links may use
	    # https. URL schemes are case-insensitive, hence the lower().
	    if href.lower().startswith(("http://", "https://")):
	        return href
	    return path + "/" + href

	for link in all_links:
	href = link.get("href")
	if ".pdf" in href:
	print "urllib.urlretrieve(completeUrl(href, path), getFileName(href))"