apendleton · August 24, 2015 17:56
diff --git a/gabon_test.py b/gabon_test.py
 # first you need to make sure you have the libraries installed; I'm using pyquery and requests:
 #    pip install pyquery requests

 import requests
 from pyquery import PyQuery as pq
 from urlparse import urljoin

 URL = "http://www.stat-gabon.org/"

 # Fetch the page. The timeout keeps the script from hanging forever on an
 # unresponsive server, and raise_for_status() stops on an HTTP error status
 # instead of scraping links out of an error page.
 response = requests.get(URL, timeout=30)
 response.raise_for_status()
 page = pq(response.content)

 # Run the selector once and keep (link text, raw href, absolute URL) for every
 # anchor that carries an href attribute; each report below is a view of this
 # list, so the page is not re-queried for every print.
 links = [
     (anchor.text(), anchor.attr('href'), urljoin(URL, anchor.attr('href')))
     for anchor in page('a[href]').items()
 ]

 # Each print below takes exactly one parenthesised argument, which prints the
 # same thing as a Python 2 print statement and as a Python 3 print() call.

 # print all the URLs on the page
 print('all links')
 print([href for _text, href, _absolute in links])

 # the same as above, but with absolute URLs
 print('all links with absolute urls')
 print([absolute for _text, _href, absolute in links])

 # the same as above, but with the URL labels as well
 print('all links, absolute urls, and link text')
 print([(text, absolute) for text, _href, absolute in links])

 # the same as above, but only for links that have 'pdf' in them somewhere
 # (case-insensitive match against the raw href)
 print('pdf links, absolute urls, and link text')
 print([(text, absolute) for text, href, absolute in links if 'pdf' in href.lower()])
	# first you need to make sure you have the libraries installed; I'm using pyquery and requests:
	# pip install pyquery requests

	import requests
	from pyquery import PyQuery as pq
	from urlparse import urljoin

	URL = "http://www.stat-gabon.org/"

	# Fetch the page. The timeout keeps the script from hanging forever on an
	# unresponsive server, and raise_for_status() stops on an HTTP error status
	# instead of scraping links out of an error page.
	response = requests.get(URL, timeout=30)
	response.raise_for_status()
	page = pq(response.content)

	# Run the selector once and keep (link text, raw href, absolute URL) for every
	# anchor that carries an href attribute; each report below is a view of this
	# list, so the page is not re-queried for every print.
	links = [
		(anchor.text(), anchor.attr('href'), urljoin(URL, anchor.attr('href')))
		for anchor in page('a[href]').items()
	]

	# Each print below takes exactly one parenthesised argument, which prints the
	# same thing as a Python 2 print statement and as a Python 3 print() call.

	# print all the URLs on the page
	print('all links')
	print([href for _text, href, _absolute in links])

	# the same as above, but with absolute URLs
	print('all links with absolute urls')
	print([absolute for _text, _href, absolute in links])

	# the same as above, but with the URL labels as well
	print('all links, absolute urls, and link text')
	print([(text, absolute) for text, _href, absolute in links])

	# the same as above, but only for links that have 'pdf' in them somewhere
	# (case-insensitive match against the raw href)
	print('pdf links, absolute urls, and link text')
	print([(text, absolute) for text, href, absolute in links if 'pdf' in href.lower()])