joedougherty · December 22, 2015 22:29
diff --git a/nihscrape.py b/nihscrape.py
 import requests
 from bs4 import BeautifulSoup

 # Set the URL to download (the query string carries the search filters,
 # e.g. SearchTerms=HIV).
 endpoint = 'http://grants.nih.gov/searchGuide/Search_Guide_Results.cfm?Activity_Code=&Expdate_On_After=&OrderOn=ExpirationDate&OrderDirection=ASC&NoticesToo=0&OpeningDate_On_After=&Parent_FOA=All&PrimaryIC=Any&RelDate_On_After=&Status=1&SearchTerms=HIV&PAsToo=1&RFAsToo=1'

 # Pull down the HTML from that URL. A timeout is passed explicitly because
 # requests otherwise waits indefinitely on an unresponsive server.
 r = requests.get(endpoint, timeout=30)

 # Stop here on a 4xx/5xx response instead of trying to parse an error page.
 r.raise_for_status()
 page = r.content

 # Make this parseable. The parser is named explicitly so the result does
 # not depend on which optional parsers happen to be installed (and so
 # BeautifulSoup does not warn about having to guess one).
 parsed_page = BeautifulSoup(page, 'html.parser')

 # Now that you have this parsed_page object, you can call
 # the methods from BeautifulSoup on it.
 # Docs: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 #
 # Taking a quick look at the markup, we can see the target table is
 # 1.) a <table> element (precisely as expected) and 2.) has a class
 # of "searchtable".
 #
 # All we need to do is use the 'find' method to extract the table
 # with this class.
 #
 # Take a look at the docs to see how this works.
 extracted_table = parsed_page.find("table", {"class" : "searchtable"})

 # find() returns None when nothing matches; exit with a readable message
 # instead of failing on the .prettify call below.
 if extracted_table is None:
     raise SystemExit('No <table class="searchtable"> found at ' + endpoint)

 # This will help convert any special characters into something non-ugly:
 # with an encoding argument, prettify returns the markup as an encoded
 # byte string.
 extracted_table = extracted_table.prettify('latin-1')

 # On Python 3 that byte string is not a str, and print() would show its
 # b'...' repr with escaped newlines. Decode it back to text in that case;
 # on Python 2 the value is already a str and is printed unchanged.
 if not isinstance(extracted_table, str):
     extracted_table = extracted_table.decode('latin-1')

 print(extracted_table)
	import requests
	from bs4 import BeautifulSoup

	# Set the URL to download (the query string carries the search filters,
	# e.g. SearchTerms=HIV).
	endpoint = 'http://grants.nih.gov/searchGuide/Search_Guide_Results.cfm?Activity_Code=&Expdate_On_After=&OrderOn=ExpirationDate&OrderDirection=ASC&NoticesToo=0&OpeningDate_On_After=&Parent_FOA=All&PrimaryIC=Any&RelDate_On_After=&Status=1&SearchTerms=HIV&PAsToo=1&RFAsToo=1'

	# Pull down the HTML from that URL. A timeout is passed explicitly because
	# requests otherwise waits indefinitely on an unresponsive server.
	r = requests.get(endpoint, timeout=30)

	# Stop here on a 4xx/5xx response instead of trying to parse an error page.
	r.raise_for_status()
	page = r.content

	# Make this parseable. The parser is named explicitly so the result does
	# not depend on which optional parsers happen to be installed (and so
	# BeautifulSoup does not warn about having to guess one).
	parsed_page = BeautifulSoup(page, 'html.parser')

	# Now that you have this parsed_page object, you can call
	# the methods from BeautifulSoup on it.
	# Docs: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
	#
	# Taking a quick look at the markup, we can see the target table is
	# 1.) a <table> element (precisely as expected) and 2.) has a class
	# of "searchtable".
	#
	# All we need to do is use the 'find' method to extract the table
	# with this class.
	#
	# Take a look at the docs to see how this works.
	extracted_table = parsed_page.find("table", {"class" : "searchtable"})

	# find() returns None when nothing matches; exit with a readable message
	# instead of failing on the .prettify call below.
	if extracted_table is None:
		raise SystemExit('No <table class="searchtable"> found at ' + endpoint)

	# This will help convert any special characters into something non-ugly:
	# with an encoding argument, prettify returns the markup as an encoded
	# byte string.
	extracted_table = extracted_table.prettify('latin-1')

	# On Python 3 that byte string is not a str, and print() would show its
	# b'...' repr with escaped newlines. Decode it back to text in that case;
	# on Python 2 the value is already a str and is printed unchanged.
	if not isinstance(extracted_table, str):
		extracted_table = extracted_table.decode('latin-1')

	print(extracted_table)
No results found