Example of scraping a site that requires form submission
import requests
from bs4 import BeautifulSoup

# Iterate over author pages
for page in range(1, 2):  # TODO: update to number of pages + 1 (i.e. 15 pages would be 16)
    data = {"gruppo": "autori",
            "iniziale": "all",
            "pag": page}
    response = requests.post("http://digiliblt.lett.unipmn.it/testi.php", data=data)
    soup = BeautifulSoup(response.text, "lxml")

    # Look for all the links to authors
    for link in soup.find_all("a", {"href": "#", "onclick": True}):
        link_val = link['onclick']
        # Get the author ids
        if "scheda_autori" in link_val.lower():
            author_id = link_val.split("'")[1]
            data = {"id": author_id,
                    "bpag": "1"}
            # Load the author page based on the author id
            response = requests.post("http://digiliblt.lett.unipmn.it/autore.php", data=data)
            # TODO: parse the author page
            print(response.text)
            exit(0)

# This was the example of scraping the packhum site.
loc = "http://latin.packhum.org/canon"
html = requests.get(loc).text
soup = BeautifulSoup(html, "lxml")
anam_spans = soup.find_all("span", {'id': 'bolded'})
for span in anam_spans:
    print(span.span.text)
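
One way to fill in the "parse the author page" TODO is to feed the autore.php response into BeautifulSoup the same way the listing page is handled. The selectors below (the page title and the set of links on the page) are assumptions about the author page's markup rather than something confirmed against the live site, so treat this as a minimal sketch to adapt once the real HTML has been inspected.

from bs4 import BeautifulSoup

def parse_author_page(html):
    """Sketch: pull a few plausible fields out of an autore.php response.

    The tag/attribute choices here are guesses about the page structure;
    inspect the real markup and adjust the selectors accordingly.
    """
    soup = BeautifulSoup(html, "lxml")
    # The <title> element usually carries the author's name on pages like this.
    title = soup.title.text.strip() if soup.title else None
    # Collect every link on the page; filter these down once the real markup is known.
    links = [a.get("href") for a in soup.find_all("a", href=True)]
    return {"title": title, "links": links}

# Example usage inside the author loop, in place of the print/exit:
# info = parse_author_page(response.text)
# print(info["title"], len(info["links"]))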