Skip to content

Instantly share code, notes, and snippets.

@DoubleMalt
Created October 17, 2016 18:10
Show Gist options
  • Save DoubleMalt/da9cfe6c65b72298630c54f16801d224 to your computer and use it in GitHub Desktop.
Save DoubleMalt/da9cfe6c65b72298630c54f16801d224 to your computer and use it in GitHub Desktop.
import csv
import re
from urllib2 import urlopen

from BeautifulSoup import BeautifulSoup
# Scrape every "/person..." page linked from the index page and record each
# person's email, name, city and age, both on stdout and in email_list.csv.

# Root of the site being scraped; the person links found on the index page
# are site-relative, so they are appended to this value.
url = 'https://scrapebook22.appspot.com'
response = urlopen(url).read()
soup = BeautifulSoup(response)

# Compiled once, outside the loop, and used to locate the "Age" list item
# on every person page.
age_pattern = re.compile("Age")

with open("email_list.csv", "w") as csv_file:
    # csv.writer quotes fields that contain commas, quotes or newlines;
    # joining the fields with "," by hand would silently corrupt such rows.
    # lineterminator="\n" keeps the line endings the script always produced.
    writer = csv.writer(csv_file, lineterminator="\n")

    for link in soup.findAll("a"):
        # Anchors without an href attribute would raise KeyError on
        # link["href"], so read it defensively and skip them.
        href = link.get("href")
        if not href or not href.startswith("/person"):
            continue

        person_url = url + href
        person_html = urlopen(person_url).read()
        person_soup = BeautifulSoup(person_html)

        email = person_soup.find("span", attrs={"class": "email"}).string
        city = person_soup.find("span", attrs={"data-city": True}).string
        # The second <h1> on the page is taken as the person's name.
        name = person_soup.findAll("h1")[1].string
        # Drop the first five characters of the matched text
        # (presumably an "Age: " label -- confirm against the page markup).
        age = person_soup.find("li", text=age_pattern).string[5:]

        # Single-argument print(...) behaves identically on Python 2.
        print(email)
        print(age)
        print(name)
        print(city)

        writer.writerow([email, name, city, age])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment