Created
August 10, 2016 16:41
-
-
Save Miha-Pleskovic/564be31505d5979d19cc5e8396333bd0 to your computer and use it in GitHub Desktop.
Data Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import csv
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib2 import urlopen

from BeautifulSoup import BeautifulSoup
# SCRAPING DATA
# Downloads the index page, follows every "See full profile" link and saves
# each person's name, e-mail and city as one row of emaili.csv.
url = "https://scrapebook22.appspot.com"
response = urlopen(url).read()
soup = BeautifulSoup(response)

# A parenthesised single-argument print prints exactly what the original
# print statement did.
print(soup.html.head.title.string)
print("")


def _tag_text(tag):
    """Return the text of a parsed tag, or u"" if the tag or text is missing.

    Keeps one incomplete profile page from aborting the whole scrape with an
    AttributeError; the missing field is written as an empty column instead.
    """
    if tag is None or tag.string is None:
        return u""
    return tag.string


# "with" closes the file even when a download or parse step raises.
with open("emaili.csv", "w") as csv_file:
    # csv.writer quotes fields containing commas, quotes or newlines, which
    # plain "," concatenation would silently turn into extra columns.
    # lineterminator="\n" keeps the row ending this file has always used.
    writer = csv.writer(csv_file, lineterminator="\n")
    for link in soup.findAll("a"):
        if link.string != "See full profile":
            continue

        person_url = url + link["href"]
        person_html = urlopen(person_url).read()
        person_soup = BeautifulSoup(person_html)

        # The name is read from the second <h1> of the profile page; guard
        # against pages that have fewer headings.
        headings = person_soup.findAll("h1")
        name = _tag_text(headings[1] if len(headings) > 1 else None)
        email = _tag_text(person_soup.find("span", attrs={"class": "email"}))
        city = _tag_text(person_soup.find("span", attrs={"data-city": True}))

        print(name + ", " + email + ", " + city)
        # Encode explicitly: handing non-ASCII unicode text to a Python 2
        # file/csv writer would otherwise raise UnicodeEncodeError.
        writer.writerow([field.encode("utf-8") for field in (name, email, city)])
# SENDING E-MAIL
# Builds a multipart message with emaili.csv attached and sends it through
# an SMTP server using STARTTLS and a login.
sender = "[email protected]"
# NOTE(review): the password is hard-coded in the source; read it from the
# environment or a prompt before using real credentials.
password = "Fake_Password"
recipient = "[email protected]"
subject = "Scraping the barrel"
content = "I'm sending you a .csv file containing scrapped e-mails. Note that they're fake (obviously).\n\nWarm regards,\n\nJohn Hancock"

e_mail = MIMEMultipart()
e_mail["From"] = sender
e_mail["To"] = recipient
e_mail["Subject"] = subject
e_mail.attach(MIMEText(content))

filename = "emaili.csv"
# "with" closes the CSV once it has been read; the handle was previously
# left open for the rest of the run.
with open(filename) as f:
    # Declare the part as text/csv with an explicit UTF-8 charset so that
    # non-ASCII names are labelled correctly rather than as us-ascii text.
    attachment = MIMEText(f.read(), "csv", "utf-8")
attachment.add_header('Content-Disposition', 'attachment', filename=filename)
e_mail.attach(attachment)

try:
    server = smtplib.SMTP("smtp.fakemail.com:420")
    try:
        server.ehlo()
        server.starttls()
        server.login(sender, password)
        server.sendmail(from_addr=sender, to_addrs=recipient, msg=e_mail.as_string())
        server.quit()
    finally:
        # Release the socket even when TLS, login or sending fails;
        # close() is safe to call again after a successful quit().
        server.close()
    print("The message is sent!")
except Exception as error:
    # Top-level boundary of the script: report the failure to the user
    # instead of ending with a traceback.
    print("ERROR!")
    print(error)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment