Skip to content

Instantly share code, notes, and snippets.

@rafalf
Created December 29, 2016 19:04
Show Gist options
  • Save rafalf/25acf52d9c50b07dc3d191f3fc3ca512 to your computer and use it in GitHub Desktop.
Save rafalf/25acf52d9c50b07dc3d191f3fc3ca512 to your computer and use it in GitHub Desktop.
# http://gspread.readthedocs.io/en/latest/oauth2.html#
# http://alexsavio.github.io/gspread_oauth2client_intro.html
# Share the spreadsheet with the service-account email from the JSON key file, see http://stackoverflow.com/questions/37602460/gspread-exceptions-spreadsheetnotfound
# https://github.com/burnash/gspread
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
import requests
from bs4 import BeautifulSoup
def send_email(user, pwd, recipient, subject, body):
    """Send an HTML email through Gmail's SMTP server (smtp.gmail.com:587).

    This is a best-effort helper: any failure while connecting, logging in
    or sending is reported on stdout instead of being raised to the caller.

    Args:
        user: Account used for the SMTP login and as the From address.
        pwd: Password for ``user``.
        recipient: Address the message is sent to.
        subject: Subject line of the message.
        body: HTML markup used as the message body.

    Returns:
        None.
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    message = MIMEMultipart('alternative')
    message['subject'] = subject
    message['To'] = recipient
    message['From'] = user
    # Record the MIME type text/html.
    message.attach(MIMEText(body, 'html'))

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            server.ehlo()
            server.starttls()
            server.login(user, pwd)
            server.sendmail(user, recipient, message.as_string())
        finally:
            # Always release the socket, even when login or sending fails.
            server.close()
    except Exception as exc:
        # Deliberately broad: sending is best-effort. Unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate, and
        # the reason for the failure is no longer hidden.
        print("failed to send mail: {}".format(exc))
    else:
        print('successfully sent the mail')
def get_email_template(heading=None, text=None):
    """Return the HTML email template, optionally with placeholders filled.

    The template contains two placeholders, ``{%Heading%}`` and ``{%Text%}``.
    Called without arguments the raw template is returned unchanged, so
    existing callers keep working.

    Args:
        heading: Replacement for ``{%Heading%}``; inserted verbatim (no HTML
            escaping). ``None`` leaves the placeholder in place.
        text: Replacement for ``{%Text%}``; inserted verbatim (no HTML
            escaping). ``None`` leaves the placeholder in place.

    Returns:
        The template as a string.
    """
    template = """
<html>
<body>
<h3>{%Heading%}</h3>
{%Text%}
</body>
</html>
"""
    if heading is not None:
        template = template.replace('{%Heading%}', heading)
    if text is not None:
        template = template.replace('{%Text%}', text)
    return template
# Script body: read URLs from the "URL to check" worksheet, count the <h1>
# tags on each page, write the counts to a fresh worksheet and email a
# summary.

# authorise against the Google Sheets API with the service-account key
json_key = 'google_api_service_acc_creds.json'
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key, scope)
gc = gspread.authorize(credentials)
wks = gc.open_by_url("https://docs.google.com/spreadsheets/d/1EvUPN4NPlMo8m-rKtm0jmdaEW2rz5PnylkzM5u8vbO0/edit#gid=0")
worksheet = wks.worksheet("URL to check")
rows = worksheet.get_all_values()

# collect the URLs from the first column
urls = []
for row in rows:
    # A blank first cell is not a URL; requesting it would raise.
    if not row or not row[0].strip():
        continue
    urls.append(row[0].strip())
    print('Found url on the sheet: {}'.format(urls[-1]))

# scrape data
url_h1 = []
for url in urls:
    try:
        # Without a timeout a single unresponsive host hangs the whole run.
        r = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Record the failure and carry on with the remaining URLs.
        print('Request failed for url {}: {}'.format(url, exc))
        url_h1.append([url, 'request failed'])
        continue
    data = r.text
    soup = BeautifulSoup(data, "lxml")
    all_h1 = soup.find_all('h1')
    print('H1 found: {} on url {}'.format(len(all_h1), url))
    url_h1.append([url, len(all_h1)])

# write data
# create a new worksheet; the title includes minutes (%M) so that two runs
# within the same hour cannot end up with the same title
title = "Run: {}".format(time.strftime('%d%m%Y-%H%M%S', time.localtime()))
# results start on row 2, so the sheet needs one more row than there are
# results; never go below the original 100 rows
row_count = max(100, len(url_h1) + 1)
worksheet = wks.add_worksheet(title=title, rows=str(row_count), cols="20")

# fill in cells (row 1 is left empty, results start on row 2)
for i, (url, h1) in enumerate(url_h1, start=2):
    worksheet.update_cell(i, 1, url)
    worksheet.update_cell(i, 2, h1)

# send email
# NOTE: user, recipient, pwd and subject are placeholders and must be filled
# in before the email can actually be delivered.
user = '[email protected]'
recipient = '[email protected]'
pwd = ""
subject = ""
# Fill the template placeholders so the literal "{%Heading%}" / "{%Text%}"
# markers are not sent out as the message.
summary = ''.join('<p>{}: {}</p>'.format(url, h1) for url, h1 in url_h1)
body = get_email_template().replace('{%Heading%}', title).replace('{%Text%}', summary)
send_email(user, pwd, recipient, subject, body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment