Skip to content

Instantly share code, notes, and snippets.

@arjunnambiartc
Last active August 29, 2017 12:47
Show Gist options
  • Save arjunnambiartc/36c0a01fc109f280c77d0265b5560401 to your computer and use it in GitHub Desktop.
Save arjunnambiartc/36c0a01fc109f280c77d0265b5560401 to your computer and use it in GitHub Desktop.
import requests
import inspect
from selenium import webdriver
from bs4 import BeautifulSoup
from PIL import Image
import hashlib
import uuid
import cgi
import arrow
import httplib2
from io import BytesIO
import os
import oauth2client
from oauth2client import client, tools
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.header import Header
from apiclient import errors, discovery
# OAuth2 scope requested for the Gmail API: sending mail only.
SCOPES = 'https://www.googleapis.com/auth/gmail.send'
# OAuth2 client-secret file; a relative name, so it is resolved against the
# current working directory.
CLIENT_SECRET_FILE = 'client_secret.json'
# Used as the user agent of the OAuth2 flow.
APPLICATION_NAME = 'Gmail API Python Send Email'
# Page whose schedule table is monitored for changes.
PAGE_URL = 'http://hypermonkey.in/index.php?page=schedule'

HOME_DIR = os.path.expanduser('~')
# Every state file of the checker lives in this one directory below $HOME.
_STATE_DIR = os.path.join(HOME_DIR, 'hypermonkey-schedule-checker')
# Screenshot of the schedule as it looked before a change.
OLD_JPG_IMAGE_PATH = os.path.join(_STATE_DIR, 'old_class_schedule.jpg')
# Screenshot taken when a change is detected.
NEW_JPG_IMAGE_PATH = os.path.join(_STATE_DIR, 'new_class_schedule.jpg')
# File holding the SHA-1 hex digest of the last seen schedule table.
HASH_VALUE_FILE_PATH = os.path.join(_STATE_DIR, 'hash-value')
def take_page_screenshot(filename):
    '''
    Take a screenshot of the schedule page and save the cropped timetable.

    PhantomJS renders PAGE_URL in a 1120x550 window. The PNG screenshot is
    handed to PIL, which crops a fixed margin (120 px left and right,
    210 px top, 110 px bottom) and writes the result to ``filename``.

    Args:
        filename: Destination path of the cropped image. PIL derives the
            output format from the file extension.
    '''
    browser = webdriver.PhantomJS()
    try:
        browser.set_window_size(1120, 550)
        browser.get(PAGE_URL)
        screenshot_png = browser.get_screenshot_as_png()
    finally:
        # Always shut the headless browser down, even when loading the page
        # or grabbing the screenshot raises; otherwise the PhantomJS
        # process would be left running.
        browser.quit()

    img = Image.open(BytesIO(screenshot_png))
    w, h = img.size
    cropped = img.crop((120, 210, w - 120, h - 110))

    # JPEG has no alpha channel and recent Pillow versions refuse to save
    # an RGBA image as JPEG, so drop the alpha channel for JPEG targets.
    extension = os.path.splitext(filename)[1].lower()
    if extension in ('.jpg', '.jpeg') and cropped.mode != 'RGB':
        cropped = cropped.convert('RGB')

    cropped.save(filename)
def check_schedule_change(table):
    '''
    Detect a change of the schedule table and send an alert mail for it.

    The SHA-1 hex digest of ``table`` is compared with the digest stored in
    HASH_VALUE_FILE_PATH:

    * No digest stored yet (file missing or empty): the current digest is
      stored as the baseline and nothing is sent.
    * Stored digest differs: a fresh screenshot is saved to
      NEW_JPG_IMAGE_PATH, send_message() is called, the stored digest is
      updated and the fresh screenshot replaces OLD_JPG_IMAGE_PATH.
    * Stored digest matches: nothing happens.

    A baseline screenshot is written to OLD_JPG_IMAGE_PATH whenever that
    file is missing or empty.

    Args:
        table: Object whose ``encode('utf-8')`` yields the bytes to hash
            (main() passes the result of BeautifulSoup's ``find``).
    '''
    new_hash_value = hashlib.sha1(table.encode('utf-8')).hexdigest()

    # The state files live in their own directory; create it on first run
    # so the open() and screenshot calls below do not fail.
    for state_path in (HASH_VALUE_FILE_PATH, OLD_JPG_IMAGE_PATH, NEW_JPG_IMAGE_PATH):
        os.makedirs(os.path.dirname(state_path), exist_ok=True)

    # Create Hash value file if it does not exist
    if not os.path.isfile(HASH_VALUE_FILE_PATH):
        print("hash value file not found")
        with open(HASH_VALUE_FILE_PATH, 'a'):
            os.utime(HASH_VALUE_FILE_PATH, None)

    # Create Old JPG image file if it does not exist
    if not os.path.isfile(OLD_JPG_IMAGE_PATH) or not os.path.getsize(OLD_JPG_IMAGE_PATH):
        print("OLD JPG not found or is empty")
        take_page_screenshot(OLD_JPG_IMAGE_PATH)

    if not os.path.getsize(HASH_VALUE_FILE_PATH):
        # First run: there is nothing to compare against yet, so just
        # record the baseline digest. (No need to re-fetch the page.)
        print("hash value file is empty")
        print(new_hash_value)
        with open(HASH_VALUE_FILE_PATH, 'w') as f:
            f.write(new_hash_value)
        return

    with open(HASH_VALUE_FILE_PATH, 'r') as f:
        old_hash_value = f.readline().rstrip()

    if new_hash_value == old_hash_value:
        return

    take_page_screenshot(NEW_JPG_IMAGE_PATH)
    send_message()

    # Persist the new digest only after the screenshot and send step have
    # returned: if either raises, the old digest stays in place and the
    # change is detected again on the next run instead of being lost.
    with open(HASH_VALUE_FILE_PATH, 'w') as f:
        f.write(new_hash_value)

    # The schedule just reported becomes the "old" one for the next alert.
    os.replace(NEW_JPG_IMAGE_PATH, OLD_JPG_IMAGE_PATH)
def get_gmail_credentials():
    '''
    Get Gmail credentials using the new Gmail API.

    Credentials are looked up in
    ``~/.credentials/gmail-python-email-send.json`` (the directory is
    created when missing). If none are stored, or the stored ones are
    flagged invalid, an OAuth2 flow is built from CLIENT_SECRET_FILE with
    the SCOPES scope and run via ``tools.run_flow`` with that store.

    Returns:
        The credentials object read from the store or produced by the flow.
    '''
    # Import the submodule explicitly: a plain ``import oauth2client`` does
    # not guarantee that ``oauth2client.file`` is available as an attribute.
    from oauth2client import file as oauth2_file

    credential_dir = os.path.join(HOME_DIR, '.credentials')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir, 'gmail-python-email-send.json')

    store = oauth2_file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        credentials = tools.run_flow(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials
def send_message():
    '''
    Send the schedule-change alert through the Gmail API.

    Builds an authorized Gmail ``v1`` service from the stored credentials,
    creates the payload with create_message() and sends it as user "me".

    Returns:
        The API response of the send call, or None when the call raised
        an ``errors.HttpError`` (the error is printed, not re-raised).
    '''
    authorized_http = get_gmail_credentials().authorize(httplib2.Http())
    gmail = discovery.build('gmail', 'v1', http=authorized_http)
    body = create_message()

    try:
        request = gmail.users().messages().send(userId="me", body=body)
        sent = request.execute()
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None
    else:
        print('Message Id: %s' % sent['id'])
        return sent
def create_message():
    '''
    Build the Gmail API payload for the schedule-change alert.

    The mail is a ``multipart/related`` message whose HTML body embeds two
    inline images by Content-ID: the new schedule screenshot
    (NEW_JPG_IMAGE_PATH) and the old one (OLD_JPG_IMAGE_PATH). The subject
    carries the current time converted to the Asia/Calcutta time zone.

    Returns:
        dict: ``{'raw': <message>}`` where the message is the URL-safe
        base64 encoding of the full MIME message, as text.

    Raises:
        OSError: If either screenshot file cannot be opened.
    '''
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    import html

    msg = MIMEMultipart('related')
    current_time = arrow.utcnow().to('Asia/Calcutta').format('D MMM YYYY HH:mm')
    msg['Subject'] = "ALERT !!! HyperMonkey Schedule has Changed on {} !!!".format(current_time)
    msg['From'] = "[email protected]"
    msg['BCC'] = "[email protected], [email protected]"

    msg_alternative = MIMEMultipart('alternative')
    msg.attach(msg_alternative)

    # Read the screenshots from the same paths the checker writes them to,
    # rather than from names relative to the current working directory.
    img_new = {
        'title': 'New class schedule..',
        'path': NEW_JPG_IMAGE_PATH,
        'cid': str(uuid.uuid4()),
    }
    img_old = {
        'title': 'Old class schedule..',
        'path': OLD_JPG_IMAGE_PATH,
        'cid': str(uuid.uuid4()),
    }

    body_html = (
        '<p>Latest class schedule is as follows:</p>'
        '<div dir="ltr">'
        '<img src="cid:{cid_new}" alt="{alt_new}"><br></div>'
        '<p>Old class schedule was as follows:</p>'
        '<div dir="ltr">'
        '<img src="cid:{cid_old}" alt="{alt_old}"><br></div>'
    ).format(
        cid_new=img_new['cid'],
        alt_new=html.escape(img_new['title'], quote=True),
        cid_old=img_old['cid'],
        alt_old=html.escape(img_old['title'], quote=True),
    )
    msg_alternative.attach(MIMEText(body_html, 'html', 'utf-8'))

    # Attach each image with the Content-ID the HTML body refers to.
    for image in (img_new, img_old):
        with open(image['path'], 'rb') as image_file:
            image_part = MIMEImage(image_file.read(),
                                   name=os.path.basename(image['path']))
        image_part.add_header('Content-ID', '<{}>'.format(image['cid']))
        msg.attach(image_part)

    raw = base64.urlsafe_b64encode(msg.as_bytes()).decode()
    return {'raw': raw}
def main():
    '''
    Fetch the schedule page and run the change check on its timetable.

    Downloads PAGE_URL, locates the ``<table id="schedule_table">`` element
    and passes it to check_schedule_change().

    Raises:
        requests.exceptions.RequestException: If the request times out,
            fails, or the server answers with an HTTP error status.
        RuntimeError: If the page contains no ``schedule_table`` table.
    '''
    # Bound the request so an unresponsive server cannot hang the run.
    response = requests.get(PAGE_URL, timeout=30)
    # Fail on HTTP errors instead of parsing (and hashing) an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find("table", attrs={"id": "schedule_table"})
    if table is None:
        # find() returns None when nothing matches; report that clearly
        # instead of failing later with an AttributeError on None.
        raise RuntimeError(
            'No <table id="schedule_table"> found at {}'.format(PAGE_URL))
    check_schedule_change(table)
# Script entry point: run the check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment