Last active
August 29, 2017 12:47
-
-
Save arjunnambiartc/36c0a01fc109f280c77d0265b5560401 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import inspect | |
from selenium import webdriver | |
from bs4 import BeautifulSoup | |
from PIL import Image | |
import hashlib | |
import uuid | |
import cgi | |
import arrow | |
import httplib2 | |
from io import BytesIO | |
import os | |
import oauth2client | |
from oauth2client import client, tools | |
import base64 | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
from email.mime.image import MIMEImage | |
from email.header import Header | |
from apiclient import errors, discovery | |
# Gmail API settings used when authorising and sending the alert mail.
SCOPES = 'https://www.googleapis.com/auth/gmail.send'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Gmail API Python Send Email'

# Page whose schedule table is monitored.
PAGE_URL = 'http://hypermonkey.in/index.php?page=schedule'

# Everything the checker persists lives in one directory under the user's home.
HOME_DIR = os.path.expanduser('~')
_STATE_DIR = os.path.join(HOME_DIR, 'hypermonkey-schedule-checker')
OLD_JPG_IMAGE_PATH = os.path.join(_STATE_DIR, 'old_class_schedule.jpg')
NEW_JPG_IMAGE_PATH = os.path.join(_STATE_DIR, 'new_class_schedule.jpg')
HASH_VALUE_FILE_PATH = os.path.join(_STATE_DIR, 'hash-value')
def take_page_screenshot(filename):
    '''
    Capture the schedule page and save the cropped timetable to `filename`.

    PAGE_URL is loaded in a PhantomJS browser with a 1120x550 window, the
    PNG screenshot is handed to PIL, and a fixed margin (120px left and
    right, 210px top, 110px bottom) is cropped away before saving.

    filename: destination path passed to PIL's Image.save().
    '''
    browser = webdriver.PhantomJS()
    try:
        browser.set_window_size(1120, 550)
        browser.get(PAGE_URL)
        screenshot_png = browser.get_screenshot_as_png()
    finally:
        # Always shut the browser down, even when loading the page or
        # grabbing the screenshot fails; otherwise the PhantomJS process
        # is left running.
        browser.quit()

    img = Image.open(BytesIO(screenshot_png))
    w, h = img.size
    cropped = img.crop((120, 210, w - 120, h - 110))
    # The callers in this file save to .jpg paths; JPEG has no alpha
    # channel and recent Pillow releases refuse to write RGBA images as
    # JPEG, so drop any alpha channel before saving.
    cropped.convert('RGB').save(filename)
def check_schedule_change(table):
    '''
    Detect a change in the schedule table and send an alert mail if it changed.

    The SHA-1 of the table markup is compared with the value stored in
    HASH_VALUE_FILE_PATH:

    * no stored value yet -> store the current hash as the baseline and stop;
    * stored value equal  -> do nothing;
    * stored value differs -> take a fresh screenshot, send the mail, make
      the fresh screenshot the new "old" one and store the new hash.

    A baseline screenshot is taken at OLD_JPG_IMAGE_PATH whenever that file
    is missing or empty.

    table: object whose encode('utf-8') returns the bytes to hash (main()
           passes the result of BeautifulSoup's find()).

    Raises ValueError when `table` is None, i.e. the schedule table was not
    found in the page.
    '''
    if table is None:
        # soup.find() returns None when the table is missing; fail with a
        # clear message instead of an AttributeError on None.encode.
        raise ValueError('schedule table not found in page; cannot compute its hash')

    new_hash_value = hashlib.sha1(table.encode('utf-8')).hexdigest()

    # The hash file and screenshots share one directory; make sure it
    # exists before anything is written into it.
    os.makedirs(os.path.dirname(HASH_VALUE_FILE_PATH), exist_ok=True)

    # Create Old JPG image file if it does not exist
    if not os.path.isfile(OLD_JPG_IMAGE_PATH) or not os.path.getsize(OLD_JPG_IMAGE_PATH):
        print("OLD JPG not found or is empty")
        take_page_screenshot(OLD_JPG_IMAGE_PATH)

    old_hash_value = ''
    if os.path.isfile(HASH_VALUE_FILE_PATH):
        with open(HASH_VALUE_FILE_PATH, 'r') as f:
            old_hash_value = f.readline().rstrip()
    else:
        print("hash value file not found")

    if not old_hash_value:
        # First run (or empty file): just record the baseline. There is
        # nothing to compare against, so no re-fetch of the page is needed.
        print("hash value file is empty")
        print(new_hash_value)
        with open(HASH_VALUE_FILE_PATH, 'w') as f:
            f.write(new_hash_value)
        return

    if new_hash_value == old_hash_value:
        return

    take_page_screenshot(NEW_JPG_IMAGE_PATH)
    send_message()
    # The schedule just mailed as "new" is the "old" one for the next
    # change; without this rotation later mails would show a stale image.
    os.replace(NEW_JPG_IMAGE_PATH, OLD_JPG_IMAGE_PATH)
    # Persist the hash last so that a failure while taking the screenshot
    # or sending the mail is retried on the next run instead of being lost.
    with open(HASH_VALUE_FILE_PATH, 'w') as f:
        f.write(new_hash_value)
def get_gmail_credentials():
    '''
    Get Gmail credentials using the new Gmail API.

    Credentials are cached in ~/.credentials/gmail-python-email-send.json.
    When no cached credentials exist, or they are marked invalid, the OAuth
    flow built from CLIENT_SECRET_FILE and SCOPES is run and its result is
    written to that store.

    Returns the credentials object.
    '''
    # `import oauth2client` by itself is not guaranteed to load the `file`
    # submodule, in which case `oauth2client.file.Storage` raises
    # AttributeError; import the submodule explicitly.
    from oauth2client import file as oauth2client_file

    credential_dir = os.path.join(HOME_DIR, '.credentials')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir, 'gmail-python-email-send.json')

    store = oauth2client_file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        credentials = tools.run_flow(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials
def send_message():
    '''
    Send the schedule-change mail through the Gmail API.

    Returns the API response for the sent message, or None when the request
    fails with an HttpError (the error is printed).
    '''
    authorized_http = get_gmail_credentials().authorize(httplib2.Http())
    gmail = discovery.build('gmail', 'v1', http=authorized_http)
    request_body = create_message()

    send_request = gmail.users().messages().send(userId="me", body=request_body)
    try:
        sent = send_request.execute()
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
        return None
    else:
        print('Message Id: %s' % sent['id'])
        return sent
def create_message():
    '''
    Build the alert mail with the new and old schedule screenshots inline.

    The message is a multipart/related container holding an HTML part that
    references two images by Content-ID, followed by the images themselves,
    read from NEW_JPG_IMAGE_PATH and OLD_JPG_IMAGE_PATH.

    Returns a dict {'raw': <urlsafe base64 of the message>} in the form
    passed as `body` to the Gmail API send call.
    '''
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    from html import escape

    msg = MIMEMultipart('related')
    current_time = arrow.utcnow().to('Asia/Calcutta').format('D MMM YYYY HH:mm')
    msg['Subject'] = "ALERT !!! HyperMonkey Schedule has Changed on {} !!!" .format(current_time)
    msg['From'] = "[email protected]"
    msg['BCC'] = "[email protected], [email protected]"

    msg_alternative = MIMEMultipart('alternative')
    msg.attach(msg_alternative)

    # Use the same absolute paths the screenshots are saved to, so the mail
    # can be built regardless of the current working directory.
    img_new = dict(title=u'New class schedule..', path=NEW_JPG_IMAGE_PATH,
                   cid=str(uuid.uuid4()))
    img_old = dict(title=u'Old class schedule..', path=OLD_JPG_IMAGE_PATH,
                   cid=str(uuid.uuid4()))

    msg_html = MIMEText(u'<p>Latest class schedule is as follows:</p>'
                        '<div dir="ltr">'
                        '<img src="cid:{cid_new}" alt="{alt_new}"><br></div>'
                        '<p>Old class schedule was as follows:</p>'
                        '<div dir="ltr">'
                        '<img src="cid:{cid_old}" alt="{alt_old}"><br></div>'
                        .format(cid_new=img_new['cid'],
                                alt_new=escape(img_new['title'], quote=True),
                                cid_old=img_old['cid'],
                                alt_old=escape(img_old['title'], quote=True)),
                        'html', 'utf-8')
    msg_alternative.attach(msg_html)

    # Attach each image under the Content-ID the HTML part refers to.
    for img in (img_new, img_old):
        with open(img['path'], 'rb') as image_file:
            msg_image = MIMEImage(image_file.read(), name=os.path.basename(img['path']))
        msg_image.add_header('Content-ID', '<{}>'.format(img['cid']))
        msg.attach(msg_image)

    raw = base64.urlsafe_b64encode(msg.as_bytes()).decode()
    return {'raw': raw}
def main():
    '''
    Fetch the schedule page, extract the schedule table and check it for changes.

    Raises whatever requests raises on a timeout, connection failure or
    HTTP error status.
    '''
    # Without a timeout an unresponsive server would hang the checker forever.
    response = requests.get(PAGE_URL, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find("table", attrs={"id": "schedule_table"})
    check_schedule_change(table)


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment