Created
September 16, 2012 09:37
-
-
Save justinvw/3731769 to your computer and use it in GitHub Desktop.
Quick-and-dirty script that notifies you by mail when a webpage of interest changes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cPickle | |
import urllib2 | |
import hashlib | |
from datetime import datetime | |
import smtplib | |
from email.mime.text import MIMEText | |
# Path of the pickle file that stores the per-URL history of (timestamp, hash)
# samples between runs.
PICKLE_FILE = '/home/justin/tools/updatedyet.pkl'

# Recipients that get a notification mail when a watched page changes.
# NOTE(review): the addresses here and in MAIL_FROM look like redacted
# placeholders -- confirm and replace with real addresses before use.
MAIL_ON_CHANGE = ['[email protected]']

# Sender address, used both as the envelope sender and the From header.
MAIL_FROM = '[email protected]'

# Pages whose contents are fetched and hashed on every run.
URLS_OF_INTEREST = [
    'http://store.apple.com/nl/browse/home/shop_iphone/family/iphone',
    'http://vwees.net/justin/test.html'
]
def fetch_and_hash_content(url):
    """Download ``url`` and return the SHA-1 hex digest of the response body.

    Args:
        url: Address of the page to fetch; anything ``urllib2.urlopen``
            accepts.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the raw body, or
        ``None`` when the page could not be retrieved, so the caller can
        skip this URL for the current run.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            contents = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
    except Exception:
        # Deliberate best effort: any network, HTTP or malformed-URL error
        # means "no sample this run". Unlike a bare ``except:``, this still
        # lets KeyboardInterrupt and SystemExit propagate.
        return None

    return hashlib.sha1(contents).hexdigest()
def notify_of_change(url, prev_hash, prev_date, curr_hash, curr_date):
    """Mail every address in ``MAIL_ON_CHANGE`` that ``url`` has changed.

    Args:
        url: The page whose contents changed.
        prev_hash: Hash recorded for the previous sample.
        prev_date: Timestamp of the previous sample.
        curr_hash: Hash of the contents fetched now.
        curr_date: Timestamp of the current fetch.

    Raises:
        Whatever ``smtplib`` raises when connecting to the SMTP server on
        ``localhost`` or when sending fails; remaining recipients are then
        not mailed.
    """
    if not MAIL_ON_CHANGE:
        # Nobody to notify; do not open a connection for nothing.
        return

    message = MIMEText(
        'Contents of %s changed.\n\nPreviously (%s): %s\nNow (%s):'
        ' %s' % (url, prev_date, prev_hash, curr_date, curr_hash))
    message['Subject'] = 'updatedyet.py: contents of %s changed' % url
    message['From'] = MAIL_FROM

    # One connection serves all recipients instead of one per address.
    s = smtplib.SMTP('localhost')
    try:
        for mailaddress in MAIL_ON_CHANGE:
            # Assigning a header on a Message *appends* it, so drop the
            # previous recipient first; otherwise later recipients receive
            # a mail carrying several To: headers.
            del message['To']
            message['To'] = mailaddress
            s.sendmail(MAIL_FROM, [mailaddress], message.as_string())
    finally:
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; do not mask a send error.
            pass
def load_pickle(pickle_location=PICKLE_FILE):
    """Load the stored hash history from ``pickle_location``.

    Args:
        pickle_location: Path of the pickle file to read.

    Returns:
        The unpickled object, or an empty dict when the file is missing,
        unreadable or cannot be unpickled (for example on the first run).
    """
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at a file this script wrote itself.
    try:
        # Text mode is kept on purpose: it matches the mode save_pickle()
        # writes with, and the default ASCII pickle protocol only requires
        # that reader and writer use the same mode.
        with open(pickle_location, 'r') as pickle_file:
            return cPickle.load(pickle_file)
    except Exception:
        # Best effort: start with an empty history rather than abort the
        # run, while still letting KeyboardInterrupt/SystemExit propagate
        # (which a bare ``except:`` would swallow).
        return {}
def save_pickle(obj, pickle_location=PICKLE_FILE):
    """Pickle ``obj`` to ``pickle_location``, replacing any existing file.

    Args:
        obj: The object to store (the per-URL hash history).
        pickle_location: Path of the pickle file to write.

    Raises:
        IOError: If the file cannot be opened or written.
    """
    # The ``with`` block guarantees the data is flushed and the handle is
    # closed, instead of relying on garbage collection. Text mode is kept
    # so the file stays consistent with how load_pickle() reads it.
    with open(pickle_location, 'w') as pickle_file:
        cPickle.dump(obj, pickle_file)
def check_if_updated(urls=URLS_OF_INTEREST):
    """Sample every URL once and mail a notification for each changed page.

    The stored history maps each URL to a list of ``(timestamp, hash)``
    samples. A URL that cannot be fetched is skipped for this run. A URL
    seen for the first time only gets its first sample recorded. The
    updated history is written back after all URLs have been processed.

    Args:
        urls: Iterable of page addresses to check.
    """
    history = load_pickle()

    for url in urls:
        sampled_at = datetime.utcnow()
        digest = fetch_and_hash_content(url)
        if not digest:
            # Fetch failed; keep the old samples untouched.
            continue

        if url in history:
            # Compare against the most recent sample only.
            latest = history[url][-1]
            if latest[1] != digest:
                notify_of_change(url, latest[1], latest[0], digest,
                                 sampled_at)
        else:
            history[url] = []

        # Every successful fetch is recorded, changed or not.
        history[url].append((sampled_at, digest))

    save_pickle(history)
# Run a single check over the default URL list when executed as a script
# (e.g. from a scheduler); importing the module has no side effects.
if __name__ == '__main__':
    check_if_updated()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment