Last active
August 29, 2015 14:19
-
-
Save philshem/b156726eca9b5a327332 to your computer and use it in GitHub Desktop.
Send a tweet or a Twitter direct message when a webpage changes (is updated)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| # 1. scrape a webpage | |
| # 2. compare to previous version | |
| # 3. send a tweet (or direct message) when page is updated | |
| import requests | |
| import os | |
| from lxml import html | |
| from datetime import datetime | |
| import twitter #https://github.com/bear/python-twitter | |
# Pages to watch. Every entry must start with 'http://' because the host
# name is taken from the third '/'-separated piece of the URL.
url_list = [
    'http://www.tired.com',
    'http://www.purple.com',
]

# Twitter accounts that receive the tweet or the direct message.
screen_name_list = ['philshem']

# python-twitter client; replace the four placeholders with your own keys.
api = twitter.Api(
    consumer_key='INSERT_YOURS',
    consumer_secret='INSERT_YOURS',
    access_token_key='INSERT_YOURS',
    access_token_secret='INSERT_YOURS',
)

# Time of this run: used to name archived html files and appended to the
# tweet text so that every notification is unique.
getnow = datetime.now()
def main():
    """Check every URL in ``url_list`` and notify when its text changed.

    For each URL the page is downloaded and its text (as produced by
    ``clean_html``) is compared with the copy saved by the previous run:

    * no saved copy  -> the page is stored as the baseline, nothing is sent;
    * text differs   -> ``send_tweet`` is called, the old copy is archived
      under a timestamp-prefixed file name and the new page becomes the
      baseline;
    * text identical -> nothing is written.

    A URL that cannot be fetched is reported and skipped, so the remaining
    URLs are still checked.
    """
    for url in url_list:
        # 'http://www.example.com/x' -> 'example_com.html' (host part only).
        host = url.split('/')[2]
        file_name = host.replace('www.', '').replace('.', '_') + '.html'

        try:
            # Without a timeout a stalled server would hang the whole run.
            r = requests.get(url, timeout=30)
            # An HTTP error page is not an update of the watched page; do not
            # let it trigger a notification or overwrite the baseline.
            r.raise_for_status()
        except requests.RequestException as err:
            print('Could not fetch %s: %s' % (url, err))
            continue
        html_new = r.text.encode('utf-8')

        if not os.path.isfile(file_name):
            print('Writing %s because it did not exist.' % file_name)
            with open(file_name, 'wb') as fyle:
                fyle.write(html_new)
            continue

        with open(file_name, 'rb') as fyle:
            html_old = fyle.read()

        if clean_html(html_old) == clean_html(html_new):
            print('No change for: %s' % file_name)
            continue

        print('Change to:  %s' % file_name)
        # Notify before touching the files: if sending fails, the baseline is
        # left as it was and the change is detected again on the next run.
        send_tweet(screen_name_list, url, 'message')  # send direct messages
        # To post a public tweet instead, pass 'tweet' as the method.

        # save old html with timestamp
        archive_name = getnow.strftime('%Y%m%d_%H%M%S') + '.' + file_name
        with open(archive_name, 'wb') as fyle:
            fyle.write(html_old)

        # save new html without timestamp (active copy)
        with open(file_name, 'wb') as fyle:
            fyle.write(html_new)
def clean_html(html_in):
    """Return the text content of an HTML document with whitespace collapsed.

    The markup is parsed with ``lxml.html``, the element text is extracted
    with ``text_content()`` and every run of whitespace is replaced by a
    single space, so that pure formatting changes do not count as a page
    change.

    :param html_in: HTML source of the page (the callers pass UTF-8 bytes).
    :returns: the normalised text, UTF-8 encoded; empty for an empty document.
    """
    if not html_in.strip():
        # lxml refuses to parse an empty document; an empty page simply has
        # no text to compare.
        return b''
    text = html.fromstring(html_in).text_content()
    # Collapse whitespace on the text itself and encode once at the end:
    # joining already-encoded pieces with a str separator fails on Python 3.
    return ' '.join(text.split()).encode('utf-8')
def send_tweet(screen_name_list, in_text, method='message'):
    """Announce a change through the module-level Twitter client ``api``.

    The text sent is ``'Change to: <in_text> at <run timestamp>'``; the
    timestamp comes from the module-level ``getnow`` and keeps each
    notification unique.

    :param screen_name_list: Twitter screen names to notify.
    :param in_text: what changed (the callers pass the page URL).
    :param method: ``'message'`` sends one direct message per screen name;
        ``'tweet'`` posts a single status update that mentions every screen
        name.
    :raises SystemExit: with exit code 1 when ``method`` is neither
        ``'tweet'`` nor ``'message'``.
    """
    if method not in ('tweet', 'message'):
        print('Unknown twitter method')
        # Same effect as the interactive ``exit(1)`` helper, but does not
        # depend on the ``site`` module having injected it.
        raise SystemExit(1)

    text = 'Change to: ' + in_text + ' at ' + getnow.strftime('%Y%m%d_%H%M%S')

    if method == 'tweet':
        if screen_name_list:
            # Skip the mention prefix entirely when there is nobody to
            # mention, instead of starting the tweet with a stray '@ '.
            text = '@' + ' @'.join(screen_name_list) + ' ' + text
        api.PostUpdate(status=text)
        return

    for screen_name in screen_name_list:
        api.PostDirectMessage(text, screen_name=screen_name)
        # Report only after the call returned, so the log never claims a
        # message that failed to go out.
        print('Sent DM to: %s' % screen_name)
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment