Skip to content

Instantly share code, notes, and snippets.

@philshem
Last active August 29, 2015 14:19
Show Gist options
  • Select an option

  • Save philshem/b156726eca9b5a327332 to your computer and use it in GitHub Desktop.

Select an option

Save philshem/b156726eca9b5a327332 to your computer and use it in GitHub Desktop.
Send a tweet or a direct message on Twitter when a webpage changes
# -*- coding: utf-8 -*-
# 1. scrape a webpage
# 2. compare to previous version
# 3. send a tweet (or direct message) when page is updated
import requests
import os
from lxml import html
from datetime import datetime
import twitter #https://github.com/bear/python-twitter
# Pages to watch; each URL must start with 'http://'.
url_list = [
    'http://www.tired.com',
    'http://www.purple.com',
]

# Twitter accounts that receive the mention or the direct message.
screen_name_list = ['philshem']

# python-twitter client; replace the placeholders with real credentials.
api = twitter.Api(
    consumer_key='INSERT_YOURS',
    consumer_secret='INSERT_YOURS',
    access_token_key='INSERT_YOURS',
    access_token_secret='INSERT_YOURS',
)

# Taken once at start-up: names the archived html copy and is appended to the
# tweet text so that every message is unique.
getnow = datetime.now()
def main(timeout=30):
    """Check every URL in ``url_list`` and notify when its text has changed.

    Each page is downloaded and kept as ``<host>.html``, where ``<host>`` is
    the host part of the URL with ``www.`` removed and dots replaced by
    underscores.  The first time a URL is seen the file is only created.
    On later runs the stored and the fresh copy are both passed through
    ``clean_html``; when the results differ a direct message is sent with
    ``send_tweet``, the old copy is archived under a timestamp-prefixed
    name and the fresh copy becomes the active one.

    A URL that cannot be fetched (connection error, timeout or HTTP error
    status) is reported and skipped, so the remaining URLs are still
    checked and an error page never replaces the stored copy.

    Args:
        timeout: Seconds to wait for the server before giving up on a URL.
    """
    for url in url_list:
        # 'http://www.tired.com' -> 'tired_com.html'
        file_name = url.split('/')[2].replace('www.', '').replace('.', '_') + '.html'

        try:
            # Without a timeout the request can block forever on a dead host.
            r = requests.get(url, timeout=timeout)
            # Treat 4xx/5xx answers as failures rather than as page content.
            r.raise_for_status()
        except requests.exceptions.RequestException as err:
            print('Could not fetch %s: %s' % (url, err))
            continue

        html_new = r.text.encode('utf-8')

        # First sighting: store a baseline, nothing to compare against yet.
        if not os.path.isfile(file_name):
            print('Writing %s because it did not exist.' % file_name)
            with open(file_name, 'wb') as fyle:
                fyle.write(html_new)
            continue

        with open(file_name, 'rb') as fyle:
            html_old = fyle.read()

        if clean_html(html_old) == clean_html(html_new):
            print('No change for: %s' % file_name)
            continue

        print('Change to:  %s' % file_name)
        send_tweet(screen_name_list, url, 'message')  # send direct messages
        # To post a public tweet instead, pass 'tweet' as the method.

        # Keep the previous version under a timestamp-prefixed name.
        archive_name = getnow.strftime('%Y%m%d_%H%M%S') + '.' + file_name
        with open(archive_name, 'wb') as fyle:
            fyle.write(html_old)

        # The un-prefixed file is always the most recent (active) copy.
        with open(file_name, 'wb') as fyle:
            fyle.write(html_new)
def clean_html(html_in):
    """Return the text content of an html document with whitespace collapsed.

    The markup is parsed with ``lxml.html``, the element text is extracted
    with ``text_content()`` and every run of whitespace is reduced to a
    single space, so that pure markup or formatting changes do not count
    as a change of the page.

    Args:
        html_in: The html document as a (byte) string.

    Returns:
        The normalised text, encoded as UTF-8.  An empty or whitespace-only
        document yields an empty byte string.
    """
    # lxml refuses to parse an empty document (ParserError), which would
    # abort the whole run after a server once answered with an empty body.
    if not html_in or not html_in.strip():
        return b''

    text = html.fromstring(html_in).text_content()
    # Collapse whitespace while still text and encode afterwards; joining
    # already-encoded pieces with a str separator fails on Python 3.
    return ' '.join(text.split()).encode('utf-8')
def send_tweet(screen_name_list, in_text, method='message'):
    """Announce a change via Twitter, as a public tweet or as direct messages.

    The message text is ``'Change to: <in_text> at <timestamp>'`` where the
    timestamp comes from the module-level ``getnow``.

    Args:
        screen_name_list: Screen names of the recipients.
        in_text: Text identifying what changed (the caller passes the URL).
        method: ``'tweet'`` posts one status update that mentions every
            recipient; ``'message'`` sends one direct message per recipient.

    Raises:
        SystemExit: With exit code 1 when ``method`` is neither ``'tweet'``
            nor ``'message'``.
    """
    # Validate first so an unsupported method never does any other work.
    if method not in ('tweet', 'message'):
        print('Unknown twitter method')
        # Same effect as exit(1), without depending on the helper that is
        # only injected by the ``site`` module.
        raise SystemExit(1)

    text = 'Change to: ' + in_text + ' at ' + getnow.strftime('%Y%m%d_%H%M%S')

    if method == 'tweet':
        # Only prepend mentions when there is somebody to mention; an empty
        # list would otherwise produce a stray '@ ' prefix.
        if screen_name_list:
            text = '@' + ' @'.join(screen_name_list) + ' ' + text
        api.PostUpdate(status=text)
        return

    for screen_name in screen_name_list:
        api.PostDirectMessage(text, screen_name=screen_name)
        # Report only after the API call returned without raising.
        print('Sent DM to: %s' % (screen_name,))
# Run the page check only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment