Last active
October 22, 2015 18:40
-
-
Save jschaub30/be53fa1dc30642186021 to your computer and use it in GitHub Desktop.
Python script to email users tagged in posts/comments of a WordPress blog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import datetime
import re
import smtplib
import sys
import time
from email.mime.text import MIMEText

import feedparser
import requests
from bs4 import BeautifulSoup
# When True, notify_user() composes the message but skips the SMTP send, and
# get_subscribers() prints every subscriber it collected.
debug_flag = False
def _published_utc(post):
    """Return the publication time of a feed entry as a naive UTC datetime."""
    parsed = post.get('published_parsed')
    if parsed:
        # feedparser normalises the *_parsed dates to UTC, which matches the
        # time.gmtime()-based value stored by write_new_timestamp().
        return datetime.datetime(*parsed[:6])
    # Fallback for entries feedparser could not parse: drop the trailing
    # timezone token and parse the remainder of the RFC 822 date.
    stamp = ' '.join(post['published'].split()[:-1])
    return datetime.datetime.strptime(stamp, '%a, %d %b %Y %H:%M:%S')


def _is_tagged(handle, lowered_content):
    """Return True when ``@handle`` appears as a complete tag in the text."""
    tag = '@' + handle.replace(' ', '-').lower()
    # The lookahead stops "@bob" from matching inside "@bobby" or "@bob-smith".
    pattern = re.escape(tag) + r'(?![\w-])'
    return re.search(pattern, lowered_content, re.UNICODE) is not None


def main(blog_url, username, password):
    """Notify every subscriber tagged in a post or comment since the last run.

    Args:
        blog_url: Base URL of the blog; a trailing slash is ignored.
        username: Login name used to read the blog's user list.
        password: Password for ``username``.

    The posts feed and the comments feed are both scanned.  An entry triggers
    a notification when it was published after the stored timestamp and its
    text contains ``@<handle>`` (handle lower-cased, spaces replaced by
    dashes).  The stored timestamp is refreshed once all entries are handled.
    """
    blog_url = blog_url.rstrip('/')
    old_timestamp = read_old_timestamp(blog_url)
    subscribers = get_subscribers(blog_url, username, password)
    sys.stdout.write("\n============================\nScraping %s\n" % blog_url)
    sys.stdout.write("\nAnalyzing posts and comments since %s\n" % str(old_timestamp))
    posts = feedparser.parse(blog_url + '/?feed=rss2')
    comments = feedparser.parse(blog_url + '/?feed=comments-rss2')
    for post in posts['entries'] + comments['entries']:
        sys.stdout.write('Analyzing "%s": %s\n' % (post['title'].encode('utf-8'),
                                                   post['published'].encode('utf-8')))
        if _published_utc(post) <= old_timestamp:
            continue  # already handled by an earlier run
        # Full content when the feed provides it, otherwise the summary.
        if 'content' in post:
            content = post['content'][0]['value']
        else:
            content = post.get('summary', '')
        lowered = content.lower()
        for handle, email in subscribers.items():
            if _is_tagged(handle, lowered):
                notify_user(blog_url, handle, email, post)
    # NOTE(review): the new timestamp is taken after the feeds were fetched, so
    # an entry published in between would be skipped next run -- confirm
    # whether that window matters for this deployment.
    write_new_timestamp(blog_url)
def notify_user(blog_url, handle, email, post):
    """Email one subscriber that they were tagged in a post or comment.

    Args:
        blog_url: Base URL of the blog; its last path segment is used in the
            subject line.
        handle: Name of the tagged subscriber, used in the greeting.
        email: Address the notification is sent to.
        post: Feed entry providing the 'title', 'link' and 'published' keys.

    Delivery goes through an SMTP server on localhost and is best-effort: a
    failure is reported on stdout and never raised.  Nothing is sent when the
    module-level ``debug_flag`` is true.
    """
    sys.stdout.write("!! Found user %s tagged in %s\n" % (handle, post['link']))
    # NOTE(review): the two '[email protected]' literals look like redacted
    # placeholders -- confirm the real addresses before deploying.
    sender = '[email protected]'
    msg = 'Greetings %s,\n\nOn %s, you were tagged in ' % (handle, post['published'])
    msg += 'this post on the spark blog:\n\ntitle:\t"%s"\nurl:\t%s' % (
        post['title'], post['link'])
    msg += '\n\nTo disable these notifications, please email \n'
    msg += 'Jeremy ([email protected]).\n\n'
    msg = MIMEText(msg, 'plain', 'utf-8')
    msg['From'] = sender
    msg['To'] = email
    msg['Subject'] = blog_url.split('/')[-1] + " blog: you have been tagged"
    if debug_flag:
        return
    smtp = None
    try:
        smtp = smtplib.SMTP('localhost')
        smtp.sendmail(sender, [email], msg.as_string())
        sys.stdout.write("Successfully sent email\n")
    except Exception as err:
        # write() needs text; passing the exception object itself raised a
        # TypeError here and hid the original failure.
        sys.stdout.write("%s\n" % err)
        sys.stdout.write("Error: unable to send email\n")
        sys.stdout.write('Run "sudo postfix start"\n')
    finally:
        if smtp is not None:
            try:
                smtp.quit()
            except smtplib.SMTPException:
                # The server already dropped the connection; just release it.
                smtp.close()
def write_new_timestamp(blog_url):
    """Record the current UTC time as the last-run marker for ``blog_url``.

    The marker lives in the current working directory.  Its name is the
    lower-cased URL with 'http://' removed and every '/' turned into '.',
    followed by '.timestamp'.  Any existing marker is overwritten.
    """
    fn = blog_url.replace('http://', '').replace('/', '.').lower() + '.timestamp'
    # The context manager closes the file even if the write fails.
    with open(fn, 'w') as f:
        f.write(time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
def read_old_timestamp(blog_url):
    """Return the time of the previous run for ``blog_url``.

    The value is read from the first line of the marker file that
    write_new_timestamp() maintains, in '%Y-%m-%d %H:%M:%S' format.

    Returns:
        A naive ``datetime.datetime`` parsed from the marker.

    If the marker is missing, unreadable or malformed, a fresh marker is
    written and the process exits, so the next run starts from a known point.
    """
    fn = blog_url.replace('http://', '').replace('/', '.').lower() + '.timestamp'
    try:
        with open(fn, 'r') as f:
            first_line = f.readline().strip()
        return datetime.datetime.strptime(first_line, '%Y-%m-%d %H:%M:%S')
    except IOError:
        # First run (or unreadable file): create the marker and stop.
        pass
    except ValueError:
        # An empty or corrupt marker would otherwise crash every later run.
        sys.stderr.write("Invalid timestamp in %s; resetting\n" % fn)
    write_new_timestamp(blog_url)
    sys.exit()
def get_subscribers(blog_url, username, password):
    """Scrape the blog's admin user list into a ``{login: email}`` dict.

    Args:
        blog_url: Base URL of the blog, without a trailing slash.
        username: Login name posted to ``wp-login.php``.
        password: Password for ``username``.

    Returns:
        dict mapping the link text of each row's first cell to the text of
        its third cell on ``wp-admin/users.php``.

    Exits the process after writing "Authentication error" to stderr when the
    total user count cannot be found on the first page.
    """
    subscribers = dict()
    payload = {
        'action': 'login',
        'log': username,
        'pwd': password
    }
    session = requests.session()
    session.post('%s/wp-login.php' % blog_url, data=payload)
    users_url = '%s/wp-admin/users.php' % blog_url
    soup = BeautifulSoup(session.get(users_url).text, 'html.parser')
    try:
        # The "All (N)" filter link carries the total number of users.
        num_subs = int(soup.find_all('li', class_="all")[0].span.text.split('(')[1].split(')')[0])
    except (IndexError, AttributeError):
        sys.stderr.write("Authentication error\n")
        sys.exit()
    page = 1  # the request above already returned the first page
    while True:
        count_before = len(subscribers)
        for row in soup.find_all('tr')[1:]:  # first row is header
            cells = row.find_all('td')
            try:
                login = cells[0].find_all('a')[0].text
                email = cells[2].text
            except IndexError:
                continue  # not a user row
            subscribers[login] = email
        if len(subscribers) >= num_subs:
            break
        print("Found %d out of %d subscribers" % (len(subscribers), num_subs))
        if len(subscribers) == count_before:
            # This page added nothing, so further requests cannot reach the
            # advertised total; stop rather than loop forever.
            sys.stderr.write("No new subscribers on page %d; stopping\n" % page)
            break
        page += 1
        url = '%s?paged=%d' % (users_url, page)
        print("Get(%s)" % url)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')
    if debug_flag:
        for handle in subscribers:
            print("Found user %s (%s)" % (handle, subscribers[handle]))
    return subscribers
def usage():
    """Write the command-line synopsis to stdout (no trailing newline)."""
    synopsis = 'blog_notifier.py <blog_url> <username> <password>'
    sys.stdout.write(synopsis)
if __name__ == '__main__':
    # main() takes three arguments after the script name, so sys.argv must
    # hold at least four items; checking for three let a two-argument call
    # through to an IndexError on sys.argv[3].
    if len(sys.argv) < 4:
        usage()
        sys.exit()
    else:
        main(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment