Skip to content

Instantly share code, notes, and snippets.

@sarlmolapple
Created May 15, 2012 05:18
Show Gist options
  • Save sarlmolapple/2699314 to your computer and use it in GitHub Desktop.
Save sarlmolapple/2699314 to your computer and use it in GitHub Desktop.
Send RSS articles to my Kindle every day
# -*- coding: utf-8 -*-
import base64
import os
import re
import smtplib
import urllib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText

import feedparser
# Feed the articles are pulled from.
RSS_SOURCE = 'http://www.infzm.com/rss/home/rss2.0.xml'

# Mail endpoints: the Kindle address that receives the documents and the
# account they are sent from (send_email logs in with USER_MAIL / USER_PASS).
KINDLE_MAIL = '******@kindle.com'
USER_MAIL = '******@gmail.com'
USER_PASS = '******'

# Outgoing mail server; send_email issues STARTTLS on this connection.
SMTP_SERVER = 'smtp.gmail.com'
SMTP_PORT = 587
def get_num(url):
    """Return the first run of decimal digits found in ``url``.

    An empty string is returned when ``url`` contains no digit.
    """
    found = re.search(r'\d+', url, re.DOTALL)
    return found.group() if found else ''
def get_reg(html):
    """Extract the ``articleContent`` section from a page's HTML.

    The match starts at ``<section id="articleContent"`` and, because the
    wildcard is greedy and ``.`` also matches newlines, runs through the
    last ``</section>`` in ``html``. Returns ``''`` when nothing matches.
    """
    found = re.search(
        r'<section id="articleContent".*<\/section>', html, re.DOTALL)
    if found is None:
        return ''
    return found.group()
def get_article(url):
    """Download ``url`` and return its article section.

    The page body is passed to ``get_reg``, so the result is the matched
    ``articleContent`` markup, or ``''`` when the page has none.
    """
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()
    return get_reg(html)
def process_img(html):
    """Inline the JPEG images referenced by ``html`` as base64 data URIs.

    Every ``<img width ... jpeg" />`` tag whose text contains an ``http``
    URL ending in ``jpeg`` is replaced by an ``<img>`` tag carrying the
    downloaded image as a ``data:image/jpeg;base64,`` source. Tags without
    such a URL (for example a relative ``src``) are left untouched.

    Returns the rewritten HTML; input without matching tags is returned
    unchanged.
    """
    # [^>] keeps each match inside one tag, so a non-JPEG <img> can never
    # swallow the markup up to a later JPEG tag.
    tag_pattern = re.compile(r'<img width[^>]*?jpeg" />')
    # [^"] keeps the URL inside a single quoted attribute value.
    url_pattern = re.compile(r'http[^"]*jpeg')

    def inline(tag_match):
        tag = tag_match.group()
        url_match = url_pattern.search(tag)
        if url_match is None:
            # Nothing downloadable in this tag; keep it as written.
            return tag
        remote = urllib.urlopen(url_match.group())
        try:
            payload = remote.read()
        finally:
            remote.close()
        # b64encode emits a single line, so the attribute value holds no
        # embedded newlines and no scratch file is needed.
        encoded = base64.b64encode(payload)
        return '<img src="data:image/jpeg;base64,' + encoded + '"/>'

    # A callable replacement rewrites each tag in place (so a skipped tag
    # cannot shift later substitutions) and is never parsed as a template.
    return tag_pattern.sub(inline, html)
def send_email(subject, filePath, fileName):
    """Mail the file at ``filePath`` to KINDLE_MAIL as an attachment.

    Args:
        subject: Value for the Subject header.
        filePath: Path of the file whose bytes become the attachment.
        fileName: File name advertised in the attachment's
            Content-Disposition header.

    The message is sent from USER_MAIL through SMTP_SERVER:SMTP_PORT after
    STARTTLS and login. I/O and SMTP errors propagate to the caller.
    """
    from email.mime.application import MIMEApplication

    message = MIMEMultipart('related')
    message['Subject'] = subject
    message['From'] = USER_MAIL
    message['FromName'] = 'Sarlmol'
    message['To'] = KINDLE_MAIL
    message.preamble = 'This is a multi-part message in MIME format.'

    msgAlternative = MIMEMultipart('alternative')
    message.attach(msgAlternative)

    # Read through a context manager so the handle is closed promptly.
    with open(filePath, 'rb') as source:
        payload = source.read()

    # MIMEApplication writes a single application/octet-stream Content-Type
    # and base64-encodes the bytes. Assigning att["Content-Type"] on a
    # MIMEText part appended a second, conflicting Content-Type header.
    att = MIMEApplication(payload, 'octet-stream')
    att.add_header('Content-Disposition', 'attachment', filename=fileName)
    msgAlternative.attach(att)

    sm = smtplib.SMTP(SMTP_SERVER, port=SMTP_PORT, timeout=20)
    try:
        # NOTE(review): debug level 1 echoes the SMTP dialogue, including
        # the login exchange, to stderr.
        sm.set_debuglevel(1)
        sm.ehlo()
        sm.starttls()
        sm.ehlo()
        sm.login(USER_MAIL, USER_PASS)
        sm.sendmail(USER_MAIL, KINDLE_MAIL, message.as_string())
        sm.quit()
    finally:
        # close() is a no-op after a successful quit() and releases the
        # socket when any step above raised.
        sm.close()
# Fetch the feed and mail every entry to the Kindle as its own HTML file.
doc = feedparser.parse(RSS_SOURCE)
items = doc['items']

# Saved pages go here; create the directory on first run so the open()
# call in the loop does not fail.
if not os.path.isdir('./htmls'):
    os.makedirs('./htmls')

for item in items:
    # Wrap the extracted article in a minimal UTF-8 HTML document.
    html = (
        '<html><head><title>' + item.title.encode('UTF-8') + '</title>'
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'
        '</head><body>' + get_article(item.link) + '</body></html>'
    )
    # The file is named after the first digit run in the entry's link.
    filename = get_num(item.link) + '.html'
    filepath = './htmls/' + filename
    # The with-block closes the file even if the write fails.
    with open(filepath, 'w') as out:
        out.write(html)
    send_email(item.title, filepath, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment