Created
May 15, 2012 05:18
-
-
Save sarlmolapple/2699314 to your computer and use it in GitHub Desktop.
Send RSS articles to my Kindle every day
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import feedparser | |
import urllib | |
import re | |
import smtplib | |
import base64 | |
from email.MIMEMultipart import MIMEMultipart | |
from email.MIMEText import MIMEText | |
# Feed whose items are downloaded and mailed out.
RSS_SOURCE = 'http://www.infzm.com/rss/home/rss2.0.xml'

# Recipient address, plus the sender account used to log in to the SMTP server.
KINDLE_MAIL = '******@kindle.com'
USER_MAIL = '******@gmail.com'
USER_PASS = '******'

# Outgoing mail server; the connection is upgraded with STARTTLS before login.
SMTP_SERVER = 'smtp.gmail.com'
SMTP_PORT = 587
def get_num(url):
    """Return the first run of decimal digits found in *url*.

    Returns an empty string when *url* contains no digit at all.
    """
    digits = re.search(r'\d+', url, re.DOTALL)
    return digits.group() if digits else ''
def get_reg(html):
    """Extract the article section from a page.

    Returns the text starting at the opening tag of the section whose id is
    "articleContent" and running through the LAST closing section tag in
    *html* (the match is greedy and spans newlines). Returns an empty string
    when no such span exists.
    """
    pattern = r"""<section id=\"articleContent\".*<\/section>"""
    found = re.search(pattern, html, re.DOTALL)
    if found is None:
        return ''
    return found.group()
def get_article(url):
    """Download *url* and return its article section.

    The page body is fetched with ``urllib.urlopen`` and handed to
    ``get_reg``; the result is whatever ``get_reg`` returns for that body
    (an empty string when the article section is not found).

    Any error raised while opening or reading the URL propagates to the
    caller.
    """
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()
    return get_reg(html)
def process_img(html):
    """Replace remote JPEG ``<img>`` tags in *html* with inline data URIs.

    Every tag matching ``<img width ... jpeg" />`` is looked at in turn. The
    ``http...jpeg`` URL inside the tag is downloaded, base64-encoded and the
    whole tag is replaced by ``<img src="data:image/jpeg;base64,..."/>``.
    Tags that contain no ``http`` URL are left exactly as they were.

    Returns the rewritten markup; input without matching tags is returned
    unchanged. Errors raised while downloading an image propagate to the
    caller.
    """
    tag_pattern = re.compile(r'<img width.*?jpeg" />', re.DOTALL)
    url_pattern = re.compile(r'http.*jpeg', re.DOTALL)

    def _inline(tag_match):
        # Build the replacement for one matched tag.
        tag = tag_match.group()
        url_match = url_pattern.search(tag)
        if url_match is None:
            # Nothing downloadable in this tag (e.g. a relative src).
            return tag
        response = urllib.urlopen(url_match.group())
        try:
            encoded = base64.b64encode(response.read())
        finally:
            response.close()
        return '<img src="data:image/jpeg;base64,' + encoded + '"/>'

    # A callable replacement ties each download to the exact tag it came
    # from and keeps the encoded data from being parsed as a regex template.
    return tag_pattern.sub(_inline, html)
def send_email(subject, filePath, fileName):
    """Mail the file at *filePath* to KINDLE_MAIL as an attachment.

    Args:
        subject: value used for the Subject header.
        filePath: path of the file whose bytes become the attachment.
        fileName: name advertised for the attachment in Content-Disposition.

    The message is sent from USER_MAIL through SMTP_SERVER:SMTP_PORT after
    STARTTLS and login. Errors from reading the file or from the SMTP
    exchange propagate to the caller; the connection is closed either way.
    """
    message = MIMEMultipart('related')
    message['Subject'] = subject
    message['From'] = USER_MAIL
    message['FromName'] = 'Sarlmol'
    message['To'] = KINDLE_MAIL
    message.preamble = 'This is a multi-part message in MIME format.'

    msgAlternative = MIMEMultipart('alternative')
    message.attach(msgAlternative)

    with open(filePath, 'rb') as attachment_file:
        payload = attachment_file.read()

    # MIMEText is used only for its base64 body encoding of the raw bytes.
    att = MIMEText(payload, 'base64', 'gb2312')
    # MIMEText already set a Content-Type; item assignment would append a
    # second one instead of replacing it, so replace the header explicitly.
    att.replace_header('Content-Type', 'application/octet-stream')
    # add_header takes care of quoting the filename parameter.
    att.add_header('Content-Disposition', 'attachment', filename=fileName)
    msgAlternative.attach(att)

    sm = smtplib.SMTP(SMTP_SERVER, port=SMTP_PORT, timeout=20)
    try:
        # NOTE(review): debug level 1 echoes the SMTP dialogue, which
        # presumably includes the login exchange -- confirm this is wanted.
        sm.set_debuglevel(1)
        sm.ehlo()
        sm.starttls()
        # The session must be re-introduced after the TLS upgrade.
        sm.ehlo()
        sm.login(USER_MAIL, USER_PASS)
        sm.sendmail(USER_MAIL, KINDLE_MAIL, message.as_string())
        sm.quit()
    finally:
        # No-op after a successful quit(); drops the socket on failure.
        sm.close()
import os

# Script body: fetch the feed, save each item's article as a standalone HTML
# file under ./htmls/, then mail that file out.
doc = feedparser.parse(RSS_SOURCE)
items = doc['items']

# The output directory is not created anywhere else; make sure it exists so
# the first open() below does not fail.
if not os.path.isdir('./htmls/'):
    os.makedirs('./htmls/')

for item in items:
    html = '<html><head><title>'+item.title.encode('UTF-8')+'</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>'+get_article(item.link)+'</body></html>'
    # The file is named after the first number found in the item's link.
    filename = get_num(item.link)+'.html'
    filepath = './htmls/'+filename
    with open(filepath, 'w') as out:
        out.write(html)
    send_email(item.title, filepath, filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment