Skip to content

Instantly share code, notes, and snippets.

@bcdejp
Created December 29, 2014 08:10
Show Gist options
  • Save bcdejp/9a6a61457bce570786c7 to your computer and use it in GitHub Desktop.
Save bcdejp/9a6a61457bce570786c7 to your computer and use it in GitHub Desktop.
英語のニュースをKindleに送信する
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import feedparser
import requests
import lxml.html
import gmail
import re
import os
from time import mktime
from datetime import datetime
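#Kindle address: the e-mail address the generated document is sent to (replace the placeholder below with your own)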
KINDLE_ADDRESS = "[email protected]"
class News(object):
    """Container for one news article.

    Attributes:
        title: headline of the article.
        content: body text of the article.
        url: address the article was fetched from.
        date: update time of the article.
    """

    def __init__(self, title, content, url, date):
        """Store the given values unchanged on the instance."""
        self.title = title
        self.url = url
        self.date = date
        self.content = content

    def showinfo(self):
        """Print the title followed by the date in parentheses."""
        # A single parenthesised argument is valid both as a Python 2 print
        # statement and as a Python 3 print() call, with identical output.
        print('%s (%s)' % (self.title, self.date))
def create_html(list, filename):
    """Render the articles through template.html and save the result.

    The template is loaded from, and the output is written to, the directory
    that contains this script.

    Args:
        list: iterable of objects exposing ``title`` and ``content``
            attributes. (The name shadows the builtin; it is kept so that
            existing callers keep working.)
        filename: name of the HTML file to create in the script directory.
    """
    import io
    from jinja2 import Environment, FileSystemLoader

    path = os.path.abspath(os.path.dirname(__file__))
    env = Environment(loader=FileSystemLoader(path, encoding='utf8'))
    tpl = env.get_template('template.html')

    news_list = [{'title': news.title, 'body': news.content} for news in list]

    #The document title carries the time of generation
    title = datetime.today().strftime("News_%Y-%m-%d_%H:%M:%S")
    html = tpl.render({'title': title, 'news_list': news_list})

    # io.open encodes the rendered text itself, which works on Python 2 and 3
    # (writing pre-encoded bytes to a text-mode file fails on Python 3), and
    # the with-block closes the file even when the write raises.
    with io.open(os.path.join(path, filename), 'w', encoding='utf-8') as out:
        out.write(html)
#Matches any HTML tag; compiled once and reused for every article
_TAG_PATTERN = re.compile(r"<[^>]*?>")


def _entry_datetime(entry):
    """Return the entry's update time as a naive local datetime, or None if the feed gave no parsable date."""
    import calendar

    parsed = getattr(entry, 'updated_parsed', None)
    if parsed is None:
        return None
    # feedparser reports parsed dates in UTC, so the tuple must be converted
    # with timegm (UTC) rather than mktime (local time); otherwise the result
    # is skewed by the local UTC offset when compared with datetime.today().
    return datetime.fromtimestamp(calendar.timegm(parsed))


def _fetch_article(link):
    """Download the article page at link and return (title, tag-stripped content)."""
    #Fetch the web page (HTML); the timeout keeps a stalled server from hanging the run
    req = requests.get(link, timeout=30)
    root = lxml.html.fromstring(req.text)
    #Extract the article from the HTML (Japan Today page layout)
    title = root.get_element_by_id('main_title').text
    markup = lxml.html.tostring(root.get_element_by_id('article_content'))
    # tostring() returns ASCII bytes by default; decode so the str pattern
    # can be applied on Python 3 as well as Python 2.
    return title, _TAG_PATTERN.sub("", markup.decode('ascii'))


def main():
    """Collect recent Japan Today articles, write them to an HTML file and mail it to the Kindle address."""
    #URL of the RSS feed
    rss_url = "http://www.japantoday.com/feed/"
    #Only news updated within the last interval_time hours is collected
    interval_time = 24
    #Current time
    now = datetime.today()

    feed = feedparser.parse(rss_url)
    news_list = []
    for entry in feed.entries:
        #Process the RSS entries one at a time
        date = _entry_datetime(entry)
        if date is None:
            #Without an update time the age of the entry cannot be judged
            continue
        # total_seconds() gives the same cutoff on Python 2 and 3, unlike the
        # version-dependent result of dividing timedelta.seconds by 3600.
        if (now - date).total_seconds() > interval_time * 3600:
            continue
        link = entry.link
        try:
            title, content = _fetch_article(link)
        except (requests.RequestException, KeyError) as err:
            # A failed download or a page without the expected element ids
            # should not abort the whole digest; report it and move on.
            print('Skipped %s: %s' % (link, err))
            continue
        #Add the article to the list
        news_list.append(News(title, content, link, date))

    filename = now.strftime("News_%Y-%m-%d_%H:%M:%S") + ".html"
    create_html(news_list, filename)

    #Build the message
    to_addr = KINDLE_ADDRESS
    subject = filename
    body = ""
    # NOTE(review): this MIME type describes CSV although the attachment is
    # HTML; left unchanged because the gmail helper is not visible here.
    mime = {'type': 'text', 'subtype': 'comma-separated-values'}
    # create_html writes next to this script, so point at that location
    # instead of relying on the current working directory.
    # Assumes gmail.create_message opens attach_file['path'] as given - TODO confirm.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    attach_file = {'name': filename, 'path': os.path.join(script_dir, filename)}
    msg = gmail.create_message(gmail.ADDRESS, to_addr, subject, body, mime, attach_file)
    #Send the mail
    gmail.send(gmail.ADDRESS, [to_addr], msg)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment