Last active
August 29, 2015 14:00
-
-
Save noexpect/11051994 to your computer and use it in GitHub Desktop.
本日発売のコミックを毎日htmlメールでお知らせ pythonでBeautifulSoupとmailコマンド on Amazon Linux
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- | |
import codecs | |
import sys | |
reload(sys) | |
sys.setdefaultencoding('shift_jis') | |
from BeautifulSoup import BeautifulSoup | |
import urllib2 | |
import re | |
class scrape_web(object):
    """Collect one day's new comic releases from taiyosha.co.jp as HTML rows.

    Constructing an instance downloads the monthly release page that covers
    the requested day, pulls the ``table_box_new_book`` table out of it and
    keeps one ``<tr>`` per book whose date cell matches that day.

    Args:
        yymm: Year and month exactly as they appear in the page URL.
        dd: Day of month as a string; it is compared verbatim with the text
            sliced out of each date cell, so "08" and "8" are different.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the page cannot be fetched,
        and ``ValueError`` if ``dd`` is not an integer literal.
    """

    url_base = "http://www.taiyosha.co.jp/comic/comic"
    url_tail1 = "_date1.html"
    url_tail2 = "_date2.html"
    book_list = ""

    # Removes anything that looks like an HTML tag; compiled once because it
    # is applied to several lines per book.
    _tag_pattern = re.compile("<[^>]*?>")
    # Number of markup lines the parser steps over per book.
    # NOTE(review): the 8-line stride and the field offsets used below are
    # taken from the indexing in this code -- verify against the live page.
    _lines_per_book = 8

    def __init__(self, yymm, dd):
        response = urllib2.urlopen(self.create_url(yymm, dd))
        try:
            soup = BeautifulSoup(response)
        finally:
            # The parser has consumed the stream by now; release the socket.
            response.close()
        table = soup.find("table", {"class": "table_box_new_book"})
        if table is None:
            # No release table on the page: report an empty list explicitly
            # instead of parsing the text "None".
            lines = []
        else:
            lines = unicode(table).splitlines()
        self.book_list = self._build_rows(lines, dd)

    def _build_rows(self, lines, dd):
        """Return the concatenated ``<tr>`` markup for books dated ``dd``."""
        step = self._lines_per_book
        strip_tags = self._tag_pattern.sub
        rows = []
        # The first and last `step` lines are skipped, as in the original
        # loop bounds, so every lines[start + k] below is in range.
        for start in range(step, len(lines) - step, step):
            # Characters 3-4 of the date cell -- presumably the day part of
            # an "MM/DD" style string; TODO confirm.
            date = strip_tags("", lines[start + 2])[3:5]
            if date != dd:
                continue
            publisher = strip_tags("", lines[start + 3])
            title = strip_tags("", lines[start + 4])
            author = strip_tags("", lines[start + 5])
            rows.append("<tr><td>" + title + "</td><td>" + author
                        + "</td><td>" + publisher + "</td></tr>")
        return "".join(rows)

    def get_book_list(self):
        """Return the collected table rows encoded as UTF-8."""
        return self.book_list.encode("utf_8")

    def create_url(self, yymm, dd):
        """Return the page URL for ``yymm``, chosen by which half ``dd`` is in.

        Days below 16 use the ``_date1`` page, all other days ``_date2``.
        """
        tail = self.url_tail1 if int(dd) < 16 else self.url_tail2
        return self.url_base + yymm + tail
def main(argv):
    """Write a complete HTML mail message for one day's releases to stdout.

    Args:
        argv: Command-line vector; ``argv[1]`` is the year/month and
            ``argv[2]`` the day, both passed straight to ``scrape_web``.

    Returns:
        0 on success, 2 when the two required arguments are missing.
    """
    if len(argv) < 3:
        sys.stderr.write("usage: python updates_books.py YYMM DD\n")
        return 2
    yymm = argv[1]
    dd = argv[2]
    body = scrape_web(yymm, dd).get_book_list()
    message = [
        "MIME-Version: 1.0",
        # The body is UTF-8 bytes, so say so; without a charset the mail
        # client has to guess how to decode the Japanese titles.
        "Content-Type: text/html; charset=UTF-8",
        # NOTE(review): these addresses look like redacted placeholders --
        # set real ones before deploying.
        "From: [email protected]",
        "To: [email protected]",
        "Subject: Book released today",
        # An empty line is what separates the header section from the body.
        "",
        "<html><body><table border=1 cellspacing=0 cellpadding=3 ><tr><td colspan=3 >",
        yymm[2:4] + "/" + dd + " books on sale",
        "</td></tr>",
        body,
        "</table></body></html>",
    ]
    sys.stdout.write("\n".join(message) + "\n")
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the release-mail wrapper script every day at 06:00.
0 6 * * * /home/hoge/python/send_mail.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MIME-Version: 1.0 | |
Content-Type: text/html | |
From: [email protected] | |
To: [email protected] | |
Subject: Book released today | |
<html><body><table border=1 cellspacing=0 cellpadding=3 ><tr><td colspan=3 > | |
05/08 books on sale | |
</td></tr> | |
<tr><td>ちーちゃんはちょっと足りない</td><td>阿部 共実</td><td>秋田書店</td></tr><tr><td>ブラックギャラクシー6</td><td>阿部 共実</td><td>秋田書店</td></tr><tr><td>ブラックギャラクシー6</td><td>阿部 共実</td><td>秋田書店</td></tr><tr><td>刃牙道 1</td><td>板垣 恵介</td><td>秋田書店</td></tr><tr><td>オイ!!オバさん 9</td><td>いづみ かつき</td><td>秋田書店</td></tr><tr><td>鈍色の青春</td><td>オオゾネ サトシ</td><td>秋田書店</td></tr><tr><td>暁の明星 1</td><td>きづき あきら+サトウ ナンキ</td><td>秋田書店</td></tr><tr><td>ANGEL VOICE 37</td><td>古谷野 孝雄</td><td>秋田書店</td></tr><tr><td>みつどもえ 14</td><td>桜井 のりお</td><td>秋田書店</td></tr><tr><td>雨天決行 4</td><td>重本 ハジメ</td><td>秋田書店</td></tr><tr><td>少年Y 4</td><td>とうじ たつや/ハジメ</td><td>秋田書店</td></tr><tr><td>真・餓狼伝 5</td><td>野部 優美/夢枕 獏</td><td>秋田書店</td></tr><tr><td>錻力のアーチスト 3</td><td>細川 雅巳</td><td>秋田書店</td></tr><tr><td>実は私は 6</td><td>増田 英二</td><td>秋田書店</td></tr><tr><td>透明人間の作り方</td><td>増田 英二</td><td>秋田書店</td></tr><tr><td>ヤコとポコ 1</td><td>水沢 悦子</td><td>秋田書店</td></tr><tr><td>ニボシ君の変態 2</td><td>ミッチェル 田中</td><td>秋田書店</td></tr><tr><td>かみさまドロップ 4</td><td>みなもと 悠</td><td>秋田書店</td></tr><tr><td>ウチコミ!! 3</td><td>村岡 ユウ</td><td>秋田書店</td></tr><tr><td>サンセットローズ 7</td><td>米原 秀幸</td><td>秋田書店</td></tr><tr><td>新装版 フルアヘッド!ココ 19(完)</td><td>米原 秀幸</td><td>秋田書店</td></tr><tr><td>バーサスアース 9(完)</td><td>渡辺 義彦/一智 和智</td><td>秋田書店</td></tr><tr><td>(成)偽典 生贄夫人 獄 ~淫肛悶絶~</td><td>飛野 俊之</td><td>海王社</td></tr><tr><td>いとへん</td><td>宇仁田 ゆみ</td><td>祥伝社</td></tr><tr><td>my dear Life</td><td>桜沢 エリカ</td><td>祥伝社</td></tr><tr><td>カレは女とシたことない。</td><td>都 陽子</td><td>祥伝社</td></tr><tr><td>ぷち本当にあった愉快な話 タクシー告発!交通警察のやり口</td><td>東條 さち子</td><td>竹書房</td></tr> | |
</table></body></html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Generate the mail for today's releases with updates_books.py and pipe it
# into sendmail, which reads the message from standard input.
#
# date prints the two arguments as "YYMM DD". The expansion below is left
# unquoted on purpose so the shell splits it into two separate arguments.
release_date=$(date +"%y%m %d")
python /home/hoge/python/updates_books.py $release_date | sendmail -t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment