Created
July 31, 2012 14:57
-
-
Save kimihito/3217623 to your computer and use it in GitHub Desktop.
Twitterで流れてくるはてなブックマークの記事をストックする
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import re
import sqlite3
import sys
import time
import urllib
import urllib2

import BeautifulSoup
import tweepy
# Twitter API credentials -- replace the placeholders with real values.
consumer_key = "YOUR_CONSUMER_KEY"
consumer_secret = "YOUR_SECRET_KEY"
access_token = "YOUR_ACCESS_TOKEN"
access_secret = "YOUR_ACCESS_SECRET"

# Matches a t.co short link inside the tweet text.
_TCO_URL_RE = re.compile(r'https?://t\.co/\w+')

# Pause between page fetches, in seconds.
_FETCH_DELAY_SECONDS = 1


def _fetch_title_and_url(short_url):
    """Open *short_url* once and return ``(title, url)`` as strings.

    ``url`` is the address reported by the response after redirects.
    If the link cannot be opened, the failure is reported on stderr and
    ``('', short_url)`` is returned so the link itself is still stored.
    A page without a usable ``<title>`` yields an empty title.
    """
    try:
        response = urllib2.urlopen(short_url)
    except urllib2.URLError as e:
        sys.stderr.write("could not open %s: %s\n" % (short_url, e))
        return '', str(short_url)
    try:
        final_url = response.geturl()
        # Parse the response we already have instead of fetching again.
        soup = BeautifulSoup.BeautifulSoup(response)
    finally:
        response.close()
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None
    if title is None:
        return '', str(final_url)
    return str(title), str(final_url)


# The credential variables above are lowercase; use those exact names here.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth_handler=auth)

# Search tweets for htn.to links.
# NOTE(review): tweepy presumably URL-encodes query parameters itself, so
# pre-quoting here may double-encode the slash -- confirm against the
# tweepy version in use.
query = u"htn.to/"
results = api.search(urllib.quote_plus(query.encode('utf-8')))

htnurl = []
htntitle = []
for r in results:
    match = _TCO_URL_RE.search(r.text)
    if match is None:
        # Tweet text carries no t.co link; nothing to resolve.
        continue
    title, url = _fetch_title_and_url(match.group(0))
    htntitle.append(title)
    htnurl.append(url)
    time.sleep(_FETCH_DELAY_SECONDS)
htn = zip(htntitle, htnurl)

# Store the collected htn.to entries in the database.
con = sqlite3.connect('test.db')
con.text_factory = str
try:
    # Create the table (title, url).
    # TODO: decide whether to add a created_at column.
    sql = """
    create table if not exists htn (
        title varchar(100),
        url varchar(100)
    );
    """
    con.execute(sql)

    # Insert the new rows.
    sql = "insert into htn values (?,?)"
    con.executemany(sql, htn)

    # Remove duplicates, keeping the oldest row of each (title, url) pair.
    # The column names must be unquoted: 'title' in single quotes is a
    # string literal, and grouping by a constant collapses the whole table
    # into a single row.
    sql2 = ("delete from htn where rowid not in "
            "(select min(rowid) from htn group by title, url)")
    con.execute(sql2)

    # Without an explicit commit the inserts are discarded on close().
    con.commit()

    # Print the current contents of the table.
    c = con.cursor()
    c.execute("select * from htn")
    for row in c:
        print("%s %s" % (row[0], row[1]))
finally:
    con.close()

# TODO: convert the stored entries to RSS.
# TODO: make the result viewable from the web.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment