Skip to content

Instantly share code, notes, and snippets.

@kimihito
Created July 31, 2012 14:57
Show Gist options
  • Save kimihito/3217623 to your computer and use it in GitHub Desktop.
Save kimihito/3217623 to your computer and use it in GitHub Desktop.
Twitterで流れてくるはてなブックマークの記事をストックする
#!/usr/bin/env python
# coding: utf-8
import re
import sqlite3
import sys
import time
import urllib
import urllib2

import BeautifulSoup
import tweepy
# Twitter API credentials -- replace the placeholders with your own values.
consumer_key = "YOUR_CONSUMER_KEY"
consumer_secret = "YOUR_SECRET_KEY"
access_token = "YOUR_ACCESS_TOKEN"
access_secret = "YOUR_ACCESS_SECRET"

# Build the OAuth handler from the names defined just above. The previous
# code referred to upper-case names (CONSUMER_KEY, ACCESS_TOKEN_SECRET, ...)
# that were never defined, which raised NameError before any request was made.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth_handler=auth)
# Search tweets for "htn.to" (Hatena Bookmark short links).
# NOTE(review): the query is URL-quoted here before being handed to tweepy;
# confirm tweepy does not encode it a second time.
query = u"htn.to/"
results = api.search(urllib.quote_plus(query.encode('utf-8')))

# Matches a t.co short link. The dot is escaped and the tail length is open
# ended, so links are neither truncated nor padded with trailing characters.
TCO_URL_RE = re.compile(r'https?://t\.co/\w+')


def _page_title(page, fallback):
    """Return the <title> text of the HTML in *page*, or *fallback*.

    *page* is a file-like object whose contents are parsed with
    BeautifulSoup. *fallback* is returned when the document has no usable
    <title> element. The title is returned as unicode so that non-ASCII
    titles do not raise UnicodeEncodeError the way ``str(title)`` would.
    """
    soup = BeautifulSoup.BeautifulSoup(page)
    if soup.title is None or soup.title.string is None:
        return fallback
    return unicode(soup.title.string).strip()


htnurl = []
htntitle = []
for r in results:
    match = TCO_URL_RE.search(r.text)
    if match is None:
        # Tweet carries no t.co link; nothing to expand.
        continue
    tcourl = match.group(0)

    page = None
    url = tcourl
    try:
        # One request serves both purposes: following the redirects and
        # providing the HTML for the title.
        page = urllib2.urlopen(tcourl)
        url = page.geturl()
    except urllib2.HTTPError as e:
        # An HTTP error still carries a response body; keep the short link
        # as the URL and read the title from the error page when available.
        if e.fp is not None:
            page = e
    except urllib2.URLError:
        # No response at all: record the unexpanded short link.
        pass

    if page is None:
        title = url
    else:
        try:
            title = _page_title(page, url)
        finally:
            page.close()

    htnurl.append(str(url))
    htntitle.append(title)
    # Pause between requests to avoid hammering the remote servers.
    time.sleep(1)

htn = zip(htntitle, htnurl)
# Store the collected htn.to entries in the database.
con = sqlite3.connect('test.db')
# Hand TEXT columns back as byte strings so rows can be printed as-is.
con.text_factory = str

# Create the table (title, url).
# TODO: decide whether to add a created_at column.
sql = """
create table if not exists htn (
title varchar(100),
url varchar(100)
);
"""

try:
    # "with con" commits on success and rolls back on error. Without a
    # commit the inserted rows are discarded when the connection closes.
    with con:
        con.execute(sql)

        # Prevent duplicate (title, url) rows.
        # First drop duplicates already stored, keeping the oldest row of
        # each pair. The columns are unquoted on purpose: 'title' in single
        # quotes is a string literal, and grouping by it collapses the whole
        # table into a single row.
        con.execute(
            "delete from htn where rowid not in "
            "(select min(rowid) from htn group by title, url)")
        # Then let SQLite enforce uniqueness for every later insert.
        con.execute(
            "create unique index if not exists htn_title_url "
            "on htn (title, url)")

        # Insert the new data; pairs that already exist are skipped.
        con.executemany("insert or ignore into htn values (?,?)", htn)

    # Show the current contents of the table.
    for row in con.execute("select * from htn"):
        print("%s %s" % (row[0], row[1]))
finally:
    con.close()

# TODO: convert to RSS.
# TODO: make it viewable from the web.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment