|
|
@@ -0,0 +1,56 @@ |
|
|
import requests |
|
|
|
|
|
from db import db |
|
|
|
|
|
""" |
|
|
The Douban Group API, which is not documented at
http://developers.douban.com/wiki/?title=api_v2

Base url: https://api.douban.com/v2

Group info: /group/:id
Group topics: /group/:id/topics
Group comments: /group/topic/:id/comments

REF: http://www.douban.com/group/topic/33507002/
"""
|
|
|
|
|
# URL template for a group's topic listing; ``%s`` is replaced by the group id.
# HTTPS is used so the request headers (which include a Cookie field) are not
# sent in cleartext; the module docstring lists the API base URL as https.
base_url = 'https://api.douban.com/v2/group/%s/topics'

# Number of topics requested per page (sent as the ``count`` query parameter).
PER_PAGE_COUNT = 100
|
|
|
|
|
|
|
|
def real_fetch(group_id, start=0, timeout=10):
    """Fetch one page of a group's topics and store them in the database.

    Sends a GET request to ``base_url % group_id`` with ``start`` and
    ``count`` query parameters, decodes the JSON body, inserts one
    ``db.topic`` row per entry of its ``topics`` list, and then flushes
    the database once for the whole page.

    Args:
        group_id: Group identifier substituted into ``base_url``.
        start: Offset of the first topic to request (default 0).
        timeout: Seconds to wait for the server before giving up
            (default 10). Without a timeout a stalled connection would
            block the crawl indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload has no ``topics`` key or a topic lacks
            one of the stored fields.
    """
    headers = {
        'Host': 'api.douban.com',
        'Referer':'api.douban.com',
        'Cookie': '',
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
        'Connection': 'keep-alive',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language':'zh-cn,zh;q=0.5',
        'Accept-Charset':'GB2312,utf-8;q=0.7,*;q=0.7',
    }
    params = {'start': start, 'count': PER_PAGE_COUNT}
    # Wrap in a tuple so a tuple-valued group_id cannot be mistaken for
    # multiple format arguments.
    url = base_url % (group_id,)

    response = requests.get(url, params=params, headers=headers,
                            timeout=timeout)
    # Surface 4xx/5xx responses here with a clear HTTP error instead of
    # failing later on a missing 'topics' key or undecodable body.
    response.raise_for_status()

    payload = response.json()
    for topic in payload['topics']:
        db.topic.insert(group_id=group_id,
                        title=topic['title'],
                        content=topic['content'],
                        url=topic['alt'],
                        created=topic['created'])
    # Flush once after the whole page has been inserted.
    db.flush()
|
|
|
|
|
|
|
|
def fetch(group_id, pages=10):
    """Fetch consecutive pages of topics for one group.

    Calls ``real_fetch`` once per page, advancing the start offset by
    ``PER_PAGE_COUNT`` each time, and prints the page index before each
    request as a progress indicator.

    Args:
        group_id: Group identifier passed through to ``real_fetch``.
        pages: Number of pages to fetch, starting from the first
            (default 10).
    """
    for page in range(pages):
        # Function-call form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print(page)
        real_fetch(group_id, page * PER_PAGE_COUNT)
|
|
|
|
|
# Groups whose topics are fetched when this file is run as a script.
group_ids = ['zhufang', 'xiaotanzi']


if __name__ == '__main__':
    for group_id in group_ids:
        # Function-call form prints the same text on Python 2 and is also
        # valid syntax on Python 3.
        print(group_id)
        fetch(group_id)