-
-
Save tranch/aa247082a2771c4ac1e13a7374abeead to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from db import db | |
""" | |
The Douban Group API, which is not documented at http://developers.douban.com/wiki/?title=api_v2
Base URL: https://api.douban.com/v2
Group info: /group/:id
Group topics: /group/:id/topics
Group comments: /group/topic/:id/comments
Reference: http://www.douban.com/group/topic/33507002/
""" | |
# URL template for a group's topic listing; ``%s`` is replaced by the group id.
# HTTPS is used so requests are not sent in clear text.
base_url = 'https://api.douban.com/v2/group/%s/topics'

# Number of topics requested per API call (sent as the ``count`` parameter).
PER_PAGE_COUNT = 100
def real_fetch(group_id, start=0, timeout=30):
    """Fetch one page of a group's topics and store them in the database.

    Requests up to ``PER_PAGE_COUNT`` topics of ``group_id`` starting at
    offset ``start``, inserts one ``topic`` row per returned topic and then
    calls ``db.flush()``.

    Args:
        group_id: Group identifier substituted into ``base_url``.
        start: Offset of the first topic to request (``start`` parameter).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The number of topics inserted for this page.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout,
            or a 4xx/5xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON payload lacks ``topics`` or a topic lacks one
            of the stored fields.
    """
    # Browser-like headers sent with every request.
    headers = {
        'Host': 'api.douban.com',
        'Referer': 'api.douban.com',
        'Cookie': '',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
        'Connection': 'keep-alive',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.5',
        'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
    }
    params = {'start': start, 'count': PER_PAGE_COUNT}
    # Wrap in a 1-tuple so a tuple-valued group_id cannot confuse % formatting.
    url = base_url % (group_id,)

    r = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of hitting a KeyError further down.
    r.raise_for_status()
    topics = r.json()['topics']

    for topic in topics:
        db.topic.insert(group_id=group_id,
                        title=topic['title'],
                        content=topic['content'],
                        url=topic['alt'],
                        created=topic['created'])
    # Flush once per page, after all rows of the page have been inserted.
    db.flush()
    return len(topics)
def fetch(group_id, pages=10):
    """Fetch and store the first ``pages`` pages of topics for a group.

    Calls ``real_fetch`` once per page, advancing the start offset by
    ``PER_PAGE_COUNT`` each time, and prints the page index as progress.

    Args:
        group_id: Group identifier passed through to ``real_fetch``.
        pages: Number of consecutive pages to fetch (defaults to 10).
    """
    for i in range(pages):
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(i)
        real_fetch(group_id, i * PER_PAGE_COUNT)
# Douban groups whose topics are crawled when this file is run as a script.
group_ids = ['zhufang', 'xiaotanzi']

if __name__ == '__main__':
    for group_id in group_ids:
        # Progress output; parenthesised form works on Python 2 and Python 3.
        print(group_id)
        fetch(group_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment