Skip to content

Instantly share code, notes, and snippets.

@btfak
Forked from iamsk/doubangroupapi
Last active August 29, 2015 14:22

Revisions

  1. @iamsk iamsk created this gist Sep 2, 2014.
    56 changes: 56 additions & 0 deletions doubangroupapi
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,56 @@
    import requests

    from db import db

    """
    The Douban Group API, which is not listed at http://developers.douban.com/wiki/?title=api_v2

    Base url: https://api.douban.com/v2

    Group info: /group/:id
    Group topics: /group/:id/topics
    Group comments: /group/topic/:id/comments

    REF: http://www.douban.com/group/topic/33507002/
    """

    # Topics endpoint template; ``%s`` is replaced with the group id.
    # Uses HTTPS so it agrees with the documented base URL
    # (https://api.douban.com/v2) and keeps request headers off plain HTTP.
    base_url = 'https://api.douban.com/v2/group/%s/topics'
    # Number of topics requested per page (sent as the ``count`` query parameter).
    PER_PAGE_COUNT = 100


    def real_fetch(group_id, start=0, timeout=10):
        """Fetch one page of a group's topics and store each topic through ``db``.

        Args:
            group_id: Group identifier substituted into ``base_url``.
            start: Offset of the first topic to request (sent as ``start``).
            timeout: Seconds to wait on the HTTP request before giving up.
                Without a timeout ``requests`` may block indefinitely.

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure, or a non-success HTTP status.
            KeyError: If the JSON payload has no ``'topics'`` entry or a topic
                lacks one of the stored fields.
        """
        headers = {
            'Host': 'api.douban.com',
            'Referer': 'api.douban.com',
            'Cookie': '',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
            'Connection': 'keep-alive',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-cn,zh;q=0.5',
            'Accept-Charset': 'GB2312,utf-8;q=0.7,*;q=0.7',
        }
        params = {'start': start, 'count': PER_PAGE_COUNT}
        url = base_url % group_id
        r = requests.get(url, params=params, headers=headers, timeout=timeout)
        # Fail loudly on HTTP errors instead of hitting a KeyError on the
        # error payload further down.
        r.raise_for_status()

        ret = r.json()
        for topic in ret['topics']:
            db.topic.insert(
                group_id=group_id,
                title=topic['title'],
                content=topic['content'],
                url=topic['alt'],
                created=topic['created'],
            )
        # Flush once after the whole page has been inserted.
        # NOTE(review): confirm db.flush() was not meant to run per insert.
        db.flush()


    def fetch(group_id, pages=10):
        """Fetch consecutive pages of topics for ``group_id``.

        Args:
            group_id: Group identifier passed through to ``real_fetch``.
            pages: Number of pages to request; defaults to 10, the value that
                was previously hard-coded.
        """
        for page in range(pages):
            # Single-argument print() behaves the same on Python 2 and 3,
            # whereas the bare print statement is a syntax error on Python 3.
            print(page)
            # Each page starts PER_PAGE_COUNT topics after the previous one.
            real_fetch(group_id, page * PER_PAGE_COUNT)

    # Groups whose topics are fetched when this file is run as a script.
    group_ids = ['zhufang', 'xiaotanzi']

    if __name__ == '__main__':
        for group_id in group_ids:
            # Single-argument print() works identically on Python 2 and 3;
            # the bare print statement would not parse on Python 3.
            print(group_id)
            fetch(group_id)