Skip to content

Instantly share code, notes, and snippets.

@takuan-osho
Created January 25, 2013 03:23
Show Gist options
  • Save takuan-osho/4631477 to your computer and use it in GitHub Desktop.
Save takuan-osho/4631477 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import urllib2
from pyquery import PyQuery as pq
# Listing-page URL for each supported matome portal, keyed by the portal's
# canonical name (the key passed to get_matome_dom).
MATOME_PORTALS = {
    u'blomaga': u'http://ch.nicovideo.jp/portal/blomaga/matomemedia',
    u'livedoor': u'http://blog.livedoor.com/category/243/',
}
def get_matome_dom(portal_name, encoding='utf-8'):
    """Download a portal's listing page and parse it into a PyQuery document.

    Args:
        portal_name: Key into ``MATOME_PORTALS`` selecting the page to fetch.
        encoding: Character encoding used to decode the response body.

    Returns:
        A PyQuery object built from the decoded HTML.

    Raises:
        KeyError: If ``portal_name`` is not a key of ``MATOME_PORTALS``.
    """
    res = urllib2.urlopen(MATOME_PORTALS[portal_name])
    try:
        html = res.read().decode(encoding)
    finally:
        # Close explicitly so the connection is released even when reading
        # or decoding fails.
        res.close()
    return pq(html)
def get_matome_info_from_livedoor():
    """Collect (title, url) pairs from the livedoor listing page.

    Returns:
        A set of ``(text, href)`` tuples, one per anchor matched by the
        ``ul.list[data-id="243"] h3.ttl>a`` selector.
    """
    anchors = get_matome_dom('livedoor')('ul.list[data-id="243"] h3.ttl>a')
    return set((anchor.text, anchor.get('href')) for anchor in anchors)
def get_matome_info_from_blomaga():
    """Collect (title, url) pairs from the blomaga listing page.

    The title is the text of each ``<span>`` matched by
    ``span.blomaga_name.matome>a>span``; the URL is the ``href`` of the
    anchor that directly contains that span.

    Returns:
        A set of ``(title, href)`` tuples.
    """
    dom = get_matome_dom('blomaga')
    title_doms = dom('span.blomaga_name.matome>a>span')
    # Take the href from each title span's own parent anchor rather than
    # zipping two independent queries by position: with zip, a single anchor
    # lacking a <span> child would shift every following title onto the
    # wrong URL. The selector's child combinator guarantees the parent is
    # the matching <a>.
    return set(
        (title_dom.text, title_dom.getparent().get('href'))
        for title_dom in title_doms
    )
def get_matome_info(portal_name):
    """Return the (title, url) set for the requested portal.

    Args:
        portal_name: ``u'livedoor'`` (alias ``u'ld'``) or ``u'blomaga'``
            (alias ``u'bm'``).

    Returns:
        The set of ``(title, url)`` tuples produced by the matching
        portal-specific scraper.

    Raises:
        ValueError: If ``portal_name`` is not one of the supported names.
            Previously this case fell through both branches and failed with
            an UnboundLocalError on the return statement.
    """
    if portal_name in (u'livedoor', u'ld'):
        return get_matome_info_from_livedoor()
    if portal_name in (u'blomaga', u'bm'):
        return get_matome_info_from_blomaga()
    raise ValueError('unknown matome portal: %r' % (portal_name,))
def main():
    """Print the title and URL of every entry scraped from both portals.

    Each entry is written as the title on one line, the URL on the next,
    then a blank line. Livedoor entries are printed before blomaga entries.
    """
    # The original bound these results to swapped names (the livedoor result
    # was called blomaga_matome and vice versa). The names now match their
    # contents; the fetch and print order is unchanged.
    livedoor_matome = get_matome_info('livedoor')
    blomaga_matome = get_matome_info('blomaga')
    for matome in [livedoor_matome, blomaga_matome]:
        for title, url in matome:
            # Single-argument parenthesised print gives the same output as
            # the Python 2 print statement and also parses on Python 3.
            print(title)
            print(url + '\n')


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment