Skip to content

Instantly share code, notes, and snippets.

@takuan-osho
Last active December 11, 2015 14:59
Show Gist options
  • Select an option

  • Save takuan-osho/4618237 to your computer and use it in GitHub Desktop.

Select an option

Save takuan-osho/4618237 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import urllib2
from pyquery import PyQuery as pq
def get_matome_info(url):
    """Fetch a page and collect (title, link) pairs for its matome entries.

    The page at *url* is downloaded, decoded as UTF-8 and parsed with
    PyQuery.  The ``span`` and ``a`` elements nested inside
    ``span.blomaga_name.matome`` are selected separately and then paired
    up by position.

    Args:
        url: Address of the page to scrape.

    Returns:
        A set of ``(title, href)`` tuples, where ``title`` is the ``text``
        attribute of a matched ``span`` and ``href`` is the ``href``
        attribute of the ``a`` at the same position.  Because the result
        is a set, it is unordered and duplicate pairs are collapsed.

    Errors raised while opening the URL or decoding the body are not
    caught here and propagate to the caller.
    """
    # Imported locally so the function does not rely on a file-level import
    # of contextlib.
    from contextlib import closing

    # closing() guarantees the HTTP response is closed once the body has
    # been read, even if read()/decode() raises.
    with closing(urllib2.urlopen(url)) as res:
        html = res.read().decode('utf-8')

    dom = pq(html)
    title_doms = dom('span.blomaga_name.matome span')
    url_doms = dom('span.blomaga_name.matome a')

    # NOTE(review): zip() pairs the two selections purely by position and
    # stops at the shorter one; this assumes the page has one title span per
    # link, in the same order -- confirm against the live markup.
    return set(
        (title_dom.text, url_dom.get('href'))
        for title_dom, url_dom in zip(title_doms, url_doms)
    )
def main():
    """Print every matome entry of the blomaga portal as ``title | url``.

    One line is written to standard output per (title, url) pair returned
    by ``get_matome_info``; the pairs come from a set, so the output order
    is not fixed.
    """
    portal_url = u'http://ch.nicovideo.jp/portal/blomaga/matomemedia'
    for entry_title, entry_url in get_matome_info(portal_url):
        line = entry_title + u' | ' + entry_url
        # Parenthesised single expression: prints the same text under the
        # Python 2 print statement.
        print(line)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment