Skip to content

Instantly share code, notes, and snippets.

@mizchi
Created January 24, 2011 13:28
Show Gist options
  • Select an option

  • Save mizchi/793211 to your computer and use it in GitHub Desktop.

Select an option

Save mizchi/793211 to your computer and use it in GitHub Desktop.
datから取得するので人大杉回避する
#!/usr/bin/env python
# -*- encoding:utf8 -*-
import sys
import pykf
import urllib2
from pyquery import PyQuery as pq
# Reload the sys module and switch the interpreter-wide default string
# encoding to UTF-8 before any text is handled.
# NOTE(review): presumably reload() is needed because setdefaultencoding is
# not reachable on the already-imported sys module in Python 2 -- confirm.
reload(sys)
sys.setdefaultencoding('utf-8')
# URL of the board menu page that get_bbslist() downloads.
BBS_LIST = "http://www.zonubbs.net/bbsmenu.html"
def get_bbslist():
    """Fetch the board menu page and list every link found on it.

    Downloads ``BBS_LIST``, decodes the body with the "sjis" codec
    (undecodable bytes are ignored) and selects all ``a`` elements.

    Returns:
        A list of ``(link text, href)`` tuples, one per selected element,
        in the order the selection yields them.
    """
    response = urllib2.urlopen(BBS_LIST)
    try:
        raw = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()
    anchors = pq(unicode(raw, "sjis", "ignore"))("a")
    return [(anchors.eq(i).text(), anchors.eq(i).attr.href)
            for i in range(anchors.length)]
def get_boards(url):
    """Fetch a board's ``subback.html`` index and list the links in it.

    Args:
        url: Base URL of the board. ``"subback.html"`` is appended
            directly, so the URL must already end with ``/``.

    Returns:
        A list of ``(link text, href)`` tuples, one per element matched by
        the ``#trad a`` selector. The page body is decoded with the "sjis"
        codec, ignoring undecodable bytes.
    """
    response = urllib2.urlopen(url + "subback.html")
    try:
        raw = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()
    anchors = pq(unicode(raw, "sjis", "ignore"))("#trad a")
    return [(anchors.eq(i).text(), anchors.eq(i).attr.href)
            for i in range(anchors.length)]
def get_boards_test():
    """Print the text and href of every link get_boards() returns for a
    fixed sample board URL."""
    board_url = "http://toki.2ch.net/nandemo/"
    for title, href in get_boards(board_url):
        print("%s %s" % (title, href))
def get_daturl(url):
    """Turn a thread URL into the URL of its dat file.

    The URL is split on ``/``. The second-to-last segment is used as the
    dat file name, and everything before it becomes the prefix, giving
    ``<prefix>/dat/<segment>.dat``.

    Raises:
        IndexError: if ``url`` contains no ``/`` at all.
    """
    parts = url.split("/")
    thread_id = parts[-2]
    prefix = "/".join(parts[:-2])
    return "".join([prefix, "/dat/", thread_id, ".dat"])
def get_dat(url):
    """Download a thread's dat file and parse it into a list of posts.

    Args:
        url: Thread URL. It is converted to the dat file URL with
            ``get_daturl`` before fetching.

    Returns:
        A list of dicts, one per parsed line, with the keys ``"name"``
        (field 0), ``"date"`` (field 2) and ``"content"`` (field 3, with
        ``<br>`` replaced by a newline). Fields are separated by ``<>``.

    Side effects:
        Prints ``None`` for every line that has fewer than four fields.
    """
    response = urllib2.urlopen(get_daturl(url))
    try:
        raw = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()

    items = []
    for line in unicode(raw, "sjis", "ignore").split("\n"):
        fields = line.split("<>")
        # NOTE(review): the source lost its indentation; the else branch is
        # read as belonging to this if (not to the for loop) -- confirm.
        if len(fields) > 3:
            items.append({
                "name": fields[0],
                "date": fields[2],
                "content": fields[3].replace("<br>", "\n"),
            })
        else:
            print("None")
    return items
def test():
    """Smoke test that walks a random slice of the board menu.

    Picks a random start index in 0..130, takes three consecutive entries
    from get_bbslist(), and prints each one. For every entry that has
    links, prints the first three and, for each, the date and content of
    the first five posts returned by get_dat().
    """
    from random import randint
    start = randint(0, 130)
    for title, baseurl in get_bbslist()[start:start + 3]:
        print("%s %s" % (title, baseurl))
        boards = get_boards(baseurl)
        if not boards:
            continue
        for thread_title, link in boards[:3]:
            print("%s %s" % ("++ " + thread_title, baseurl + link))
            for post in get_dat(baseurl + link)[:5]:
                print("----- %s %s" % (post["date"], post["content"]))
if __name__ == "__main__":
    # Run the network smoke test only when executed as a script, so that
    # importing this module does not trigger any downloads.
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment