Skip to content

Instantly share code, notes, and snippets.

@laiso
Created June 13, 2010 11:13
Show Gist options
  • Save laiso/436579 to your computer and use it in GitHub Desktop.
Save laiso/436579 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# togchk.models
import re
import urllib2
import logging
from BeautifulSoup import BeautifulSoup
RE_URL = re.compile(r'http://togetter\.com\/li\/\d+$')
class Checker(object):
    """Collect the founder and the member names from a Togetter list page.

    On construction the URL is checked against the module-level ``RE_URL``
    pattern; when it matches, the page is downloaded and parsed.  When it
    does not match (or no URL is given) nothing is fetched and the instance
    keeps its empty defaults.

    Attributes:
        founder: text of the ``a.profile_link`` element, or ``None`` when
            nothing has been parsed or the element is missing.
        members: list of ``str`` taken from the ``h5 > a`` link of every
            ``div.list_body`` element, in document order.
    """

    def __init__(self, url=None):
        """Store *url* and immediately try to retrieve and parse it.

        Args:
            url: address of the page to inspect.  ``None`` (the default) or
                any URL rejected by ``_validate`` skips the download.
        """
        self.founder = None
        self.members = []
        self._url = url
        self._soup = None
        self._retrieve()

    def count_founder_block(self):
        """Return how many entries of ``members`` are equal to ``founder``."""
        return self.members.count(self.founder)

    def _retrieve(self):
        """Download and parse the page, but only for a valid URL."""
        if self._validate():
            source = self._fetch_source()
            self._parse(source)

    def _fetch_source(self):
        """Return the body of ``self._url``, or ``''`` on a non-200 status.

        Errors raised by ``urllib2.urlopen`` are not caught here.
        """
        response = urllib2.urlopen(self._url)
        try:
            if response.getcode() == 200:
                return response.read()
            return ''
        finally:
            # Always release the connection, including when read() raises.
            response.close()

    def _parse(self, source):
        """Build the soup from *source* and fill ``founder`` and ``members``."""
        self._soup = BeautifulSoup(source)
        self._parse_founder()
        self._parse_members()

    def _parse_members(self):
        """Extract one name per ``div.list_body`` and store them in ``members``.

        Entries that lack an ``h5`` heading or a link inside it are skipped
        instead of aborting the whole parse with an ``AttributeError``.

        Returns:
            The new ``members`` list.
        """
        names = []
        for entry in self._soup.findAll('div', {'class': 'list_body'}):
            heading = entry.find('h5')
            link = heading.find('a') if heading is not None else None
            if link is None:
                continue
            names.append(str(link.string))
        self.members = names
        return self.members

    def _parse_founder(self):
        """Read the founder name from the ``a.profile_link`` element.

        Returns:
            The link text with leading/trailing newlines and tabs stripped,
            or ``None`` when the link or its text is absent from the page.
        """
        profile_link = self._soup.find('a', {'class': 'profile_link'})
        if profile_link is None or profile_link.string is None:
            self.founder = None
            return self.founder
        self.founder = profile_link.string.strip(u'\n\t')
        return self.founder

    def _validate(self):
        """Return ``True`` when ``self._url`` matches ``RE_URL``.

        A missing or empty URL is rejected up front, because passing
        ``None`` to ``RE_URL.match`` would raise ``TypeError``.
        """
        url = self._url
        if not url:
            return False
        return RE_URL.match(url) is not None
import unittest
class CheckerTest(unittest.TestCase):
    """Checks Checker's parsing against a saved copy of a Togetter page."""

    def testParse(self):
        """Parse the saved page for list 28715 and verify founder/members."""
        # Skip __init__ on purpose: constructing with a URL would download
        # the live page, and that result would be overwritten by the
        # fixture parse below anyway.  _parse() assigns every attribute the
        # assertions read.
        c = Checker.__new__(Checker)
        # open() + with closes the fixture file deterministically.
        with open('togchk/testdata/source28715.html', 'r') as f:  # saved it
            source = f.read()
        c._parse(source)
        self.assertEqual(c.founder, 'rocaz')
        self.assertTrue(isinstance(c.members, list))
        self.assertEqual(len(c.members), 93)
        self.assertTrue(isinstance(c.members[0], str))
        # Index instead of pop() so the object under test is not mutated.
        self.assertEqual(c.members[-1], 'msk_fzt')
        self.assertEqual(c.count_founder_block(), 1)
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment