Skip to content

Instantly share code, notes, and snippets.

@snowleung
Created October 15, 2014 10:00
Show Gist options
  • Save snowleung/cf1f3d4bc907f155233a to your computer and use it in GitHub Desktop.
Save snowleung/cf1f3d4bc907f155233a to your computer and use it in GitHub Desktop.
#coding:utf-8
import requests
import time
from bs4 import BeautifulSoup
class TopicResponse(object):
    """Plain record holding the fields scraped for one forum topic.

    The class-level values below serve as defaults until an instance
    assigns its own.
    """

    # Topic headline text.
    title = u''
    # Link to the topic.
    url = ''
    # Reply counter; -1 is the placeholder used before a value is set.
    reply_count = -1
    # Creation stamp; starts out as the literal string 'None'.
    created_at = 'None'

    def __str__(self):
        """Return "<reply_count> <created_at> <title> <url> " (trailing space)."""
        template = '''%i %s %s %s '''
        fields = (self.reply_count, self.created_at, self.title, self.url)
        return template % fields
class FunnyRead(object):
    """Extract topic entries from the HTML of a forum listing page."""

    # Prefix prepended to each topic link's href to build its URL.
    BASE_URL = 'http://t66y.com/'

    def datas(self, content):
        """Parse *content* and return a list of TopicResponse objects.

        content: raw HTML of a listing page; it is handed to
            BeautifulSoup with ``from_encoding='gbk'``.

        One TopicResponse is produced per ``<tr class="tr3 t_one">`` row.
        Rows that lack an ``<h3><a href=...>`` link are skipped instead of
        raising, so a single odd row cannot abort the whole page.  A
        missing or non-numeric reply cell yields ``reply_count == -1``; a
        missing date cell leaves ``created_at`` at its class default.
        """
        # NOTE(review): no parser is named here, so bs4 picks one from what
        # is installed; results may differ between machines.
        soup = BeautifulSoup(content, from_encoding='gbk')
        topics = []
        for row in soup.find_all('tr', class_='tr3 t_one'):
            # find() returns None when nothing matches, so every step of
            # the lookup chain is checked before it is dereferenced.
            heading = row.find('h3')
            link = heading.find('a') if heading is not None else None
            href = link.get('href') if link is not None else None
            if href is None:
                continue

            topic = TopicResponse()
            topic.title = link.text.encode('utf-8')
            topic.url = self.BASE_URL + href

            topic.reply_count = -1
            count_cell = row.find('td', class_='tal f10 y-style')
            if count_cell is not None:
                try:
                    topic.reply_count = int(count_cell.text)
                except ValueError:
                    # Non-numeric cell text: keep the -1 placeholder.
                    pass

            date_cell = row.find('td', class_='tal y-style')
            date_div = None
            if date_cell is not None:
                date_div = date_cell.find('div', class_='f10')
            if date_div is not None:
                topic.created_at = date_div.text.encode('utf-8')

            topics.append(topic)
        return topics
def funnyOutput(url=None, limit=25, pages=3, timeout=10):
    """Fetch listing pages and print the topics with the most replies.

    url: listing URL; '&page=<n>' is appended to it, so it must already
        carry a query string.  Falls back to the fid=2 board when empty.
    limit: maximum number of topics printed.
    pages: how many pages to request, numbered from 0.
    timeout: seconds given to each requests.get call so a stalled
        connection cannot hang the script.

    Pages that fail to download are reported on stderr and skipped.
    Topics are de-duplicated by URL (first occurrence kept), sorted by
    reply_count in descending order, and printed one per line.
    Returns None.
    """
    import sys  # local import: only used for the error report below

    # Single-argument print(...) behaves the same under the Python 2
    # print statement this script was written for.
    print('funny')
    base = url if url else 'http://t66y.com/thread0806.php?fid=2'
    reader = FunnyRead()
    collected = []
    for page in range(pages):
        print('working')
        page_url = base + '&page=%i' % page
        try:
            resp = requests.get(page_url, timeout=timeout)
        except requests.RequestException as exc:
            # Best effort: one bad page should not lose the others.
            sys.stderr.write('skipping %s: %s\n' % (page_url, exc))
            continue
        collected.extend(reader.datas(resp.content))

    # Keep the first copy of each URL.  The previous count()==1 filter
    # discarded every copy of a topic that showed up on two pages.
    seen = set()
    unique = []
    for topic in collected:
        if topic.url in seen:
            continue
        seen.add(topic.url)
        unique.append(topic)

    ranked = sorted(unique, key=lambda item: item.reply_count, reverse=True)
    for topic in ranked[:limit]:
        print(topic)
# Script entry point: list the default board when run directly.
if __name__ == '__main__':
    funnyOutput(url='http://t66y.com/thread0806.php?fid=2')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment