Created
October 15, 2014 10:00
-
-
Save snowleung/cf1f3d4bc907f155233a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
import requests | |
import time | |
from bs4 import BeautifulSoup | |
class TopicResponse(object):
    """Plain record describing one forum topic."""

    # Class-level defaults; instances overwrite whichever fields they know.
    title = u''
    url = ''
    reply_count = -1
    created_at = 'None'

    def __str__(self):
        """Return "<reply_count> <created_at> <title> <url> " (trailing space)."""
        fields = (self.reply_count, self.created_at, self.title, self.url)
        return '''%i %s %s %s ''' % fields
class FunnyRead(object):
    """Parser that turns a forum listing page into TopicResponse objects."""

    def __init__(self):
        pass

    def datas(self, content):
        """Extract the topics found in one listing page.

        Args:
            content: raw page markup; it is handed to BeautifulSoup with a
                GBK encoding hint.

        Returns:
            A list of TopicResponse objects, one per ``tr`` row with class
            ``tr3 t_one`` that contains an ``h3 > a`` link with an ``href``.
            Rows without such a link are skipped instead of aborting the
            whole page.
        """
        _topics = []
        c = BeautifulSoup(content, from_encoding='gbk')
        for d in c.find_all('tr', class_='tr3 t_one'):
            heading = d.find('h3')
            alink = heading.find('a') if heading is not None else None
            href = alink.get('href') if alink is not None else None
            if href is None:
                # Without a link there is nothing to point at; skip the row.
                continue

            _t = TopicResponse()
            _t.title = alink.text.encode('utf-8')
            _t.url = 'http://t66y.com/' + href

            # Reply count stays at -1 when the cell is missing or not a number.
            _t.reply_count = -1
            rly_count = d.find('td', class_='tal f10 y-style')
            if rly_count is not None:
                try:
                    _t.reply_count = int(rly_count.text)
                except ValueError:
                    pass

            # created_at keeps the class default when the date cell is absent.
            date_cell = d.find('td', class_='tal y-style')
            date_div = None
            if date_cell is not None:
                date_div = date_cell.find('div', class_='f10')
            if date_div is not None:
                _t.created_at = date_div.text.encode('utf-8')

            _topics.append(_t)
        return _topics
def funnyOutput(url=None, limit=25, pages=3, timeout=30):
    """Fetch listing pages, rank their topics by reply count and print them.

    Args:
        url: listing URL; "&page=<n>" is appended to it, so it must already
            carry a query string. Falls back to the fid=2 listing when falsy.
        limit: maximum number of topics printed.
        pages: how many pages to request (page indices 0 .. pages-1).
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block the run indefinitely.

    Returns:
        None. Progress markers and the selected topics are printed.
    """
    from collections import Counter

    print('funny')
    _url = url if url else 'http://t66y.com/thread0806.php?fid=2'
    funny = FunnyRead()
    _topics = []
    # NOTE(review): page indices start at 0, as before; confirm that matches
    # the site's page numbering.
    for i in range(pages):
        print('working')
        u = _url + '&page=%i' % i
        try:
            req = requests.get(u, timeout=timeout)
        except requests.RequestException:
            # Best effort: a page that cannot be fetched is simply skipped.
            continue
        _topics.extend(funny.datas(req.content))

    # Keep only topics whose URL was seen exactly once; a URL that shows up
    # several times is dropped entirely (same rule as before, counted once).
    seen = Counter(o.url for o in _topics)
    t = [ss for ss in _topics if seen[ss.url] == 1]
    t = sorted(t, key=lambda topic: topic.reply_count, reverse=True)
    for out in t[:limit]:
        print(out)
if __name__ == '__main__':
    # Script entry point: run the report against the fid=2 listing.
    funnyOutput(url='http://t66y.com/thread0806.php?fid=2')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment