Skip to content

Instantly share code, notes, and snippets.

@sdhjl2000
Created December 30, 2012 09:59
Show Gist options
  • Save sdhjl2000/4411867 to your computer and use it in GitHub Desktop.
Save sdhjl2000/4411867 to your computer and use it in GitHub Desktop.
douban reader
#encoding: utf-8
#author:t-y
#http://pythoner.net
import time,math,os,re,urllib,urllib2,cookielib
class douban_robot:
    """Douban client that logs in through a cookie-aware urllib2 opener.

    The module imports ``urllib2`` and ``cookielib``, so this class targets
    Python 2.  ``run`` is the entry point: it logs in and, only when asked,
    starts the hourly polling loop.
    """

    # NOTE(review): credentials are hard-coded in source.  Prefer passing them
    # to the constructor and keep real secrets out of version control.
    email = '[email protected]'
    password = 'sdhjl200'
    login_path = 'https://www.douban.com/accounts/login'
    # Raw body of the most recent login response; read by get_title().
    html = b''

    def __init__(self, email=None, password=None):
        """Build the cookie jar and opener and install the opener globally.

        Args:
            email: optional override for the class-level ``email``.
            password: optional override for the class-level ``password``.
        """
        if email is not None:
            self.email = email
        if password is not None:
            self.password = password
        self.cj = cookielib.LWPCookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)
        # Both headers go into one list: the original assigned ``addheaders``
        # twice, so the second assignment discarded the User-agent header.
        self.opener.addheaders = [
            ('User-agent', 'Opera/9.23'),
            ('Accept-Charset', 'utf-8'),
        ]

    def _first_group(self, pattern, data):
        """Return the first capture of ``pattern`` in ``data`` as text.

        ``data`` may be a byte string (decoded as UTF-8, undecodable bytes
        replaced) or already-decoded text.  Returns ``u''`` when the pattern
        does not match, instead of raising ``IndexError``.
        """
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'replace')
        found = re.search(pattern, data, re.S)
        return found.group(1) if found else u''

    def login(self):
        """POST the credentials to ``login_path`` and report the outcome.

        Prints the first ``<h1>`` of the response.  When the final URL is the
        Douban home page the cookie jar is saved to ``douban.cookie``.

        Returns:
            True if the response ended up on ``http://www.douban.com/``,
            False otherwise.
        """
        post_data = urllib.urlencode({
            'form_email': self.email,
            'form_password': self.password,
            # NOTE(review): corrected from the misspelled 'remeber'; confirm
            # the field name against the live login form.
            'remember': 'on',
        })
        request = urllib2.Request(self.login_path, post_data)
        # Open the request once and reuse the response for both the body and
        # the final URL; the original submitted the login form twice.
        response = self.opener.open(request)
        self.html = response.read()
        final_url = response.geturl()
        print(self._first_group('<h1>(.*?)</h1>', self.html))
        if final_url == 'http://www.douban.com/':
            self.cj.save('douban.cookie')
            print('Login success !')
            return True
        print(final_url)
        print('Login error')
        return False

    def run(self, d_url, poll=False):
        """Log in and optionally start polling ``d_url``.

        Args:
            d_url: page URL, stored as ``self.post_url``.
            poll: when True and the login succeeds, enter the endless hourly
                polling loop.  Defaults to False, which matches the original
                behaviour of returning right after the login attempt.
        """
        self.post_url = d_url
        if not self.login():
            return
        if poll:
            self._poll_hourly()

    def _poll_hourly(self):
        """Fetch ``post_url`` about ten seconds before each full hour, forever."""
        while True:
            remaining = self.sleep_time()
            if remaining > 10:
                lead = remaining - 10
                print('%s:waiting %s' % (time.strftime('%H:%M:%S'), lead))
                time.sleep(lead)
            html = urllib2.urlopen(urllib2.Request(self.post_url)).read()
            # NOTE(review): the pattern is a bare lookbehind, so every match
            # is an empty string; only the presence of a match is meaningful.
            markers = re.findall(r'(?<=class="page" /></div>)', html, re.S)
            if markers:
                print('get ck success!')
                print(markers[0])
            # Wait out the rest of the hour before the next round.
            # TODO: the original left a POST to post_url at the top of the
            # hour commented out; its post_data was never defined.
            time.sleep(self.sleep_time())

    def sleep_time(self, now=None):
        """Return the number of seconds until the next full hour.

        Args:
            now: optional ``time.struct_time`` (or 9-tuple) to measure from;
                defaults to ``time.localtime()``.

        Returns:
            An int between 0 and 3600; exactly 3600 when ``now`` is on the
            hour.  Clamped at 0 so a seconds field above 59 cannot produce a
            negative delay for ``time.sleep``.
        """
        if now is None:
            now = time.localtime()
        elapsed = now[4] * 60 + now[5]  # tm_min, tm_sec
        return max(3600 - elapsed, 0)

    def get_title(self):
        """Print ``<title>--<h1>`` taken from the last login response body."""
        title = self._first_group('<title>(.*?)</title>', self.html)
        h1 = self._first_group('<h1>(.*?)</h1>', self.html)
        print('%s--%s' % (title, h1))

    def write_log(self, text):
        """Clear the console, print ``text`` and append it to ``log.txt``."""
        # 'cls' only exists on Windows; use 'clear' elsewhere.
        os.system('cls' if os.name == 'nt' else 'clear')
        print(text)
        # The context manager closes the file even if the write fails.
        with open('log.txt', 'a') as log:
            log.write(text)
# Run the robot only when this file is executed as a script, so that importing
# the module does not trigger a network login.
if __name__ == '__main__':
    url = 'http://read.douban.com/reader/ebook/384559/page/121/'
    app = douban_robot()
    app.run(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment