Skip to content

Instantly share code, notes, and snippets.

@user19
Created February 16, 2015 04:36
Show Gist options
  • Save user19/2228b4d942672588ad5b to your computer and use it in GitHub Desktop.
Save user19/2228b4d942672588ad5b to your computer and use it in GitHub Desktop.
import re
import webapp2
import lxml.html
from google.appengine.api import urlfetch
class MainHandler(webapp2.RequestHandler):
    """Count entries on the Hatena Blog "recent" listing.

    Logs in to Hatena, walks the paginated ``/-/recent`` listing and
    reports how many list items were seen in total and how many of them
    carry the ``icon-pro`` CSS class.
    """

    def get(self):
        """Handle ``GET /?entries=N`` and write a small JSON summary.

        Query parameters:
            entries: integer; the number of listing pages fetched is
                ``entries // 10`` (presumably 10 entries per page --
                TODO confirm against the Hatena markup).

        Response:
            ``{"sum": <items seen>, "pro": <items with icon-pro>}`` with a
            permissive ``Access-Control-Allow-Origin`` header.
            400 if ``entries`` is missing or not an integer.
            502 if the login response does not set an ``rk`` cookie.
        """
        try:
            # ``//`` is floor division on both Python 2 and 3; plain ``/``
            # would yield a float on Python 3 and break ``range`` below.
            page = int(self.request.get('entries')) // 10
        except ValueError:
            # Missing or non-numeric ``entries`` is a client error, not a
            # server fault.
            self.error(400)
            return

        # NOTE(review): the credentials in the payload look like
        # placeholders; presumably they are meant to be replaced -- consider
        # loading them from configuration instead of source.
        res = urlfetch.fetch(url='https://www.hatena.ne.jp/login', payload='name=name&password=password', method=urlfetch.POST)

        # The session is carried by the ``rk`` cookie. Guard against a
        # failed login (no Set-Cookie header, or no ``rk`` value in it)
        # instead of crashing with KeyError/IndexError.
        match = re.search('rk=(.*?);', res.headers.get('Set-Cookie') or '')
        if match is None:
            self.error(502)
            return
        headers = {'Cookie': 'rk=%s' % match.group(1)}

        total = 0
        pro_count = 0
        param = ''  # suffix appended to the listing URL; empty for page 1
        for _ in range(page):
            res = urlfetch.fetch(url='http://blog.hatena.ne.jp/-/recent%s' % param, headers=headers)
            dom = lxml.html.fromstring(res.content)
            element = dom.get_element_by_id('item-list')
            elements = element.find_class('list nozebra')[0].findall('li')
            total += len(elements)
            pro_count += sum(1 for ele in elements if ele.find_class('icon-pro'))

            # The "more" block holds the link to the next page; when it is
            # absent there is nothing further to fetch.
            more = element.find_class('more-blogs more')
            if not more:
                break
            param = more[0].find('a').attrib['href']

        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.write('{"sum":%d,"pro":%d}' % (total, pro_count))
# WSGI entry point: every request to "/" is served by MainHandler.
# NOTE(review): debug=True is kept as in the original; confirm it is
# intended outside development.
app = webapp2.WSGIApplication(routes=[('/', MainHandler)], debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment