Skip to content

Instantly share code, notes, and snippets.

@RyanKung
Created February 5, 2013 09:06
Show Gist options
  • Save RyanKung/4713188 to your computer and use it in GitHub Desktop.
Save RyanKung/4713188 to your computer and use it in GitHub Desktop.
from SQLHandle import cursor as db
import os, sys, urllib
import redis
import pyquery as q
import lxml.html.soupparser as sparser
# Shared Redis client (localhost, port 6379, database 0); parser() writes the
# scraped values through it.
rds = redis.StrictRedis(
    host='localhost',
    port=6379,
    db=0,
)

# Placeholder showing the mapping shape parser() consumes:
# field name -> XPath expression locating that field's value.
# NOTE(review): not referenced anywhere in the visible code -- presumably a
# template for callers; confirm before removing.
parseDict = {'var': 'xpath of val'}
def parser(url, tarDict, name=''):
    """Scrape fields from a web page and store them in Redis.

    Downloads ``url``, parses it with the soup-based lxml parser, evaluates
    every XPath expression in ``tarDict`` and stores the text of the first
    matching node through the module-level ``rds`` client. Afterwards Redis
    is asked to save its dataset.

    Args:
        url: Address of the page to download.
        tarDict: Mapping of field name -> XPath expression for that field.
        name: Optional key namespace. Values are stored under
            ``"<name>:<field>"``; when ``name`` is empty they are stored
            under the bare field name so ``rds.get('<field>')`` finds them.

    Returns:
        The result of ``rds.save()``.

    Raises:
        IndexError: If an XPath expression matches nothing on the page.
            Fields processed before the failing one have already been
            written to Redis.
    """
    # Local import so the function works on Python 3 as well as Python 2
    # without touching the file-level imports.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    sock = urlopen(url)
    try:
        src = sock.read()
    finally:
        # Release the connection even when the download fails midway.
        sock.close()

    dom = sparser.fromstring(src)
    for var, xpath in tarDict.items():
        matches = dom.xpath(xpath)
        if not matches:
            raise IndexError(
                'xpath %r for field %r matched nothing in %s'
                % (xpath, var, url)
            )
        # Avoid a dangling ':' prefix when no namespace was requested.
        if name:
            key = '%s:%s' % (name, var)
        else:
            key = var
        rds.set(key, matches[0].text)
    return rds.save()
def get_val(url, xpath):
    """Return the text of the first node matching ``xpath`` on a web page.

    Args:
        url: Address of the page to download.
        xpath: XPath expression evaluated against the parsed page.

    Returns:
        The ``.text`` of the first matching node.

    Raises:
        IndexError: If ``xpath`` matches nothing on the page.
    """
    # Local import so the function works on Python 3 as well as Python 2
    # without touching the file-level imports.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    sock = urlopen(url)
    try:
        src = sock.read()
    finally:
        # Release the connection even when the download fails midway.
        sock.close()

    matches = sparser.fromstring(src).xpath(xpath)
    if not matches:
        raise IndexError('xpath %r matched nothing in %s' % (xpath, url))
    return matches[0].text
def main():
    """Fetch a sample Qidian book page and print its title.

    Returns:
        None, which the script entry point turns into exit status 0.
    """
    url = 'http://www.qidian.com/Book/1223147.aspx'
    QIDIAN = {
        'title': './/body/form/div[3]/div[3]/div/div/div/div/div[3]/div[2]/div/h1'
    }
    # NOTE: to cache the scraped fields in Redis instead of printing them,
    # call parser(url, QIDIAN) and read the values back through rds.
    # Function-call form of print: identical output on Python 2 for a single
    # argument, and valid syntax on Python 3.
    print(get_val(url, QIDIAN['title']))
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment