Skip to content

Instantly share code, notes, and snippets.

@flyer103
Created July 1, 2014 03:13
Show Gist options
  • Save flyer103/ad78e3d1ac6ad57c1830 to your computer and use it in GitHub Desktop.
Save flyer103/ad78e3d1ac6ad57c1830 to your computer and use it in GitHub Desktop.
简单的爬虫,用来演示
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""抓取百姓网首页
"""
import requests
from lxml import etree
class CrawlBaixing(object):
    """Fetch the Baixing home page and print the text of its <title> element.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # URL of the page that ``run`` downloads.
        self.seed = 'http://www.baixing.com'

    def run(self):
        """Download ``self.seed``, parse it as HTML and print the page title.

        Failures (network error, HTTP error status, unparsable page, missing
        title) are reported on stdout; nothing is raised and ``None`` is
        always returned.
        """
        try:
            # Issue the network request.
            res = requests.get(self.seed, timeout=10)
            # Treat 4xx/5xx answers as failures instead of parsing error pages.
            res.raise_for_status()
        except requests.RequestException as e:
            print('Failed to crawl {0}. Error: {1}'.format(self.seed, e))
            return

        # Parse the page.
        try:
            html_dom = etree.HTML(res.text)
        except (ValueError, etree.LxmlError) as e:
            print('Failed to parse {0}. Error: {1}'.format(self.seed, e))
            return

        title = self._extract_title(html_dom)
        if title is None:
            print('Failed to parse {0}. Error: no <title> found'.format(
                self.seed))
            return
        print('Title: {0}'.format(title))

    @staticmethod
    def _extract_title(html_dom):
        """Return the text of ``/html/head/title`` or ``None`` if unavailable.

        ``html_dom`` is the object returned by ``etree.HTML`` (it may be
        ``None``); only its ``xpath`` method is used.
        """
        if html_dom is None:
            return None
        nodes = html_dom.xpath('/html/head/title')
        if not nodes:
            return None
        # ``text`` is None for an empty <title/> element.
        return nodes[0].text
if __name__ == '__main__':
    # Script entry point: crawl the seed page once and print its title.
    spider = CrawlBaixing()
    spider.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment