Skip to content

Instantly share code, notes, and snippets.

Created June 24, 2014 14:57
Show Gist options
  • Save alecxe/46f95778072ce4b59e79 to your computer and use it in GitHub Desktop.
Save alecxe/46f95778072ce4b59e79 to your computer and use it in GitHub Desktop.
from scrapy.http import Request
from scrapy.item import Item, Field
from scrapy.selector import Selector
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
class MyItem(Item):
    """Container for the data scraped from a single response."""

    # Assigned in the spider's parse callback with the result of an
    # ``.extract()`` call on the matched text nodes.
    reviewer_ranking = Field()
class MySpider(BaseSpider):
    """Spider that requests one page and scrapes the reviewer-ranking text.

    The initial request is sent with a desktop-browser ``User-Agent`` header,
    and the response is parsed into a single ``MyItem``.
    """

    name = 'myspider'
    # NOTE(review): the allowed domain is an empty string in this copy of the
    # file -- presumably redacted; set the real domain before running.
    allowed_domains = [""]

    def start_requests(self):
        """Yield the single initial request with a browser User-Agent header."""
        # NOTE(review): the URL is an empty string here -- presumably
        # redacted; a real absolute URL is needed before running.
        yield Request(
            "",
            headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"},
        )

    def parse(self, response):
        """Build a ``MyItem`` from the response.

        Args:
            response: The downloaded response for the request issued by
                ``start_requests``.

        Returns:
            A ``MyItem`` whose ``reviewer_ranking`` field holds the extracted
            text of every ``<span>`` whose class attribute is exactly
            ``"a-size-small a-color-secondary"``.
        """
        sel = Selector(response)
        item = MyItem()
        # The XPath must be evaluated through the selector; the original line
        # dropped the call and left an unbalanced parenthesis.
        item["reviewer_ranking"] = sel.xpath(
            '//span[@class="a-size-small a-color-secondary"]/text()'
        ).extract()
        return item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment