Skip to content

Instantly share code, notes, and snippets.

@barraponto
Created May 24, 2015 06:46
Show Gist options
  • Save barraponto/88929e2daccb99a9c01b to your computer and use it in GitHub Desktop.
Save barraponto/88929e2daccb99a9c01b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from yfinance.items import YfinanceItem
class FinanceSpider(CrawlSpider):
    """Crawl paginated Yahoo Finance historical-price pages.

    Starting from ``start_urls``, the spider follows every link matching
    ``[rel="next"]`` and yields one ``YfinanceItem`` per data row found in
    the price table of each page, including the start page itself.
    """

    name = 'finance'
    allowed_domains = ['finance.yahoo.com']
    start_urls = ['http://finance.yahoo.com/q/hp?s=PWF.TO&a=04&b=19&c=2005&d=04&e=19&f=2010&g=d&z=66&y=132']
    rules = (
        # Follow the pagination link and scrape every page it leads to.
        Rule(LinkExtractor(restrict_css='[rel="next"]'), callback='parse_items', follow=True),
    )

    def parse_start_url(self, response, **kwargs):
        """Scrape the start page too.

        CrawlSpider only invokes rule callbacks for *followed* links; the
        responses for ``start_urls`` go through this hook, whose default
        implementation yields nothing. Delegating to ``parse_items`` keeps
        the first page's rows from being silently skipped.
        """
        return self.parse_items(response)

    def parse_items(self, response):
        """Yield a ``YfinanceItem`` for each data row of the price table.

        :param response: the downloaded page to scrape.
        :returns: a generator of ``YfinanceItem`` objects whose ``date``
            field holds the text of the row's first cell.
        """
        rows = response.css('.yfnc_datamodoutline1 table tr')
        # The first and last rows are dropped -- presumably the header row
        # and a trailing footnote row; verify against the live markup.
        for line in rows[1:-1]:
            date = line.css('td:first-child::text').extract_first()
            if date is None:
                # A row without text in its first cell would previously
                # raise IndexError and abort the rest of the page.
                continue
            yield YfinanceItem(date=date)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment