Created
May 24, 2015 06:46
-
-
Save barraponto/88929e2daccb99a9c01b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from scrapy.linkextractors import LinkExtractor | |
from scrapy.spiders import CrawlSpider, Rule | |
from yfinance.items import YfinanceItem | |
class FinanceSpider(CrawlSpider):
    """Crawl paginated Yahoo Finance pages and emit one item per table row.

    The crawl begins at the single URL in ``start_urls`` and follows only
    links matched by the ``[rel="next"]`` CSS selector. Every page reached,
    including the start page, is handed to :meth:`parse_items`.
    """

    name = 'finance'
    allowed_domains = ['finance.yahoo.com']
    start_urls = ['http://finance.yahoo.com/q/hp?s=PWF.TO&a=04&b=19&c=2005&d=04&e=19&f=2010&g=d&z=66&y=132']

    # Follow only the rel="next" link; parse every page it leads to and keep
    # following from there.
    rules = (
        Rule(LinkExtractor(restrict_css='[rel="next"]'), callback='parse_items', follow=True),
    )

    def parse_start_url(self, response, **kwargs):
        """Parse the start page with the same logic as followed pages.

        CrawlSpider applies rule callbacks only to responses for extracted
        links; responses for ``start_urls`` go through this hook instead.
        Without this override the rows on the first page are never yielded.
        """
        return self.parse_items(response)

    def parse_items(self, response):
        """Yield a ``YfinanceItem`` for each data row of the page's table.

        The first and last ``<tr>`` under ``.yfnc_datamodoutline1 table`` are
        skipped (presumably header and footer rows - confirm against the
        page markup). Each item's ``date`` is the first text node of the
        row's first ``<td>``.
        """
        rows = response.css('.yfnc_datamodoutline1 table tr')[1:-1]
        for row in rows:
            first_cell_texts = row.css('td:first-child::text').extract()
            if not first_cell_texts:
                # A row with no direct text in its first cell would raise
                # IndexError and abort the rest of the page; skip it instead.
                continue
            yield YfinanceItem(date=first_cell_texts[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment