Last active
December 29, 2016 00:26
-
-
Save gfreivasc/7f7fb25d8ffc5a341b33a7b2ef15ddfa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
class PageItem(scrapy.Item):
    """Scraped item holding the data collected for a single page."""

    # Text of the page's <title> element, as set by the spider callbacks.
    title = scrapy.Field()
class LinkReaderSpider(scrapy.Spider):
    """Spider that records the title of the start page and of each page it links to.

    The crawl is one level deep: ``parse`` handles the start URL and
    schedules a request for every anchor found there; those responses are
    handled by ``parse_links``, which only emits an item and follows
    nothing further.
    """

    name = 'link_follower'
    start_urls = ['http://gabrielfv.com']

    def parse(self, response):
        """Yield the start page's item, then one request per followable link.

        Args:
            response: The downloaded start page.

        Yields:
            A ``PageItem`` for ``response`` followed by a ``scrapy.Request``
            (with ``parse_links`` as callback) for each http(s) link.
        """
        yield self._build_item(response)

        # Selecting the attribute directly skips anchors without an href,
        # which would otherwise hand ``None`` to scrapy.Request.
        for href in response.css('a::attr(href)').extract():
            # Resolve relative links against the page URL; scrapy.Request
            # rejects URLs that have no scheme.
            url = response.urljoin(href.strip())
            # Drop mailto:, javascript:, tel:, etc. -- they cannot be fetched.
            if not url.lower().startswith(('http://', 'https://')):
                continue
            yield scrapy.Request(url, callback=self.parse_links)

    def parse_links(self, response):
        """Return the item for a page reached from the start page.

        Args:
            response: The downloaded linked page.

        Returns:
            A ``PageItem`` whose ``title`` is the page's title text
            (``None`` when the page has no ``<title>`` text).
        """
        return self._build_item(response)

    @staticmethod
    def _build_item(response):
        """Build a ``PageItem`` carrying the text of ``response``'s <title>."""
        item = PageItem()
        item['title'] = response.css('title::text').extract_first()
        return item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment