dreyescat · October 23, 2014 21:36
diff --git a/gistfile1.txt b/gistfile1.txt
 import scrapy

 from scrapy.contrib.spiders import CrawlSpider, Rule
 from scrapy.contrib.linkextractors import LinkExtractor

 class HackerNewsItem(scrapy.Item):
    """Fields populated by HackerNewsSpider.parse_item for one crawled page."""

    # Text of <a> children of elements whose class contains "title".
    title = scrapy.Field()
    # Text of <font> children of the first element whose class contains "comment".
    comment = scrapy.Field()

 class HackerNewsSpider(CrawlSpider):
    """Crawl spider that starts from the Hacker News front page.

    Links whose URL matches the pattern ``item.*`` are handed to
    ``parse_item``.
    """

    name = 'hackernews'
    allowed_domains = ['news.ycombinator.com']
    start_urls = ['https://news.ycombinator.com/']
    rules = (
        Rule(
            LinkExtractor(allow=('item.*',)),
            callback='parse_item',
        ),
    )

    def parse_item(self, response):
        """Build a HackerNewsItem from the XPath matches found in *response*.

        Returns an item whose ``title`` and ``comment`` fields hold whatever
        the two selectors below extract from the page.
        """
        title_query = '//*[contains(@class, "title")]/a/text()'
        # The "[1]" restricts the match to the first element in the document
        # whose class contains "comment".
        comment_query = '(//*[contains(@class, "comment")])[1]/font/text()'

        titles = response.xpath(title_query).extract()
        comments = response.xpath(comment_query).extract()
        return HackerNewsItem(title=titles, comment=comments)
	import scrapy

	from scrapy.contrib.spiders import CrawlSpider, Rule
	from scrapy.contrib.linkextractors import LinkExtractor

	class HackerNewsItem(scrapy.Item):
	    """Fields populated by HackerNewsSpider.parse_item for one crawled page."""

	    # Text of <a> children of elements whose class contains "title".
	    title = scrapy.Field()
	    # Text of <font> children of the first element whose class contains "comment".
	    comment = scrapy.Field()

	class HackerNewsSpider(CrawlSpider):
	    """Crawl spider that starts from the Hacker News front page.

	    Links whose URL matches the pattern ``item.*`` are handed to
	    ``parse_item``.
	    """

	    name = 'hackernews'
	    allowed_domains = ['news.ycombinator.com']
	    start_urls = [
	        'https://news.ycombinator.com/'
	    ]
	    rules = (
	        Rule(LinkExtractor(allow=('item.*', )), callback='parse_item'),
	    )

	    def parse_item(self, response):
	        """Build a HackerNewsItem from the XPath matches found in *response*.

	        Returns an item whose ``title`` and ``comment`` fields hold whatever
	        the two selectors below extract from the page.
	        """
	        item = HackerNewsItem()
	        item['title'] = response.xpath('//*[contains(@class, "title")]/a/text()').extract()
	        # The "[1]" restricts the match to the first element in the document
	        # whose class contains "comment".
	        item['comment'] = response.xpath('(//*[contains(@class, "comment")])[1]/font/text()').extract()
	        return item
No results found