Skip to content

Instantly share code, notes, and snippets.

@ItachiSan
Last active May 24, 2018 20:38
Show Gist options
  • Save ItachiSan/fe201852ba49bfd634b0abbef04fc930 to your computer and use it in GitHub Desktop.
Save ItachiSan/fe201852ba49bfd634b0abbef04fc930 to your computer and use it in GitHub Desktop.
LFGSpider WIP
# -*- coding: utf-8 -*-
import scrapy
class Comic(scrapy.Item):
    """Scrapy item describing a whole comic series."""

    # Title of the comic.
    name = scrapy.Field()
    # Container for the comic's volumes (presumably a dict keyed by volume
    # name, going by the work-in-progress notes in the spider -- TODO confirm).
    volumes = scrapy.Field()
    # Volume links (href values) extracted from the archive page.
    volumes_selectors = scrapy.Field()
class Volume(scrapy.Item):
    """Scrapy item describing a single volume of a comic."""

    # Title of the volume.
    name = scrapy.Field()
    # Chapters belonging to the volume (not populated anywhere yet --
    # TODO decide on the exact structure).
    chapters = scrapy.Field()
class LFGSpider2(scrapy.Spider):
    """Work-in-progress spider for the "Looking for a group" archive.

    The archive page is parsed for volume links, which are then visited one
    after another: each volume callback schedules the request for the next
    volume, carrying the list of pending links along in ``Request.meta``.
    """

    name = 'lfg2'
    allowed_domains = ['lfg.co']
    start_urls = ['http://www.lfg.co/archives/']

    def parse(self, response):
        """Parse the archive page and start the chain of volume requests.

        Yields a request for the first volume (if any link was found),
        followed by a ``{'Comic': <name>}`` item.
        """
        # TODO: build a ``Comic`` item (see ``parse_comic``) and collect the
        # volumes into ``comic['volumes']`` instead of passing a plain name.
        comic = 'Looking for a group'
        # Only the first three volume links are followed for now.
        hrefs = response.xpath("//a[@class='archive-thumb']/@href").extract()[:3]
        # Resolve the links here, against the page they were extracted from;
        # resolving them later against a volume page would use the wrong base
        # URL for relative links.
        links = [response.urljoin(href) for href in hrefs]
        if links:
            # The head of ``links`` is always the page being requested.
            yield scrapy.Request(links[0], callback=self.parse_volume,
                                 meta={'comic': comic, 'links': links})
        yield {'Comic': comic}

    def parse_comic(self, response, comic):
        """Fill ``comic`` in place with its name and the volume links.

        Args:
            response: response for the archive page.
            comic: item (or mapping) receiving the ``name`` and
                ``volumes_selectors`` entries.
        """
        comic['name'] = 'Looking for a group'
        comic['volumes_selectors'] = response.xpath(
            "//a[@class='archive-thumb']/@href").extract()[:3]

    def parse_volume(self, response):
        """Handle one volume page and schedule the next pending volume.

        Expects ``response.meta['links']`` to hold the link of the current
        page first, followed by the links still to visit. Yields a
        ``{'Volume': <number of links left>}`` item and, while links remain,
        a request for the next volume.
        """
        # Drop the link of the current page. Slicing (rather than ``pop``)
        # leaves the list stored in the originating request's meta untouched.
        remaining = response.meta['links'][1:]
        # TODO: build a ``Volume`` item (name, chapters) and attach it to the
        # comic carried in ``response.meta['comic']``.
        yield {'Volume': len(remaining)}
        if remaining:
            # Links built by ``parse`` are already absolute, so ``urljoin``
            # returns them unchanged; it is kept as a safety net for callers
            # that still pass relative links.
            url = response.urljoin(remaining[0])
            yield scrapy.Request(url, callback=self.parse_volume,
                                 meta={'comic': response.meta['comic'],
                                       'links': remaining})
@stav
Copy link

stav commented May 24, 2018

Why call `self.parse_comic(response, comic)`? I would just inline that code in `parse`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment