Skip to content

Instantly share code, notes, and snippets.

@rafikahmed
Created October 17, 2019 18:19
Show Gist options
  • Save rafikahmed/09d8d874aba88d7100d85ef47fc3a1ff to your computer and use it in GitHub Desktop.
Save rafikahmed/09d8d874aba88d7100d85ef47fc3a1ff to your computer and use it in GitHub Desktop.
quotes
# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import FormRequest
class QuotesSpider(scrapy.Spider):
    """Log in to quotes.toscrape.com, then collect quote text from every page.

    The crawl starts on the login page. ``parse`` submits the login form and
    hands the resulting response to ``after_login``, which yields one item per
    quote and keeps following the "next" pagination link with itself as the
    callback until no such link is found.

    The login credentials default to ``admin``/``admin`` and can be overridden
    per run with spider arguments, e.g.
    ``scrapy crawl quotes -a username=me -a password=secret``.
    """

    name = 'quotes'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = [
        'http://quotes.toscrape.com/login'
    ]

    # Default login credentials. Kept as class attributes (rather than
    # literals inside ``parse``) so spider arguments can replace them.
    username = 'admin'
    password = 'admin'

    def parse(self, response):
        """Submit the login form found on the start page.

        Args:
            response: The response for the login page listed in
                ``start_urls``.

        Yields:
            A ``FormRequest`` built from the first ``<form>`` matched by
            ``//form``, carrying the CSRF token read from the page plus the
            configured credentials. Its response is routed to
            ``after_login``.
        """
        # Read the token from the page so it is sent back with the
        # credentials in the same form submission.
        csrf_token = response.xpath("//input[@name='csrf_token']/@value").get()

        yield FormRequest.from_response(
            response,
            formxpath="//form",
            formdata={
                'csrf_token': csrf_token,
                'username': self.username,
                'password': self.password,
            },
            callback=self.after_login,
        )

    def after_login(self, response):
        """Extract quotes from a listing page and follow pagination.

        Used as the callback for the login submission and for every
        subsequent "next page" request.

        Args:
            response: A page expected to contain ``div.quote`` elements.

        Yields:
            One ``{'quote': <text or None>}`` dict per quote block, followed
            by a request for the next page when a ``li.next`` link exists.
        """
        for quote in response.xpath("//div[@class='quote']"):
            yield {
                'quote': quote.xpath(".//span[@class='text']/text()").get(),
            }

        # The pagination link is looked up once per page, after all quotes on
        # it have been emitted; ``response.follow`` accepts the relative href.
        next_page = response.xpath("//li[@class='next']/a/@href").get()
        if next_page:
            yield response.follow(url=next_page, callback=self.after_login)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment