Skip to content

Instantly share code, notes, and snippets.

@rafikahmed
Created September 29, 2018 11:31
Show Gist options
  • Save rafikahmed/eb542cf074358bf0aeb3c31a24f197ba to your computer and use it in GitHub Desktop.
Save rafikahmed/eb542cf074358bf0aeb3c31a24f197ba to your computer and use it in GitHub Desktop.
import scrapy
from demo_project.items import JokeItem
from scrapy.loader import ItemLoader
class JokesSpider(scrapy.Spider):
    """Spider that collects joke texts from laughfactory.com's family-jokes listing.

    Starting from the URL in ``start_urls``, every ``div`` with class
    ``jokes`` on a page is loaded into a ``JokeItem``, and the "next" link
    is followed until a page has none.
    """

    name = 'jokes'
    # Scrapy only honours the attribute spelled ``allowed_domains``; a
    # misspelled name is silently ignored and requests are never restricted.
    allowed_domains = ['www.laughfactory.com']
    start_urls = [
        'https://www.laughfactory.com/jokes/family-jokes',
    ]

    def parse(self, response):
        """Yield one loaded item per joke block, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            The item produced by ``ItemLoader.load_item()`` for each
            ``//div[@class='jokes']`` node, followed by a ``scrapy.Request``
            for the next listing page when a ``li.next`` link is present.
        """
        for joke in response.xpath("//div[@class='jokes']"):
            # Scope the loader to this joke node so the relative XPath below
            # only matches paragraphs inside the current joke.
            loader = ItemLoader(item=JokeItem(), selector=joke)
            loader.add_xpath('joke_text', ".//div[@class='joke-text']/p")
            yield loader.load_item()

        # Pagination is resolved once per response, after all jokes on the
        # page have been emitted.
        next_page = response.xpath("//li[@class='next']/a/@href").extract_first()
        if next_page is not None:
            # The href may be relative; urljoin resolves it against the
            # response URL.
            next_page_link = response.urljoin(next_page)
            yield scrapy.Request(url=next_page_link, callback=self.parse)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment