Skip to content

Instantly share code, notes, and snippets.

@theArjun
Created December 11, 2022 05:05
Show Gist options
  • Select an option

  • Save theArjun/893dddcceff64a0a7e021a9a3d345fbc to your computer and use it in GitHub Desktop.

Select an option

Save theArjun/893dddcceff64a0a7e021a9a3d345fbc to your computer and use it in GitHub Desktop.
Love Quote Scraper using Scrapy
import re
import scrapy
from icecream import ic
from scrapy.http import HtmlResponse
class QuoteSpider(scrapy.Spider):
    """Collect Devanagari-script text lines from a paginated quotes article.

    Each of the eight pages listed in ``start_urls`` is fetched, and every
    paragraph text node that contains at least one Devanagari character is
    emitted as an item of the form ``{'url': ..., 'quote': ...}``.
    """

    name = 'quote'
    # The article is split across numbered pages 1 through 8.
    start_urls = ["https://www.imnepal.com/quotes-about-love-nepali/%d/" % i for i in range(1, 9)]

    # Compiled once for the whole crawl instead of being looked up per line.
    # U+0900-U+097F is the Unicode Devanagari block.
    _DEVANAGARI_RE = re.compile(r'[\u0900-\u097F]+')

    def start_requests(self):
        """Yield one request per URL in ``start_urls``, routed to ``parse``."""
        for url in self.start_urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response: HtmlResponse, **kwargs):
        """Yield one item per Devanagari text line found in the page body.

        Args:
            response: The downloaded page.
            **kwargs: Accepted for callback compatibility; not used here.

        Yields:
            dict: ``{'url': <page URL>, 'quote': <text node>}``. The text is
            emitted exactly as extracted (no whitespace stripping).
        """
        body = response.xpath('//body')
        # text() selects only text nodes that are direct children of <p>;
        # text wrapped in nested inline tags is not picked up by this query.
        for line in body.xpath('.//p/text()').getall():
            # Keep only lines that contain at least one Devanagari character.
            if self._DEVANAGARI_RE.search(line):
                yield {
                    'url': response.url,
                    'quote': line,
                }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment