Skip to content

Instantly share code, notes, and snippets.

@aitoehigie
Created May 20, 2017 07:53
Show Gist options
  • Save aitoehigie/96b1340a3ad3303b11db77101d09b362 to your computer and use it in GitHub Desktop.
Save aitoehigie/96b1340a3ad3303b11db77101d09b362 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
from scrapy.selector import Selector
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join
from nairalandapi.items import NairalandapiItem
import datetime
class NairalandapiSpider(scrapy.Spider):
    """Spider that collects the featured links from the Nairaland front page.

    For every matching anchor it yields one ``NairalandapiItem`` populated
    with the anchor text (``title``), its ``href`` (``link``) and the time
    the item was built (``timestamp``).
    """

    name = "nairaland"
    allowed_domains = ["nairaland.com"]
    start_urls = ['http://nairaland.com']

    def parse(self, response):
        """Yield one loaded item per featured anchor found in *response*.

        Anchors are taken from ``td`` cells with class ``featured w``; at
        most the first 65 anchors of each such cell are considered.
        """
        # Calling .strip() through a lambda avoids the Python-2-only
        # ``unicode`` builtin while behaving the same for text values.
        strip = MapCompose(lambda text: text.strip())
        clean_title = MapCompose(
            lambda text: text.strip(),
            lambda text: text.replace("\r\n", ""),
        )

        anchors = response.xpath(
            "//td[@class='featured w']/a[position() >= 1 and not(position() > 65)]"
        )
        for anchor in anchors:
            # Use the item class this module actually imports.
            news = ItemLoader(item=NairalandapiItem(), selector=anchor)
            news.add_xpath("title", ".//text()", clean_title)
            # Attribute nodes have no text() children, so select the
            # attribute itself to obtain the URL.
            news.add_xpath("link", "./@href", strip)
            news.add_value("timestamp", datetime.datetime.now())
            yield news.load_item()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment