Skip to content

Instantly share code, notes, and snippets.

@dgtal
Created January 27, 2017 00:42
Show Gist options
  • Select an option

  • Save dgtal/5a061cb374dd886e095b268bae6641a1 to your computer and use it in GitHub Desktop.

Select an option

Save dgtal/5a061cb374dd886e095b268bae6641a1 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# scrapy crawl spider-infracciones -a url=http://www.impo.com.uy/bases/notificaciones-cgm/1-2017/ -o infracciones-1-2017.json
import scrapy
class SpiderInfraccionesSpider(scrapy.Spider):
    """Extract the rows of the ``table.tabla_en_texto`` table of an IMPO page.

    Each data row is emitted as a dict whose keys are listed in ``FIELDS``.

    The page to scrape can be supplied on the command line with
    ``-a url=<page>``; when omitted, the class-level ``start_urls`` is used.
    """

    name = "spider-infracciones"
    allowed_domains = ["impo.com.uy"]
    start_urls = ['http://www.impo.com.uy/bases/notificaciones-cgm/15-2017/']

    # Output keys, in the same order as the ``td > pre`` cells of a row.
    FIELDS = (
        'matricula',
        'fecha_hora',
        'interseccion',
        'intervenido',
        'articulo',
        'valor_ur',
    )

    def __init__(self, url=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            url: Optional page to scrape. When given it replaces the default
                ``start_urls``; when omitted the class default is kept.
            *args: Forwarded to ``scrapy.Spider``.
            **kwargs: Forwarded to ``scrapy.Spider``.
        """
        super(SpiderInfraccionesSpider, self).__init__(*args, **kwargs)
        if url:
            self.start_urls = [url]

    def parse(self, response):
        """Yield one dict per data row of the table in ``response``.

        The first matched ``tr`` is skipped (presumably the header row --
        confirm against the page markup). Rows with fewer cells than
        ``FIELDS`` are skipped rather than raising, so a single malformed
        row does not abort the rest of the page. A cell without text yields
        an empty string.
        """
        rows = response.css('table.tabla_en_texto > tr')
        # Slicing (unlike pop(0)) is safe when no rows were matched.
        for row in rows[1:]:
            cells = row.css('td > pre')
            if len(cells) < len(self.FIELDS):
                self.logger.debug(
                    'Skipping row with %d cells (expected %d)',
                    len(cells), len(self.FIELDS),
                )
                continue
            # zip stops at len(FIELDS), so extra trailing cells are ignored.
            yield {
                field: cell.css('::text').extract_first() or ''
                for field, cell in zip(self.FIELDS, cells)
            }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment