Skip to content

Instantly share code, notes, and snippets.

@macndesign
Last active July 11, 2016 14:32
Show Gist options
  • Save macndesign/6f254ef3b24acb0c13fb5b0e303c2d3a to your computer and use it in GitHub Desktop.
Save macndesign/6f254ef3b24acb0c13fb5b0e303c2d3a to your computer and use it in GitHub Desktop.
montenegro scraping
import shutil
from urllib.parse import urljoin, urlsplit

import requests
from lxml import html
# Scrape one lot from the site at BASE_URL: follow the first "link_status"
# link on the home page, open the first entry of its "navlotes" selector,
# print the fields found on that detail page and download every file linked
# from the page behind the "coluna_esquerda" link into the current directory.
BASE_URL = 'http://www.montenegroleiloes.com.br/'
# requests waits indefinitely by default; bound every call so a stalled
# server cannot hang the script.
REQUEST_TIMEOUT = 30  # seconds


def _get(url, **kwargs):
    """Fetch *url* with a timeout and fail loudly on an HTTP error status.

    Extra keyword arguments are forwarded to ``requests.get``.

    Returns the ``requests.Response``. Raises ``requests.HTTPError`` for
    4xx/5xx answers so an error page is never parsed as if it were data.
    """
    kwargs.setdefault('timeout', REQUEST_TIMEOUT)
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response


def _first(matches, field):
    """Return the first XPath match, or raise IndexError naming *field*.

    A bare ``matches[0]`` raises the same exception type but gives no hint
    about which selector stopped matching.
    """
    if not matches:
        raise IndexError('no match found on the page for: {}'.format(field))
    return matches[0]


# Home page: collect the hrefs of the anchors whose class contains "link_status".
page = _get(BASE_URL)
tree = html.fromstring(page.content)
links_home = tree.xpath('//a[contains(@class, "link_status")]/@href')

# TODO: loop over the result for every kind of link
# urljoin copes with relative, root-relative and absolute hrefs alike.
url_ativo_1 = urljoin(BASE_URL, _first(links_home, 'a.link_status href'))
ativo_1 = _get(url_ativo_1)
tree_ativo_1 = html.fromstring(ativo_1.content)
links_lotes_ativo_1 = tree_ativo_1.xpath('//select[@id="navlotes"]/option/@value')

# TODO: loop over the result for every item of that kind
url_lote_detail = urljoin(
    BASE_URL, _first(links_lotes_ativo_1, 'select#navlotes option value'))
lote_detail_ativo_1 = _get(url_lote_detail)
tree_lote_detail_ativo_1 = html.fromstring(lote_detail_ativo_1.content)

# Collect the fields of the detail page.
data = tree_lote_detail_ativo_1.xpath('//h2[text()="Data:"]/following::p[1]/text()')
local = tree_lote_detail_ativo_1.xpath('//h2[text()="Local:"]/following::p[1]/text()')
# The mode is read from the second paragraph after the "Local:" heading.
modo = tree_lote_detail_ativo_1.xpath('//h2[text()="Local:"]/following::p[2]/strong/text()')
situacao = tree_lote_detail_ativo_1.xpath('//h2[text()="Situação:"]/following::a[1]/text()')
lote = tree_lote_detail_ativo_1.xpath('//div[@class="coluna_detalhes"]/div/h2/span/text()')

# The four values below are taken positionally from the text nodes of the
# "coluna_detalhes" paragraphs; check the count so a layout change produces
# a readable error instead of a generic unpacking failure.
campos_lote = tree_lote_detail_ativo_1.xpath(
    '//div[@class="coluna_detalhes"]/div/p/text()')
if len(campos_lote) != 4:
    raise ValueError(
        'expected 4 text fields under div.coluna_detalhes '
        '(comitente, lance_inicial, incremento_minimo, visitas), '
        'found {}'.format(len(campos_lote)))
comitente, lance_inicial, incremento_minimo, visitas = campos_lote

situacao_lote = tree_lote_detail_ativo_1.xpath('//div[contains(@class, "status-lote")]/text()')
lance_atual = tree_lote_detail_ativo_1.xpath('//h4[text()="LANCE ATUAL"]/following::span/strong/text()')
# Collected but not printed below.
detalhes = tree_lote_detail_ativo_1.xpath('//div[@class="linha_maisdetalhes"]/p/text()')
descricao_lote = tree_lote_detail_ativo_1.xpath('//h2[text()="DESCRIÇÃO DO LOTE"]/following::p[1]/text()')
valor_avaliacao = tree_lote_detail_ativo_1.xpath('//h2[text()="DESCRIÇÃO DO LOTE"]/following::p[1]/strong/text()')

print(url_lote_detail)
print(_first(data, 'Data'))
print(_first(local, 'Local'))
print(_first(modo, 'modo (second paragraph after Local)'))
print(_first(situacao, 'Situação'))
print(_first(lote, 'lote (coluna_detalhes heading)'))
print(comitente.strip())
print(lance_inicial.strip())
print(incremento_minimo.strip())
print(visitas.strip())
print(_first(situacao_lote, 'status-lote'))
print('R$ {}'.format(_first(lance_atual, 'LANCE ATUAL').strip()))
print(_first(descricao_lote, 'DESCRIÇÃO DO LOTE'))
# Drop the label so only the value itself is printed.
print(_first(valor_avaliacao, 'Valor de Avaliação').replace('Valor de Avaliação: ', ''))

# Follow the first link in "coluna_esquerda" and download everything that
# page links to.
url_imagems = tree_lote_detail_ativo_1.xpath('//div[@class="coluna_esquerda"]/a/@href')
lote_imagens = _get(
    urljoin(BASE_URL, _first(url_imagems, 'div.coluna_esquerda link')))
tree_imagens = html.fromstring(lote_imagens.content)
link_imagens = tree_imagens.xpath('//a/@href')
list_link_imagens = [urljoin(BASE_URL, link) for link in link_imagens]

for url_imagem in list_link_imagens:
    # '//a/@href' may also return javascript:/mailto: links, which requests
    # cannot fetch; skip anything that is not plain HTTP(S).
    if urlsplit(url_imagem).scheme not in ('http', 'https'):
        continue
    # TODO: build the path from (comitente + data + image)
    path = url_imagem.split('/')[-1]
    if not path:
        # URL ends with '/': there is no file name to write to.
        continue
    r = requests.get(url_imagem, stream=True, timeout=REQUEST_TIMEOUT)
    try:
        # Downloads stay best-effort: anything but a 200 is skipped silently.
        if r.status_code != 200:
            continue
        with open(path, 'wb') as f:
            # Let urllib3 undo gzip/deflate so the file holds the real bytes.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response keeps its connection open until closed.
        r.close()

print(list_link_imagens)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment