Last active
July 11, 2016 14:32
-
-
Save macndesign/6f254ef3b24acb0c13fb5b0e303c2d3a to your computer and use it in GitHub Desktop.
montenegro scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shutil
from urllib.parse import urljoin

import requests
from lxml import html
BASE_URL = 'http://www.montenegroleiloes.com.br/'
# requests waits forever unless a timeout is given, so cap every page fetch.
REQUEST_TIMEOUT = 30  # seconds


def _fetch(url):
    """Return the response for *url*, raising on timeouts and HTTP error codes."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Stop on 4xx/5xx here instead of scraping an error page further down.
    response.raise_for_status()
    return response


# Home page: collect the hrefs of the anchors whose class contains "link_status".
page = _fetch(BASE_URL)
tree = html.fromstring(page.content)
links_home = tree.xpath('//a[contains(@class, "link_status")]/@href')
if not links_home:
    raise SystemExit('No "link_status" links found on {}'.format(BASE_URL))

# TODO: loop over the results for each type of link
# urljoin also copes with root-relative or absolute hrefs, unlike plain "+".
url_ativo_1 = urljoin(BASE_URL, links_home[0])
ativo_1 = _fetch(url_ativo_1)
tree_ativo_1 = html.fromstring(ativo_1.content)
links_lotes_ativo_1 = tree_ativo_1.xpath('//select[@id="navlotes"]/option/@value')
if not links_lotes_ativo_1:
    raise SystemExit('No options in the "navlotes" selector on {}'.format(url_ativo_1))

# TODO: loop over every item of that type
url_lote_detail = urljoin(BASE_URL, links_lotes_ativo_1[0])
lote_detail_ativo_1 = _fetch(url_lote_detail)
tree_lote_detail_ativo_1 = html.fromstring(lote_detail_ativo_1.content)
# Gather the lot information from the detail page and print it.
def _first(values, default=''):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


_xpath = tree_lote_detail_ativo_1.xpath

data = _xpath('//h2[text()="Data:"]/following::p[1]/text()')
local = _xpath('//h2[text()="Local:"]/following::p[1]/text()')
# Read from the second paragraph after the "Local:" heading.
modo = _xpath('//h2[text()="Local:"]/following::p[2]/strong/text()')
situacao = _xpath('//h2[text()="Situação:"]/following::a[1]/text()')
lote = _xpath('//div[@class="coluna_detalhes"]/div/h2/span/text()')
# The unpacking needs exactly four text nodes, in this order; any other count
# raises ValueError because the fields could not be told apart reliably.
comitente, lance_inicial, incremento_minimo, visitas = _xpath(
    '//div[@class="coluna_detalhes"]/div/p/text()')
situacao_lote = _xpath('//div[contains(@class, "status-lote")]/text()')
lance_atual = _xpath('//h4[text()="LANCE ATUAL"]/following::span/strong/text()')
# Collected but not printed below.
detalhes = _xpath('//div[@class="linha_maisdetalhes"]/p/text()')
descricao_lote = _xpath('//h2[text()="DESCRIÇÃO DO LOTE"]/following::p[1]/text()')
valor_avaliacao = _xpath('//h2[text()="DESCRIÇÃO DO LOTE"]/following::p[1]/strong/text()')

# A field missing from the page prints as an empty value instead of aborting
# the whole report with IndexError.
print(url_lote_detail)
print(_first(data))
print(_first(local))
print(_first(modo))
print(_first(situacao))
print(_first(lote))
print(comitente.strip())
print(lance_inicial.strip())
print(incremento_minimo.strip())
print(visitas.strip())
print(_first(situacao_lote))
print('R$ {}'.format(_first(lance_atual).strip()))
print(_first(descricao_lote))
print(_first(valor_avaliacao).replace('Valor de Avaliação: ', ''))
# The first href under the "coluna_esquerda" div is fetched as the page whose
# anchors are then downloaded as the lot's images.
url_imagems = tree_lote_detail_ativo_1.xpath('//div[@class="coluna_esquerda"]/a/@href')
IMAGE_TIMEOUT = 30  # seconds; requests never times out on its own

link_imagens = []
if url_imagems:
    lote_imagens = requests.get(urljoin(BASE_URL, url_imagems[0]), timeout=IMAGE_TIMEOUT)
    tree_imagens = html.fromstring(lote_imagens.content)
    link_imagens = tree_imagens.xpath('//a/@href')

# urljoin keeps absolute and root-relative hrefs intact, unlike plain "+".
list_link_imagens = [urljoin(BASE_URL, link) for link in link_imagens]

for url_imagem in list_link_imagens:
    # TODO: build the path from (comitente + data + image)
    path = url_imagem.split('/')[-1]
    # Skip hrefs that cannot be saved: URLs ending in "/" give no file name,
    # and non-HTTP schemes (javascript:, mailto:) cannot be requested.
    if not path or not url_imagem.startswith(('http://', 'https://')):
        continue
    r = requests.get(url_imagem, stream=True, timeout=IMAGE_TIMEOUT)
    try:
        if r.status_code == 200:
            with open(path, 'wb') as f:
                # Have urllib3 undo gzip/deflate so the file holds the real bytes.
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response holds its connection until it is closed.
        r.close()

print(list_link_imagens)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment