Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created June 5, 2012 03:55
Show Gist options
  • Save marcelcaraciolo/2872533 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/2872533 to your computer and use it in GitHub Desktop.
crawler_p5.py
import urllib
import sys
import re
import json
# Query-URL template for the tracking lookup. The single ``%s`` placeholder
# (the ``P_COD_UNI`` parameter) is filled with the tracking code via
# ``URL % codigo`` in obter_rastreamento().
URL = 'http://websro.correios.com.br/sro_bin/txect01$.QueryList?P_ITEMCODE=&P_LINGUA=001&P_TESTE=&P_TIPO=001&P_COD_UNI=%s'
def captura_html(url):
    """Download the resource at *url* and return its complete body.

    Args:
        url: Address to fetch; passed unchanged to ``urllib.urlopen``.

    Returns:
        The data returned by ``read()`` on the opened response.

    Raises:
        Any error raised by ``urllib.urlopen`` or ``read()``; nothing is
        caught here.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
def extrai_status(html):
    """Extract one tracking event from the HTML of a tracking-result page.

    The first ``<table ... </TABLE>`` span in *html* is split into ``<tr>``
    rows and the second row (index 1) is parsed; the first row is skipped
    (presumably a header row, with the latest event right below it --
    confirm against the live page).

    Args:
        html: Page markup as text.

    Returns:
        ``{'local': ..., 'data': ..., 'situacao': ...}`` when the row holds
        all three expected cells. Otherwise a dict with only ``'situacao'``
        set to the "object not found / just posted" message: this covers a
        missing table, fewer than two rows, or a row that does not have the
        expected cells.
    """
    not_found = {
        'situacao': 'Objeto nao encontrado ou Objeto ainda recem postado',
    }

    table = re.search(r'<table.*</TABLE>', html, re.S)
    if table is None:
        return not_found

    rows = re.findall(r'<tr>(.*?)</tr>', table.group(), re.S)
    if len(rows) < 2:
        return not_found

    # Example of an event row:
    # '<td rowspan=1>25/04/2012 17:36</td><td>CDD CACOAL - CACOAL/RO</td>
    #  <td><FONT COLOR="5F9F9F">Entrega Efetuada</font></td>'
    row = rows[1]
    # Location: first attribute-less <td> cell.
    local = re.search(r'<td>(.*?)</td>', row, re.S)
    # Date: the cell carrying a rowspan attribute (any number of digits).
    data = re.search(r'<td rowspan=\d+>(.*?)</td>', row, re.S)
    # Situation: text inside the coloured <FONT> element (group 2).
    situacao = re.search(r'<FONT COLOR="(.*?)">(.*?)</font>', row, re.S)

    # A row missing any expected cell is treated like an unparseable page
    # rather than crashing with AttributeError on a None match.
    if local is None or data is None or situacao is None:
        return not_found

    return {
        'local': local.group(1),
        'data': data.group(1),
        'situacao': situacao.group(2),
    }
def obter_rastreamento(codigo):
    """Look up *codigo* and return its parsed tracking status as JSON text.

    Args:
        codigo: Tracking code substituted into the ``URL`` template.

    Returns:
        The ``json.dumps`` serialisation of the dict produced by
        ``extrai_status`` for the page fetched with ``captura_html``.
    """
    pagina = captura_html(URL % codigo)
    return json.dumps(extrai_status(pagina))
if __name__ == '__main__':
    # Command-line entry point: the tracking code is the first argument.
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, exit status 1)
        # instead of an IndexError traceback when no code is given.
        sys.exit('Uso: %s <codigo>' % sys.argv[0])
    codigo = sys.argv[1]
    status = obter_rastreamento(codigo)
    # A single pre-formatted string prints the same text whether ``print``
    # is the Python 2 statement or the print() function.
    print('status %s' % status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment