Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created June 5, 2012 03:06
Show Gist options
  • Save marcelcaraciolo/2872334 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/2872334 to your computer and use it in GitHub Desktop.
crawler_p3.py
import urllib
import sys
import re
# Query URL template on websro.correios.com.br. The single ``%s`` placeholder
# (the P_COD_UNI parameter) is filled with the code passed to
# obter_rastreamento().
URL = 'http://websro.correios.com.br/sro_bin/txect01$.QueryList?P_ITEMCODE=&P_LINGUA=001&P_TESTE=&P_TIPO=001&P_COD_UNI=%s'
def captura_html(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Fully formatted URL to request.

    Returns:
        Whatever ``read()`` yields for the opened URL (the page source).

    Raises:
        IOError: Propagated from ``urllib.urlopen`` when the request fails.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # The handle was previously left open; close it explicitly, since
        # the object returned by Python 2's urllib.urlopen is not a context
        # manager.
        response.close()
def extrai_status(html):
    """Extract the table markup from a results page.

    Searches *html* for the span that starts at the first ``<table`` and
    ends at the last ``</TABLE>``. The match is greedy and case-sensitive,
    and ``re.S`` lets it run across newlines. The matched markup is echoed
    to stdout and returned.

    Args:
        html: Page source as a string.

    Returns:
        The matched markup, or ``None`` when *html* contains no such span.
    """
    match = re.search(r'<table.*</TABLE>', html, re.S)
    if match is None:
        # re.search returns None when nothing matches; calling .group() on
        # it used to crash with AttributeError.
        return None
    table = match.group()
    # A parenthesized single argument prints identically whether ``print``
    # is a statement or a function.
    print(table)
    return table
def obter_rastreamento(codigo):
    """Download the query page for *codigo* and extract its status.

    Args:
        codigo: Code substituted into the ``%s`` placeholder of ``URL``.

    Returns:
        Whatever ``extrai_status`` returns for the downloaded page.
    """
    html_data = captura_html(URL % codigo)
    status = extrai_status(html_data)
    # The result used to be computed and then dropped, so every caller
    # received None.
    return status
if __name__ == '__main__':
    # Require the code argument; without this guard a missing argument
    # surfaced as a bare IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <codigo>\n' % sys.argv[0])
        sys.exit(1)
    codigo = sys.argv[1]
    status = obter_rastreamento(codigo)
    # Same output as the original "status <value>" line, written so that it
    # is valid whether ``print`` is a statement or a function.
    print('status %s' % status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment