Skip to content

Instantly share code, notes, and snippets.

@danilovazb
Created January 23, 2015 17:49
Show Gist options
  • Save danilovazb/f319f4f634e6d1551e48 to your computer and use it in GitHub Desktop.
Save danilovazb/f319f4f634e6d1551e48 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import threading
import time
import urllib2,sys,json,requests,re
import pycurl
from bs4 import BeautifulSoup
from itertools import islice
# Maximum number of simultaneous connections/threads
MAX_CONEXOES = 50
# Lock that lets mostrar_msg print one message at a time across threads
print_lock = threading.Lock()
# Path of the input file, taken from the first command-line argument.
# Exit with a short usage message instead of an IndexError traceback when
# the argument is missing.
if len(sys.argv) < 2:
    sys.exit("Uso: %s <arquivo>" % sys.argv[0])
arquivoArgv = sys.argv[1]
def mostrar_msg(msg):
    """Print ``msg`` to stdout while holding ``print_lock``.

    Args:
        msg: Any printable object; it is handed to ``print`` unchanged.

    The lock keeps output from concurrent threads from interleaving.
    """
    # ``with`` releases the lock even if printing raises (for example an
    # encoding error); a bare acquire()/release() pair would leave the lock
    # held and block every later caller.
    with print_lock:
        # Single-argument parenthesised form behaves the same as the
        # Python 2 ``print msg`` statement.
        print(msg)
# Worker function run by each thread
def consultar_cpf(cpf, timeout=30):
    """POST one code to the Correios SRO query endpoint and print the result.

    Args:
        cpf: Raw value read from the input file. Surrounding whitespace is
            stripped and the result is sent as the ``P_COD_UNI`` form field.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled server would keep this thread (and
            one of the MAX_CONEXOES slots) busy forever.

    Returns:
        None. The raw response body and then a dict holding the extracted
        cell texts are printed through ``mostrar_msg`` so that output from
        concurrent threads does not interleave.
    """
    codigo = cpf.strip()
    url = "http://websro.correios.com.br/sro_bin/txect01$.QueryList"
    refer = "http://websro.correios.com.br/sro_bin/txect01$.startup?P_LINGUA=001&P_TIPO=001"
    # NOTE(review): the session cookie value looks like a placeholder;
    # confirm whether the server actually checks it.
    cookie = {'ASPSESSIONIDSCTADABA': 'asdasdasdasdasd'}
    payload = {'P_ITEMCODE':'','P_LINGUA':'001','P_TESTE':'','P_TIPO':'001','P_COD_UNI':codigo,'Z_ACTION':'Search'}
    headers = {'Referer': refer}
    try:
        response = requests.post(url, cookies=cookie, data=payload,
                                 headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as erro:
        # Report the failure on one line instead of letting the thread die
        # with a traceback; the remaining codes are unaffected.
        mostrar_msg("Erro ao consultar %s: %s" % (codigo, erro))
        return
    data = response.text
    mostrar_msg(data)
    soup = BeautifulSoup(data)
    # Each matching cell's text is UTF-8 encoded, stripped and terminated
    # with '|'; the pieces are concatenated in document order.
    celulas = soup.select("tr td.rowspan")
    contribuinte = {
        'dados': ''.join(
            celula.get_text().encode('utf8').strip() + '|'
            for celula in celulas
        )
    }
    mostrar_msg(contribuinte)
# Main thread: start one worker thread per non-empty line of the input file
lista_threads = []
with open(arquivoArgv, 'rb') as arquivo:
    for linha in arquivo:
        cpf = linha.strip()
        if not cpf:
            # A blank line (e.g. a trailing newline at the end of the file)
            # would otherwise fire a request with an empty code.
            continue
        # active_count() also counts the main thread, so this loop admits at
        # most MAX_CONEXOES workers at a time (assuming no other threads are
        # alive). Poll once per second until a slot frees up.
        while threading.active_count() > MAX_CONEXOES:
            time.sleep(1)
        thread = threading.Thread(target=consultar_cpf, args=(cpf,))
        lista_threads.append(thread)
        thread.start()
# Wait for the threads that are still running to finish
mostrar_msg("Esperando threads abertas terminarem...")
for thread in lista_threads:
    thread.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment